diff --git a/.github/CODEOWNERSHIP b/.github/CODEOWNERSHIP index 682dff7fe3c0..831b8a40f530 100644 --- a/.github/CODEOWNERSHIP +++ b/.github/CODEOWNERSHIP @@ -132,13 +132,6 @@ src/runtime/** @vinx13 @tqchen @FronzenGene @liangfu @areusch @tmoreau89 @ajtul include/tvm/runtime/** @vinx13 @tqchen @FronzenGene @liangfu @areusch @tmoreau89 @ajtulloch @masahi @kazum @ZihengJiang @junrushao1994 python/tvm/runtime/** @vinx13 @tqchen @FronzenGene @liangfu @areusch @tmoreau89 @ajtulloch @masahi @kazum @ZihengJiang @junrushao1994 -# runtime/micro -src/runtime/micro/** @areusch @liangfu @tmoreau89 @manupa-arm -src/runtime/crt/** @areusch @liangfu @tmoreau89 @manupa-arm -include/tvm/runtime/crt/** @areusch @liangfu @tmoreau89 @manupa-arm -include/tvm/runtime/micro/** @areusch @liangfu @tmoreau89 @manupa-arm -python/tvm/micro/** @areusch @liangfu @tmoreau89 @manupa-arm - # relay src/relay/** @jroesch @slyubomirsky @icemelon @MarisaKirisame @ZihengJiang @yzhliu @vinx13 @mbrookhart @jwfromm @zhiics @anijain2305 @wweic @eqy @junrushao1994 include/tvm/relay/** @jroesch @slyubomirsky @icemelon @MarisaKirisame @ZihengJiang @yzhliu @vinx13 @mbrookhart @jwfromm @zhiics @anijain2305 @wweic @eqy @junrushao1994 diff --git a/.gitignore b/.gitignore index 851552d95976..5bcbd5e37314 100644 --- a/.gitignore +++ b/.gitignore @@ -263,9 +263,6 @@ src/runtime/hexagon/rpc/hexagon_rpc_stub.c # Local tvm-site checkout tvm-site/ -# Generated docs files -gallery/how_to/work_with_microtvm/micro_tvmc.py - # Test sample data files !tests/python/ci/sample_prs/*.json diff --git a/CMakeLists.txt b/CMakeLists.txt index cac2e726fbda..8abdfad24c28 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,6 @@ include(cmake/utils/FindVulkan.cmake) include(cmake/utils/FindLLVM.cmake) include(cmake/utils/FindROCM.cmake) include(cmake/utils/FindRCCL.cmake) -include(cmake/utils/FindEthosN.cmake) include(cmake/utils/FindNVSHMEM.cmake) if(EXISTS ${CMAKE_BINARY_DIR}/config.cmake) @@ -68,14 +67,11 @@ tvm_option(TVM_DEBUG_WITH_ABI_CHANGE "Enable debug code that may cause ABI chang tvm_option(TVM_LOG_BEFORE_THROW "Whether log before throw, for debugging purposes" OFF) tvm_option(USE_RTTI "Build with RTTI" ON) tvm_option(USE_MSVC_MT "Build with MT" OFF) -tvm_option(USE_MICRO "Build with Micro TVM support" OFF) tvm_option(INSTALL_DEV "Install compiler infrastructure" OFF) tvm_option(HIDE_PRIVATE_SYMBOLS "Compile with -fvisibility=hidden." 
OFF) tvm_option(USE_TF_TVMDSOOP "Build with TensorFlow TVMDSOOp" OFF) tvm_option(USE_PT_TVMDSOOP "Build with PyTorch TVMDSOOp" OFF) tvm_option(USE_FALLBACK_STL_MAP "Use TVM's POD compatible Map" OFF) -tvm_option(USE_ETHOSN "Build with Arm(R) Ethos(TM)-N" OFF) -tvm_option(USE_CMSISNN "Build with Arm CMSIS-NN" OFF) tvm_option(INDEX_DEFAULT_I64 "Defaults the index datatype to int64" ON) tvm_option(USE_LIBBACKTRACE "Use libbacktrace to supply linenumbers on stack traces" AUTO) tvm_option(BACKTRACE_ON_SEGFAULT "Install a signal handler to print a backtrace on segfault" OFF) @@ -113,7 +109,6 @@ tvm_option(USE_SORT "Build with sort support" ON) tvm_option(USE_NNPACK "Build with nnpack support" OFF) tvm_option(USE_LIBTORCH "Build with libtorch support" OFF) tvm_option(USE_RANDOM "Build with random support" ON) -tvm_option(USE_MICRO_STANDALONE_RUNTIME "Build with micro.standalone_runtime support" OFF) tvm_option(USE_CPP_RPC "Build CPP RPC" OFF) tvm_option(USE_IOS_RPC "Build iOS RPC" OFF) tvm_option(USE_TFLITE "Build with tflite support" OFF) @@ -569,7 +564,6 @@ else() endif() # Module rules -include(cmake/modules/StandaloneCrt.cmake) include(cmake/modules/CUDA.cmake) include(cmake/modules/Hexagon.cmake) # This must come before logging.cmake include(cmake/modules/contrib/CLML.cmake) # Must be before OpenCL.cmake @@ -579,10 +573,6 @@ include(cmake/modules/Vulkan.cmake) include(cmake/modules/Metal.cmake) include(cmake/modules/ROCM.cmake) include(cmake/modules/LLVM.cmake) -include(cmake/modules/Micro.cmake) -include(cmake/modules/contrib/EthosN.cmake) -include(cmake/modules/contrib/CMSISNN.cmake) -include(cmake/modules/contrib/EthosU.cmake) include(cmake/modules/contrib/BLAS.cmake) include(cmake/modules/contrib/CODEGENC.cmake) include(cmake/modules/contrib/DNNL.cmake) @@ -591,7 +581,6 @@ include(cmake/modules/contrib/CUTLASS.cmake) include(cmake/modules/contrib/ExampleTargetHooks.cmake) include(cmake/modules/contrib/Random.cmake) include(cmake/modules/contrib/Posit.cmake) -include(cmake/modules/contrib/MicroStandaloneRuntime.cmake) include(cmake/modules/contrib/MSCCLPP.cmake) include(cmake/modules/contrib/Sort.cmake) include(cmake/modules/contrib/NNPack.cmake) @@ -682,15 +671,6 @@ include(cmake/modules/Logging.cmake) include(cmake/modules/contrib/PAPI.cmake) -if(USE_MICRO) - # NOTE: cmake doesn't track dependencies at the file level across subdirectories. 
For the - # Unix Makefiles generator, need to add these explicit target-level dependency) - add_dependencies(tvm_runtime arduino) - add_dependencies(tvm_runtime crt) - add_dependencies(tvm_runtime host_standalone_crt) - add_dependencies(tvm_runtime zephyr) -endif() - if(USE_CPP_RPC) add_subdirectory("apps/cpp_rpc") endif() @@ -817,9 +797,6 @@ if(GTEST_FOUND) target_link_libraries(cpptest PRIVATE ${LLVM_LIBS}) endif() endif() - if(DEFINED ETHOSN_RUNTIME_LIBRARY) - target_link_libraries(cpptest PRIVATE ${ETHOSN_RUNTIME_LIBRARY}) - endif() set_target_properties(cpptest PROPERTIES EXCLUDE_FROM_ALL 1) set_target_properties(cpptest PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD 1) if(USE_RELAY_DEBUG) diff --git a/apps/cpp_rpc/CMakeLists.txt b/apps/cpp_rpc/CMakeLists.txt index 4b2b6ca61d7d..e16da0ee4929 100644 --- a/apps/cpp_rpc/CMakeLists.txt +++ b/apps/cpp_rpc/CMakeLists.txt @@ -61,14 +61,6 @@ if (BUILD_FOR_ANDROID AND USE_HEXAGON) list(APPEND TVM_RPC_LINKER_LIBS cdsprpc log) endif() -if(USE_ETHOSN) - if (ETHOSN_RUNTIME_LIBRARY) - list(APPEND TVM_RPC_LINKER_LIBS ${ETHOSN_RUNTIME_LIBRARY}) - else() - message(WARNING "Could not locate Arm(R) Ethos(TM)-N runtime library components") - endif() -endif() - if(BUILD_STATIC_RUNTIME) list(APPEND TVM_RPC_LINKER_LIBS -Wl,--whole-archive tvm_runtime -Wl,--no-whole-archive) else() diff --git a/apps/cpp_rtvm/CMakeLists.txt b/apps/cpp_rtvm/CMakeLists.txt index f89663f08173..0f20920af400 100644 --- a/apps/cpp_rtvm/CMakeLists.txt +++ b/apps/cpp_rtvm/CMakeLists.txt @@ -71,14 +71,6 @@ if (BUILD_FOR_ANDROID AND USE_HEXAGON) list(APPEND RTVM_LINKER_LIBS cdsprpc log) endif() -if(USE_ETHOSN) - if (ETHOSN_RUNTIME_LIBRARY) - list(APPEND RTVM_LINKER_LIBS ${ETHOSN_RUNTIME_LIBRARY}) - else() - message(WARNING "Could not locate Arm(R) Ethos(TM)-N runtime library components") - endif() -endif() - if(BUILD_STATIC_RUNTIME) list(APPEND RTVM_LINKER_LIBS -Wl,--whole-archive tvm_runtime -Wl,--no-whole-archive z) else() diff --git a/apps/microtvm/README.md b/apps/microtvm/README.md deleted file mode 100644 index 1467c237c502..000000000000 --- a/apps/microtvm/README.md +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - - - - - - - - - - - -# microTVM - -microTVM is the effort that allows TVM to build and execute models on bare-metal microcontrollers. - -The `pyproject.toml` file in this directory can be used to create a -[Poetry](https://python-poetry.org/) Python environment with all of the required -dependencies installed for running microTVM. To use it, run: - -``` -$ poetry lock && poetry install -$ poetry shell -``` diff --git a/apps/microtvm/arduino/README.md b/apps/microtvm/arduino/README.md deleted file mode 100644 index b33557b53239..000000000000 --- a/apps/microtvm/arduino/README.md +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - - - - - - - - -This directory contains code to interface microTVM with [Arduino](https://www.arduino.cc/). diff --git a/apps/microtvm/arduino/template_project/Makefile.template b/apps/microtvm/arduino/template_project/Makefile.template deleted file mode 100644 index f067991865bd..000000000000 --- a/apps/microtvm/arduino/template_project/Makefile.template +++ /dev/null @@ -1,64 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -FQBN ?= -VERBOSE_FLAG ?= -BUILD_DIR := $(subst :,.,build) -PORT ?= -ARUINO_CLI_CMD ?= -BOARD := -BUILD_EXTRA_FLAGS := - -SRC := $(wildcard *.ino) -BIN := $(BUILD_DIR)/$(SRC).bin -ELF := $(BUILD_DIR)/$(SRC).elf - -$(info FQBN `fully qualified board name` => [${FQBN}]) -$(info BUILD_DIR `build directory for this project` => [${BUILD_DIR}]) -$(info SRC `Arduino .ino file for this project` => [${SRC}]) -$(info BIN `generated binary file path` => [${BIN}]) -$(info PORT `board's port` => [${PORT}]) -$(info BOARD `board name` => [${BOARD}]) -$(info BUILD_EXTRA_FLAGS `build extra flags including header include directories and other compiler flags` => [${BUILD_EXTRA_FLAGS}]) - -all: $(ELF) flash -.PHONY: all - -build: $(ELF) -.PHONY: build - -$(ELF): $(SRC) - $(ARUINO_CLI_CMD) compile --fqbn $(FQBN) \ - --build-path $(BUILD_DIR) \ - --build-properties $(BUILD_EXTRA_FLAGS) \ - $(VERBOSE_FLAG) - -flash: - if [ -z $(PORT) ] ; then \ - echo "---> ERROR: Please set the device port environment variable PORT"; \ - else $(ARUINO_CLI_CMD) upload --fqbn $(FQBN) --input-dir $(BUILD_DIR) --port $(PORT) $(VERBOSE_FLAG); \ - fi - -info: - $(info --------------------------------------INFO--------------------------------------) - $(info This makefile is for building and flashing an Arduino project with TVM.) - $(info To build run: `make build`) - $(info To upload the sketch run: `make flash PORT=`) - $(info --------------------------------------INFO--------------------------------------) - -clean: - rm -rf build diff --git a/apps/microtvm/arduino/template_project/boards.json b/apps/microtvm/arduino/template_project/boards.json deleted file mode 100644 index 3ae981ff32cf..000000000000 --- a/apps/microtvm/arduino/template_project/boards.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "due": { - "package": "arduino", - "architecture": "sam", - "board": "arduino_due_x_dbg", - "model": "sam3x8e", - "vid_hex": "2341", - "pid_hex": "003d" - }, - "feathers2": { - "package": "esp32", - "architecture": "esp32", - "board": "feathers2", - "model": "esp32", - "note": "Due to the way the Feather S2 bootloader works, compilation behaves fine but uploads cannot be done automatically.", - "vid_hex": "303a", - "pid_hex": "0002" - }, - "metrom4": { - "package": "adafruit", - "architecture": "samd", - "board": "adafruit_metro_m4", - "model": "atsamd51", - "vid_hex": "", - "pid_hex": "" - }, - "spresense": { - "package": "SPRESENSE", - "architecture": "spresense", - "board": "spresense", - "model": "cxd5602gg", - "note": "Spresense only works as of its v2.3.0 sdk.", - "vid_hex": "10c4", - "pid_hex": "ea60" - }, - "nano33ble": { - "package": "arduino", - "architecture": "mbed_nano", - "board": "nano33ble", - "model": "nrf52840", - "vid_hex": "2341", - "pid_hex": "805a" - }, - "portentah7": { - "package": "arduino", - "architecture": "mbed_portenta", - "board": "envie_m7", - "model": "stm32h7xx", - "vid_hex": "2341", - "pid_hex": "025b" - }, - "pybadge": { - "package": "adafruit", - "architecture": "samd", - "board": "adafruit_pybadge_m4", - "model": "atsamd51", - "vid_hex": "", - "pid_hex": "" - }, - "rpipico": { - "package": 
"rp2040", - "architecture": "rp2040", - "board": "rpipico", - "model": "rp2040", - "vid_hex": "2e8a", - "pid_hex": "000a" - }, - "teensy40": { - "package": "teensy", - "architecture": "avr", - "board": "teensy40", - "model": "imxrt10xx", - "vid_hex": "16c0", - "pid_hex": "0478" - }, - "teensy41": { - "package": "teensy", - "architecture": "avr", - "board": "teensy41", - "model": "imxrt10xx", - "vid_hex": "16c0", - "pid_hex": "0478" - }, - "wioterminal": { - "package": "Seeeduino", - "architecture": "samd", - "board": "seeed_wio_terminal", - "model": "atsamd51", - "vid_hex": "2886", - "pid_hex": "802d" - } -} diff --git a/apps/microtvm/arduino/template_project/launch_microtvm_api_server.sh b/apps/microtvm/arduino/template_project/launch_microtvm_api_server.sh deleted file mode 100755 index 80f3e5bd7467..000000000000 --- a/apps/microtvm/arduino/template_project/launch_microtvm_api_server.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -function show_usage() { - cat < --read-fd --write-fd -EOF -} - -if [ "$#" -lt 5 -o "$1" == "--help" ]; then - show_usage - exit -1 -fi - -ARDUINO_VENV_PATH=${HOME}/.tvm/micro_arduino - -# Create virtual env -mkdir -p ${HOME}/.tvm -PYTHON_CMD=$(which python3) -$PYTHON_CMD -m venv ${ARDUINO_VENV_PATH} -ARDUINO_PYTHON_CMD="${ARDUINO_VENV_PATH}/bin/python3" - -# Install dependencies -$ARDUINO_PYTHON_CMD -m pip install pyusb packaging - -# Run server -$ARDUINO_PYTHON_CMD $1 $2 $3 $4 $5 diff --git a/apps/microtvm/arduino/template_project/microtvm_api_server.py b/apps/microtvm/arduino/template_project/microtvm_api_server.py deleted file mode 100644 index 735aca120c59..000000000000 --- a/apps/microtvm/arduino/template_project/microtvm_api_server.py +++ /dev/null @@ -1,689 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import json -import logging -import os.path -import pathlib -import re -import shutil -import subprocess -import tarfile -import tempfile -import time -from string import Template -from packaging import version - -import server - -_LOG = logging.getLogger(__name__) - -MODEL_LIBRARY_FORMAT_RELPATH = pathlib.Path("src") / "model" / "model.tar" -API_SERVER_DIR = pathlib.Path(os.path.dirname(__file__) or os.path.getcwd()) -BUILD_DIR = API_SERVER_DIR / "build" -MODEL_LIBRARY_FORMAT_PATH = API_SERVER_DIR / MODEL_LIBRARY_FORMAT_RELPATH - -IS_TEMPLATE = not (API_SERVER_DIR / MODEL_LIBRARY_FORMAT_RELPATH).exists() - -MIN_ARDUINO_CLI_VERSION = version.parse("0.18.0") - -BOARDS = API_SERVER_DIR / "boards.json" - -ARDUINO_CLI_CMD = shutil.which("arduino-cli") - -MAKEFILE_FILENAME = "Makefile" - -# Data structure to hold the information microtvm_api_server.py needs -# to communicate with each of these boards. -try: - with open(BOARDS) as boards: - BOARD_PROPERTIES = json.load(boards) -except FileNotFoundError: - raise FileNotFoundError(f"Board file {{{BOARDS}}} does not exist.") - - -class BoardAutodetectFailed(Exception): - """Raised when no attached hardware is found matching the requested board""" - - -PROJECT_TYPES = ["example_project", "host_driven"] - -PROJECT_OPTIONS = server.default_project_options( - project_type={"choices": tuple(PROJECT_TYPES)}, - board={"choices": list(BOARD_PROPERTIES), "optional": ["flash", "open_transport"]}, - warning_as_error={"optional": ["build", "flash"]}, -) + [ - server.ProjectOption( - "arduino_cli_cmd", - required=( - ["generate_project", "build", "flash", "open_transport"] - if not ARDUINO_CLI_CMD - else None - ), - optional=( - ["generate_project", "build", "flash", "open_transport"] if ARDUINO_CLI_CMD else None - ), - type="str", - default=ARDUINO_CLI_CMD, - help="Path to the arduino-cli tool.", - ), - server.ProjectOption( - "port", - optional=["flash", "open_transport"], - type="int", - default=None, - help=( - "Port to use for connecting to hardware. " - "If port and serial_number options are not set it will try to autodetect the port." - ), - ), - server.ProjectOption( - "serial_number", - optional=["open_transport", "flash"], - type="str", - default=None, - help=( - "Board serial number. If both serial_number and port options are set," - " it will throw exception." - ), - ), -] - - -class Handler(server.ProjectAPIHandler): - def __init__(self): - super(Handler, self).__init__() - self._proc = None - self._port = None - self._serial = None - self._version = None - - def server_info_query(self, tvm_version): - return server.ServerInfo( - platform_name="arduino", - is_template=IS_TEMPLATE, - model_library_format_path="" if IS_TEMPLATE else MODEL_LIBRARY_FORMAT_PATH, - project_options=PROJECT_OPTIONS, - ) - - def _copy_project_files(self, api_server_dir, project_dir, project_type): - """Copies the files for project_type into project_dir. - - Notes - ----- - template_dir is NOT a project type, and that directory is never copied - in this function. template_dir only holds this file and its unit tests, - so this file is copied separately in generate_project. 
- - """ - for item in (API_SERVER_DIR / "src" / project_type).iterdir(): - if item.name == "project.ino": - continue - dest = project_dir / "src" / item.name - if item.is_dir(): - shutil.copytree(item, dest) - else: - shutil.copy2(item, dest) - - # Arduino requires the .ino file have the same filename as its containing folder - shutil.copy2( - API_SERVER_DIR / "src" / project_type / "project.ino", - project_dir / f"{project_dir.stem}.ino", - ) - - CRT_COPY_ITEMS = ("include", "src") - - def _copy_standalone_crt(self, source_dir, standalone_crt_dir): - output_crt_dir = source_dir / "standalone_crt" - for item in self.CRT_COPY_ITEMS: - src_path = os.path.join(standalone_crt_dir, item) - dst_path = output_crt_dir / item - if os.path.isdir(src_path): - shutil.copytree(src_path, dst_path) - else: - shutil.copy2(src_path, dst_path) - - # Example project is the "minimum viable project", - # and doesn't need a fancy RPC server - EXAMPLE_PROJECT_UNUSED_COMPONENTS = [ - "include/dmlc", - "src/support", - "src/runtime/minrpc", - "src/runtime/crt/graph_executor", - "src/runtime/crt/microtvm_rpc_common", - "src/runtime/crt/microtvm_rpc_server", - "src/runtime/crt/tab", - ] - - def _remove_unused_components(self, source_dir, project_type): - unused_components = [] - if project_type == "example_project": - unused_components = self.EXAMPLE_PROJECT_UNUSED_COMPONENTS - - for component in unused_components: - shutil.rmtree(source_dir / "standalone_crt" / component) - - def _disassemble_mlf(self, mlf_tar_path, source_dir): - with tempfile.TemporaryDirectory() as mlf_unpacking_dir_str: - mlf_unpacking_dir = pathlib.Path(mlf_unpacking_dir_str) - with tarfile.open(mlf_tar_path, "r:") as tar: - tar.extractall(mlf_unpacking_dir) - - model_dir = source_dir / "model" - model_dir.mkdir() - - # Copy C files from model. The filesnames and quantity - # depend on the target string, so we just copy all c files - source_dir = mlf_unpacking_dir / "codegen" / "host" / "src" - for file in source_dir.rglob("*.c"): - shutil.copy(file, model_dir) - - # Return metadata.json for use in templating - with open(os.path.join(mlf_unpacking_dir, "metadata.json")) as f: - metadata = json.load(f) - return metadata - - def _template_model(self, source_dir, metadata): - with open(source_dir / "platform.c", "r") as f: - model_h_template = Template(f.read()) - - all_module_names = [] - for name in metadata["modules"].keys(): - all_module_names.append(name) - - assert all( - metadata["modules"][mod_name]["style"] == "full-model" for mod_name in all_module_names - ), "when generating AOT, expect only full-model Model Library Format" - - workspace_size_bytes = 0 - for mod_name in all_module_names: - workspace_size_bytes += metadata["modules"][mod_name]["memory"]["functions"]["main"][0][ - "workspace_size_bytes" - ] - template_values = { - "workspace_size_bytes": workspace_size_bytes, - } - - with open(source_dir / "platform.c", "w") as f: - f.write(model_h_template.substitute(template_values)) - - # Arduino ONLY recognizes .ino, .ccp, .c, .h - - CPP_FILE_EXTENSION_SYNONYMS = ("cc", "cxx") - - def _change_cpp_file_extensions(self, source_dir): - for ext in self.CPP_FILE_EXTENSION_SYNONYMS: - for filename in source_dir.rglob(f"*.{ext}"): - filename.rename(filename.with_suffix(".cpp")) - - for filename in source_dir.rglob("*.inc"): - filename.rename(filename.with_suffix(".h")) - - def _convert_includes(self, project_dir, source_dir): - """Changes all #include statements in project_dir to be relevant to their - containing file's location. 
- - Arduino only supports includes relative to a file's location, so this - function finds each time we #include a file and changes the path to - be relative to the file location. Does not do this for standard C - libraries. Also changes angle brackets syntax to double quotes syntax. - - See Also - ----- - https://www.arduino.cc/reference/en/language/structure/further-syntax/include/ - - """ - for ext in ("c", "h", "cpp"): - for filename in source_dir.rglob(f"*.{ext}"): - with filename.open("rb") as src_file: - lines = src_file.readlines() - with filename.open("wb") as dst_file: - for line in lines: - line_str = str(line, "utf-8") - # Check if line has an include - result = re.search(r"#include\s*[<\"]([^>]*)[>\"]", line_str) - if not result: - dst_file.write(line) - else: - new_include = self._find_modified_include_path( - project_dir, filename, result.groups()[0] - ) - updated_line = f'#include "{new_include}"\n' - dst_file.write(updated_line.encode("utf-8")) - - # Most of the files we used to be able to point to directly are under "src/standalone_crt/include/". - # Howver, crt_config.h lives under "src/standalone_crt/crt_config/", and more exceptions might - # be added in the future. - POSSIBLE_BASE_PATHS = ["src/standalone_crt/include/", "src/standalone_crt/crt_config/"] - - def _find_modified_include_path(self, project_dir, file_path, include_path): - """Takes a single #include path, and returns the location it should point to. - - Examples - -------- - >>> _find_modified_include_path( - ... "/path/to/project/dir" - ... "/path/to/project/dir/src/standalone_crt/src/runtime/crt/common/ndarray.c" - ... "tvm/runtime/crt/platform.h" - ... ) - "../../../../../../src/standalone_crt/include/tvm/runtime/crt/platform.h" - - """ - if include_path.endswith(".inc"): - include_path = re.sub(r"\.[a-z]+$", ".h", include_path) - - # Change includes referencing .cc and .cxx files to point to the renamed .cpp file - if include_path.endswith(self.CPP_FILE_EXTENSION_SYNONYMS): - include_path = re.sub(r"\.[a-z]+$", ".cpp", include_path) - - # If the include already works, don't modify it - if (file_path.parents[0] / include_path).exists(): - return include_path - - relative_path = file_path.relative_to(project_dir) - up_dirs_path = "../" * str(relative_path).count("/") - - for base_path in self.POSSIBLE_BASE_PATHS: - full_potential_path = project_dir / base_path / include_path - if full_potential_path.exists(): - return up_dirs_path + base_path + include_path - - # If we can't find the file, just leave it untouched - # It's probably a standard C/C++ header - return include_path - - CMSIS_INCLUDE_HEADERS = [ - "arm_nn_math_types.h", - "arm_nn_tables.h", - "arm_nn_types.h", - "arm_nnfunctions.h", - "arm_nnsupportfunctions.h", - ] - - def _cmsis_required(self, project_path: pathlib.Path) -> bool: - """Check if CMSIS dependency is required.""" - project_path = pathlib.Path(project_path) - for path in (project_path / "src" / "model").iterdir(): - if path.is_file(): - # Encoding is for reading C generated code which also includes hex numbers - with open(path, "r", encoding="ISO-8859-1") as lib_f: - lib_content = lib_f.read() - if any(header in lib_content for header in self.CMSIS_INCLUDE_HEADERS): - return True - return False - - def _copy_cmsis(self, project_path: pathlib.Path, cmsis_path: str): - """Copy CMSIS header files to project. 
- Note: We use this CMSIS package:https://www.arduino.cc/reference/en/libraries/arduino_cmsis-dsp/ - However, the latest release does not include header files that are copied in this function. - """ - (project_path / "include" / "cmsis").mkdir() - cmsis_path = pathlib.Path(cmsis_path) - for item in self.CMSIS_INCLUDE_HEADERS: - shutil.copy2( - cmsis_path / "CMSIS" / "NN" / "Include" / item, - project_path / "include" / "cmsis" / item, - ) - - def _populate_makefile( - self, - makefile_template_path: pathlib.Path, - makefile_path: pathlib.Path, - board: str, - verbose: bool, - arduino_cli_cmd: str, - build_extra_flags: str, - ): - """Generate Makefile from template.""" - flags = { - "FQBN": self._get_fqbn(board), - "VERBOSE_FLAG": "--verbose" if verbose else "", - "ARUINO_CLI_CMD": arduino_cli_cmd, - "BOARD": board, - "BUILD_EXTRA_FLAGS": build_extra_flags, - } - - with open(makefile_path, "w") as makefile_f: - with open(makefile_template_path, "r") as makefile_template_f: - for line in makefile_template_f: - SUBST_TOKEN_RE = re.compile(r"<([A-Z_]+)>") - outs = [] - for i, m in enumerate(re.split(SUBST_TOKEN_RE, line)): - if i % 2 == 1: - m = flags[m] - outs.append(m) - line = "".join(outs) - makefile_f.write(line) - - def generate_project(self, model_library_format_path, standalone_crt_dir, project_dir, options): - # List all used project options - board = options["board"] - project_type = options["project_type"] - arduino_cli_cmd = options["arduino_cli_cmd"] - verbose = options["verbose"] - - cmsis_path = options.get("cmsis_path") - compile_definitions = options.get("compile_definitions") - extra_files_tar = options.get("extra_files_tar") - - # Reference key directories with pathlib - project_dir = pathlib.Path(project_dir) - project_dir.mkdir() - source_dir = project_dir / "src" - source_dir.mkdir() - - # Copies files from the template folder to project_dir - for file in os.listdir(API_SERVER_DIR): - if file.endswith(".py"): - shutil.copy2(API_SERVER_DIR / file, project_dir / file) - - # Copy launch script - shutil.copy2( - API_SERVER_DIR / "launch_microtvm_api_server.sh", - project_dir / "launch_microtvm_api_server.sh", - ) - - shutil.copy2(BOARDS, project_dir / BOARDS.name) - self._copy_project_files(API_SERVER_DIR, project_dir, project_type) - - # Copy standalone_crt into src folder - self._copy_standalone_crt(source_dir, standalone_crt_dir) - self._remove_unused_components(source_dir, project_type) - - # Populate crt-config.h - crt_config_dir = project_dir / "src" / "standalone_crt" / "crt_config" - crt_config_dir.mkdir() - shutil.copy2( - API_SERVER_DIR / "crt_config" / "crt_config.h", crt_config_dir / "crt_config.h" - ) - - # Unpack the MLF and copy the relevant files - metadata = self._disassemble_mlf(model_library_format_path, source_dir) - shutil.copy2(model_library_format_path, project_dir / MODEL_LIBRARY_FORMAT_RELPATH) - - # For AOT, template platform.c with metadata to minimize space usage - if project_type == "example_project": - self._template_model(source_dir, metadata) - - self._change_cpp_file_extensions(source_dir) - - # Recursively change includes - self._convert_includes(project_dir, source_dir) - - # create include directory - (project_dir / "include").mkdir() - - # Populate extra_files - if extra_files_tar: - with tarfile.open(extra_files_tar, mode="r:*") as tf: - tf.extractall(project_dir) - - build_extra_flags = '"build.extra_flags=' - if extra_files_tar: - build_extra_flags += "-I./include " - - if compile_definitions: - for item in compile_definitions: - 
build_extra_flags += f"{item} " - - if self._cmsis_required(project_dir): - build_extra_flags += f"-I./include/cmsis " - self._copy_cmsis(project_dir, cmsis_path) - - build_extra_flags += '"' - - # Check if build_extra_flags is empty - if build_extra_flags == '"build.extra_flags="': - build_extra_flags = '""' - - # Populate Makefile - self._populate_makefile( - API_SERVER_DIR / f"{MAKEFILE_FILENAME}.template", - project_dir / MAKEFILE_FILENAME, - board, - verbose, - arduino_cli_cmd, - build_extra_flags, - ) - - def _get_platform_version(self, arduino_cli_path: str) -> float: - # sample output of this command: - # 'arduino-cli alpha Version: 0.18.3 Commit: d710b642 Date: 2021-05-14T12:36:58Z\n' - version_output = subprocess.run( - [arduino_cli_path, "version"], check=True, stdout=subprocess.PIPE - ).stdout.decode("utf-8") - str_version = re.search(r"Version: ([\.0-9]*)", version_output).group(1) - - # Using too low a version should raise an error. Note that naively - # comparing floats will fail here: 0.7 > 0.21, but 0.21 is a higher - # version (hence we need version.parse) - return version.parse(str_version) - - # This will only be run for build and upload - def _check_platform_version(self, cli_command: str, warning_as_error: bool): - if not self._version: - self._version = self._get_platform_version(cli_command) - - if self._version < MIN_ARDUINO_CLI_VERSION: - message = ( - f"Arduino CLI version too old: found {self._version}, " - f"need at least {str(MIN_ARDUINO_CLI_VERSION)}." - ) - if warning_as_error is not None and warning_as_error: - raise server.ServerError(message=message) - _LOG.warning(message) - - def _get_fqbn(self, board: str): - o = BOARD_PROPERTIES[board] - return f"{o['package']}:{o['architecture']}:{o['board']}" - - def build(self, options): - # List all used project options - arduino_cli_cmd = options["arduino_cli_cmd"] - warning_as_error = options.get("warning_as_error") - - self._check_platform_version(arduino_cli_cmd, warning_as_error) - compile_cmd = ["make", "build"] - # Specify project to compile - subprocess.run(compile_cmd, check=True, cwd=API_SERVER_DIR) - - POSSIBLE_BOARD_LIST_HEADERS = ("Port", "Protocol", "Type", "Board Name", "FQBN", "Core") - - def _parse_connected_boards(self, tabular_str): - """Parses the tabular output from `arduino-cli board list` into a 2D array - - Examples - -------- - >>> list(_parse_connected_boards(bytes( - ... "Port Type Board Name FQBN Core \n" - ... "/dev/ttyS4 Serial Port Unknown \n" - ... "/dev/ttyUSB0 Serial Port (USB) Spresense SPRESENSE:spresense:spresense SPRESENSE:spresense\n" - ... "\n", - ... "utf-8"))) - [['/dev/ttys4', 'Serial Port', 'Unknown', '', ''], ['/dev/ttyUSB0', 'Serial Port (USB)', - 'Spresense', 'SPRESENSE:spresense:spresense', 'SPRESENSE:spresense']] - - """ - - # Which column headers are present depends on the version of arduino-cli - column_regex = r"\s*|".join(self.POSSIBLE_BOARD_LIST_HEADERS) + r"\s*" - str_rows = tabular_str.split("\n") - column_headers = list(re.finditer(column_regex, str_rows[0])) - assert len(column_headers) > 0 - - for str_row in str_rows[1:]: - if not str_row.strip(): - continue - device = {} - - for column in column_headers: - col_name = column.group(0).strip().lower() - device[col_name] = str_row[column.start() : column.end()].strip() - yield device - - def _auto_detect_port(self, arduino_cli_cmd: str, board: str) -> str: - # It is assumed only one board with this type is connected to this host machine. 
- list_cmd = [arduino_cli_cmd, "board", "list"] - list_cmd_output = subprocess.run( - list_cmd, check=True, stdout=subprocess.PIPE - ).stdout.decode("utf-8") - - desired_fqbn = self._get_fqbn(board) - for device in self._parse_connected_boards(list_cmd_output): - if device["fqbn"] == desired_fqbn: - return device["port"] - - # If no compatible boards, raise an error - raise BoardAutodetectFailed() - - def _get_arduino_port( - self, arduino_cli_cmd: str, board: str, port: int = None, serial_number: str = None - ): - """Returns Arduino serial port. - If both port and serial_number are set, it throw Runtime exception. - If none of those options are set, it tries to autodetect the serial port. - """ - # TODO: This is to avoid breaking GPU docker on running the tutorials. - import serial.tools.list_ports - - if serial_number and port: - raise RuntimeError( - "port and serial_number cannot be set together. Please set only one." - ) - - if not self._port: - if port: - self._port = port - elif serial_number: - com_ports = serial.tools.list_ports.comports() - for port in com_ports: - if port.serial_number == serial_number: - self._port = port.device - break - if not self._port: - raise BoardAutodetectFailed( - f"Detecting port with board serial_number {serial_number} failed." - ) - else: - self._port = self._auto_detect_port(arduino_cli_cmd, board) - - return self._port - - def _get_board_from_makefile(self, makefile_path: pathlib.Path) -> str: - """Get Board from generated Makefile.""" - with open(makefile_path) as makefile_f: - lines = makefile_f.readlines() - for line in lines: - if "BOARD" in line: - board = re.sub(r"\s", "", line).split(":=")[1] - return board - raise RuntimeError("Board was not found in Makefile: {}".format(makefile_path)) - - FLASH_TIMEOUT_SEC = 60 - FLASH_MAX_RETRIES = 5 - - def flash(self, options): - # List all used project options - arduino_cli_cmd = options["arduino_cli_cmd"] - warning_as_error = options.get("warning_as_error") - port = options.get("port") - board = options.get("board") - serial_number = options.get("serial_number") - - if not board: - board = self._get_board_from_makefile(API_SERVER_DIR / MAKEFILE_FILENAME) - - self._check_platform_version(arduino_cli_cmd, warning_as_error) - port = self._get_arduino_port(arduino_cli_cmd, board, port, serial_number) - - upload_cmd = ["make", "flash", f"PORT={port}"] - for _ in range(self.FLASH_MAX_RETRIES): - try: - subprocess.run( - upload_cmd, check=True, timeout=self.FLASH_TIMEOUT_SEC, cwd=API_SERVER_DIR - ) - break - - # We only catch timeout errors - a subprocess.CalledProcessError - # (caused by subprocess.run returning non-zero code) will not - # be caught. - except subprocess.TimeoutExpired: - _LOG.warning( - f"Upload attempt to port {port} timed out after {self.FLASH_TIMEOUT_SEC} seconds" - ) - - else: - raise RuntimeError( - f"Unable to flash Arduino board after {self.FLASH_MAX_RETRIES} attempts" - ) - - def open_transport(self, options): - # TODO: This is to avoid breaking GPU docker on running the tutorials. 
- import serial - import serial.tools.list_ports - - # List all used project options - arduino_cli_cmd = options["arduino_cli_cmd"] - port = options.get("port") - board = options.get("board") - serial_number = options.get("serial_number") - - if not board: - board = self._get_board_from_makefile(API_SERVER_DIR / MAKEFILE_FILENAME) - - # Zephyr example doesn't throw an error in this case - if self._serial is not None: - return - - port = self._get_arduino_port(arduino_cli_cmd, board, port, serial_number) - - # It takes a moment for the Arduino code to finish initializing - # and start communicating over serial - for _ in range(10): - if any(serial.tools.list_ports.grep(port)): - break - time.sleep(0.5) - - self._serial = serial.Serial(port, baudrate=115200, timeout=10) - - return server.TransportTimeouts( - session_start_retry_timeout_sec=2.0, - session_start_timeout_sec=5.0, - session_established_timeout_sec=5.0, - ) - - def close_transport(self): - if self._serial is None: - return - self._serial.close() - self._serial = None - - def read_transport(self, n, timeout_sec): - self._serial.timeout = timeout_sec - if self._serial is None: - raise server.TransportClosedError() - return self._serial.read(n) - - def write_transport(self, data, timeout_sec): - self._serial.write_timeout = timeout_sec - if self._serial is None: - raise server.TransportClosedError() - return self._serial.write(data) - - -if __name__ == "__main__": - server.main(Handler()) diff --git a/apps/microtvm/arduino/template_project/src/example_project/platform.c b/apps/microtvm/arduino/template_project/src/example_project/platform.c deleted file mode 100644 index 973b8aa18cc4..000000000000 --- a/apps/microtvm/arduino/template_project/src/example_project/platform.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \brief Implementation of TVMPlatform functions in tvm/runtime/crt/platform.h - */ - -#include "Arduino.h" -#include "standalone_crt/include/dlpack/dlpack.h" -#include "standalone_crt/include/tvm/runtime/crt/stack_allocator.h" - -#define TVM_WORKSPACE_SIZE_BYTES $workspace_size_bytes - -// AOT memory array, stack allocator wants it aligned -static uint8_t g_aot_memory[TVM_WORKSPACE_SIZE_BYTES] - __attribute__((aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES))); -tvm_workspace_t app_workspace; - -// Called when an internal error occurs and execution cannot continue. 
-// Blink code for debugging purposes -void TVMPlatformAbort(tvm_crt_error_t error) { - TVMLogf("TVMPlatformAbort: 0x%08x\n", error); - for (;;) { -#ifdef LED_BUILTIN - digitalWrite(LED_BUILTIN, HIGH); - delay(250); - digitalWrite(LED_BUILTIN, LOW); - delay(250); - digitalWrite(LED_BUILTIN, HIGH); - delay(250); - digitalWrite(LED_BUILTIN, LOW); - delay(750); -#endif - } -} - -// Allocate memory for use by TVM. -tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { - return StackMemoryManager_Allocate(&app_workspace, num_bytes, out_ptr); -} - -// Free memory used by TVM. -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { - return StackMemoryManager_Free(&app_workspace, ptr); -} - -// Internal logging API call implementation. -void TVMLogf(const char* msg, ...) {} - -unsigned long g_utvm_start_time_micros; -int g_utvm_timer_running = 0; - -// Start a device timer. -tvm_crt_error_t TVMPlatformTimerStart() { - if (g_utvm_timer_running) { - return kTvmErrorPlatformTimerBadState; - } - g_utvm_timer_running = 1; - g_utvm_start_time_micros = micros(); - return kTvmErrorNoError; -} - -// Stop the running device timer and get the elapsed time (in microseconds). -tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) { - if (!g_utvm_timer_running) { - return kTvmErrorPlatformTimerBadState; - } - g_utvm_timer_running = 0; - unsigned long g_utvm_stop_time = micros() - g_utvm_start_time_micros; - *elapsed_time_seconds = ((double)g_utvm_stop_time) / 1e6; - return kTvmErrorNoError; -} - -// Fill a buffer with random data. -tvm_crt_error_t TVMPlatformGenerateRandom(uint8_t* buffer, size_t num_bytes) { - for (size_t i = 0; i < num_bytes; i++) { - buffer[i] = rand(); - } - return kTvmErrorNoError; -} - -// Initialize TVM inference. -tvm_crt_error_t TVMPlatformInitialize() { - StackMemoryManager_Init(&app_workspace, g_aot_memory, sizeof(g_aot_memory)); - return kTvmErrorNoError; -} - -void TVMExecute(void* input_data, void* output_data) { - int ret_val = tvmgen_default___tvm_main__(input_data, output_data); - if (ret_val != 0) { - TVMPlatformAbort(kTvmErrorPlatformCheckFailure); - } -} diff --git a/apps/microtvm/arduino/template_project/src/example_project/platform.h b/apps/microtvm/arduino/template_project/src/example_project/platform.h deleted file mode 100644 index d6f10e13e96e..000000000000 --- a/apps/microtvm/arduino/template_project/src/example_project/platform.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* TODO template this function signature with the input and output - * data types and sizes. 
For example: - * - * void TVMExecute(uint8_t input_data[9216], uint8_t output_data[3]); - * - * Note this can only be done once MLF has JSON metadata describing - * inputs and outputs. - */ -void TVMExecute(void* input_data, void* output_data); - -#ifdef __cplusplus -} // extern "C" -#endif diff --git a/apps/microtvm/arduino/template_project/src/example_project/project.ino b/apps/microtvm/arduino/template_project/src/example_project/project.ino deleted file mode 100644 index 666396b407ae..000000000000 --- a/apps/microtvm/arduino/template_project/src/example_project/project.ino +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "src/standalone_crt/include/tvm/runtime/crt/platform.h" - -void setup() { - TVMPlatformInitialize(); - // If desired, initialize the RNG with random noise - // randomSeed(analogRead(0)); -} - -void loop() { - //TVMExecute(input_data, output_data); -} diff --git a/apps/microtvm/arduino/template_project/src/host_driven/platform.c b/apps/microtvm/arduino/template_project/src/host_driven/platform.c deleted file mode 100644 index 0a276134d419..000000000000 --- a/apps/microtvm/arduino/template_project/src/host_driven/platform.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \brief Implementation of TVMPlatform functions in tvm/runtime/crt/platform.h - */ - -#include "standalone_crt/include/dlpack/dlpack.h" -#include "standalone_crt/include/tvm/runtime/crt/error_codes.h" -#include "stdarg.h" - -// Called when an internal error occurs and execution cannot continue. -void TVMPlatformAbort(tvm_crt_error_t error) { - TVMLogf("TVMPlatformAbort: 0x%08x\n", error); - for (;;) - ; -} - -// Called by the microTVM RPC server to implement TVMLogf. -size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, const char* fmt, - va_list args) { - return vsnprintf(out_buf, out_buf_size_bytes, fmt, args); -} - -// Allocate memory for use by TVM. 
-tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { - if (num_bytes == 0) { - num_bytes = sizeof(int); - } - *out_ptr = malloc(num_bytes); - return (*out_ptr == NULL) ? kTvmErrorPlatformNoMemory : kTvmErrorNoError; -} - -// Free memory used by TVM. -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { - free(ptr); - return kTvmErrorNoError; -} - -unsigned long g_utvm_start_time_micros; -int g_utvm_timer_running = 0; - -// Start a device timer. -tvm_crt_error_t TVMPlatformTimerStart() { - if (g_utvm_timer_running) { - return kTvmErrorPlatformTimerBadState; - } - g_utvm_timer_running = 1; - g_utvm_start_time_micros = micros(); - return kTvmErrorNoError; -} - -// Stop the running device timer and get the elapsed time (in microseconds). -tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) { - if (!g_utvm_timer_running) { - return kTvmErrorPlatformTimerBadState; - } - g_utvm_timer_running = 0; - unsigned long g_utvm_stop_time = micros() - g_utvm_start_time_micros; - *elapsed_time_seconds = ((double)g_utvm_stop_time) / 1e6; - return kTvmErrorNoError; -} - -// Fill a buffer with random data. -tvm_crt_error_t TVMPlatformGenerateRandom(uint8_t* buffer, size_t num_bytes) { - for (size_t i = 0; i < num_bytes; i++) { - buffer[i] = rand(); - } - return kTvmErrorNoError; -} diff --git a/apps/microtvm/arduino/template_project/src/host_driven/project.ino b/apps/microtvm/arduino/template_project/src/host_driven/project.ino deleted file mode 100644 index d394059e1bf5..000000000000 --- a/apps/microtvm/arduino/template_project/src/host_driven/project.ino +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "src/standalone_crt/include/tvm/runtime/crt/microtvm_rpc_server.h" -#include "src/standalone_crt/include/tvm/runtime/crt/logging.h" -microtvm_rpc_server_t server; - -// Called by TVM to write serial data to the UART. -ssize_t write_serial(void* unused_context, const uint8_t* data, size_t size) { - Serial.write(data, size); - return size; -} - -void setup() { - server = MicroTVMRpcServerInit(write_serial, NULL); - TVMLogf("microTVM Arduino runtime - running"); - Serial.begin(115200); - - // If desired, initialize the RNG with random noise - // randomSeed(analogRead(0)); -} - -void loop() { - // Read at most 128 bytes at a time to prevent stack blowup - int to_read = min(Serial.available(), 128); - - uint8_t data[to_read]; - size_t bytes_remaining = Serial.readBytes((char*) data, to_read); - uint8_t* arr_ptr = data; - while (bytes_remaining > 0) { - // Pass the received bytes to the RPC server. 
- tvm_crt_error_t err = MicroTVMRpcServerLoop(server, &arr_ptr, &bytes_remaining); - if (err != kTvmErrorNoError && err != kTvmErrorFramingShortPacket) { - TVMPlatformAbort(err); - } - } -} diff --git a/apps/microtvm/cmsisnn/.gitignore b/apps/microtvm/cmsisnn/.gitignore deleted file mode 100644 index 59c962ef83f8..000000000000 --- a/apps/microtvm/cmsisnn/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -include/inputs.h -include/outputs.h diff --git a/apps/microtvm/cmsisnn/Makefile b/apps/microtvm/cmsisnn/Makefile deleted file mode 100644 index 15a4f1337b32..000000000000 --- a/apps/microtvm/cmsisnn/Makefile +++ /dev/null @@ -1,112 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Makefile to build demo - -# Setup build environment -BUILD_DIR := build - -ARM_CPU = ARMCM55 -ETHOSU_PATH = /opt/arm/ethosu -CMSIS_PATH ?= ${ETHOSU_PATH}/cmsis -ETHOSU_PLATFORM_PATH ?= ${ETHOSU_PATH}/core_platform -STANDALONE_CRT_PATH := $(abspath $(BUILD_DIR))/runtime -CORSTONE_300_PATH = ${ETHOSU_PLATFORM_PATH}/targets/corstone-300 -PKG_COMPILE_OPTS = -g -Wall -O2 -Wno-incompatible-pointer-types -Wno-format -mcpu=cortex-m55 -mthumb -mfloat-abi=hard -std=gnu99 -CMAKE ?= cmake -CC = arm-none-eabi-gcc -AR = arm-none-eabi-ar -RANLIB = arm-none-eabi-ranlib -PKG_CFLAGS = ${PKG_COMPILE_OPTS} \ - -I${STANDALONE_CRT_PATH}/include \ - -I${STANDALONE_CRT_PATH}/src/runtime/crt/include \ - -I${PWD}/${BUILD_DIR}/crt_config \ - -I${PWD}/include \ - -I${ETHOSU_PLATFORM_PATH}/drivers/uart/include \ - -I${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Include/ \ - -I${CMSIS_PATH}/CMSIS/Core/Include \ - -I${CMSIS_PATH}/CMSIS-NN/Include \ - -I${CMSIS_PATH}/CMSIS/DSP/Include \ - -I$(abspath $(BUILD_DIR))/codegen/host/include -CMAKE_FLAGS = -DCMAKE_TOOLCHAIN_FILE=$(abspath $(BUILD_DIR))/../arm-none-eabi-gcc.cmake \ - -DTARGET_CPU=cortex-m55 -PKG_LDFLAGS = -lm -specs=nosys.specs -static -T corstone300.ld - -$(ifeq VERBOSE,1) -QUIET ?= -$(else) -QUIET ?= @ -$(endif) - -DEMO_MAIN = src/demo_bare_metal.c -CODEGEN_SRCS = $(wildcard $(abspath $(BUILD_DIR))/codegen/host/src/*.c) -CODEGEN_OBJS = $(subst .c,.o,$(CODEGEN_SRCS)) -CMSIS_STARTUP_SRCS = $(wildcard ${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Source/*.c) -CMSIS_NN_SRCS = $(shell find ${CMSIS_PATH}/CMSIS-NN/Source/*/*.c) -CORSTONE_300_SRCS = $(wildcard ${CORSTONE_300_PATH}/*.c) - -demo: $(BUILD_DIR)/demo - -$(BUILD_DIR)/stack_allocator.o: $(STANDALONE_CRT_PATH)/src/runtime/crt/memory/stack_allocator.c - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ - -$(BUILD_DIR)/crt_backend_api.o: $(STANDALONE_CRT_PATH)/src/runtime/crt/common/crt_backend_api.c - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ - -# Build generated code -$(BUILD_DIR)/libcodegen.a: $(CODEGEN_SRCS) - $(QUIET)cd $(abspath $(BUILD_DIR)/codegen/host/src) && $(CC) -c 
$(PKG_CFLAGS) $(CODEGEN_SRCS) - $(QUIET)$(AR) -cr $(abspath $(BUILD_DIR)/libcodegen.a) $(CODEGEN_OBJS) - $(QUIET)$(RANLIB) $(abspath $(BUILD_DIR)/libcodegen.a) - -# Build CMSIS startup code -${BUILD_DIR}/libcmsis_startup.a: $(CMSIS_STARTUP_SRCS) - $(QUIET)mkdir -p $(abspath $(BUILD_DIR)/libcmsis_startup) - $(QUIET)cd $(abspath $(BUILD_DIR)/libcmsis_startup) && $(CC) -c $(PKG_CFLAGS) -D${ARM_CPU} $^ - $(QUIET)$(AR) -cr $(abspath $(BUILD_DIR)/libcmsis_startup.a) $(abspath $(BUILD_DIR))/libcmsis_startup/*.o - $(QUIET)$(RANLIB) $(abspath $(BUILD_DIR)/libcmsis_startup.a) - -# Build CMSIS-NN code -${BUILD_DIR}/libcmsis_nn.a: $(CMSIS_NN_SRCS) - $(QUIET)mkdir -p $(abspath $(BUILD_DIR)/libcmsis_nn) - $(QUIET)cd $(abspath $(BUILD_DIR)/libcmsis_nn) && $(CC) -c $(PKG_CFLAGS) -D${ARM_CPU} $^ - $(QUIET)$(AR) -cr $(abspath $(BUILD_DIR)/libcmsis_nn.a) $(abspath $(BUILD_DIR))/libcmsis_nn/*.o - $(QUIET)$(RANLIB) $(abspath $(BUILD_DIR)/libcmsis_nn.a) - -# Build UART driver -${BUILD_DIR}/ethosu_core_platform/libethosu_uart_cmsdk_apb.a: - $(QUIET)mkdir -p $(@D) - $(QUIET)cd ${ETHOSU_PLATFORM_PATH}/drivers/uart && $(CMAKE) -B $(abspath $(BUILD_DIR)/ethosu_core_platform) $(CMAKE_FLAGS) - $(QUIET)cd $(abspath $(BUILD_DIR)/ethosu_core_platform) && $(MAKE) - -# Build demo application -$(BUILD_DIR)/demo: $(DEMO_MAIN) $(CORSTONE_300_SRCS) $(BUILD_DIR)/stack_allocator.o $(BUILD_DIR)/crt_backend_api.o \ - ${BUILD_DIR}/libcodegen.a ${BUILD_DIR}/libcmsis_startup.a ${BUILD_DIR}/libcmsis_nn.a ${BUILD_DIR}/ethosu_core_platform/libethosu_uart_cmsdk_apb.a - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) $(PKG_CFLAGS) $(FREERTOS_FLAGS) -o $@ -Wl,--whole-archive $^ -Wl,--no-whole-archive $(PKG_LDFLAGS) - -clean: - $(QUIET)rm -rf $(BUILD_DIR)/codegen - -cleanall: - $(QUIET)rm -rf $(BUILD_DIR) - -.SUFFIXES: - -.DEFAULT: demo diff --git a/apps/microtvm/cmsisnn/README.md b/apps/microtvm/cmsisnn/README.md deleted file mode 100644 index 3f32c44baed5..000000000000 --- a/apps/microtvm/cmsisnn/README.md +++ /dev/null @@ -1,94 +0,0 @@ - - - - - - - - - - - - - - - - - - -Running TVM on bare metal Arm(R) Cortex(R)-M55 CPU and CMSIS-NN -=============================================================== - -This folder contains an example of how to use TVM to run a model -on bare metal Cortex(R)-M55 CPU and CMSIS-NN. - -Prerequisites -------------- -If the demo is run in the ci_cpu Docker container provided with TVM, then the following -software will already be installed. - -If the demo is not run in the ci_cpu Docker container, then you will need the following: -- Software required to build and run the demo (These can all be installed by running - tvm/docker/install/ubuntu_install_ethosu_driver_stack.sh.) 
- - [Fixed Virtual Platform (FVP) based on Arm(R) Corstone(TM)-300 software](https://developer.arm.com/tools-and-software/open-source-software/arm-platforms-software/arm-ecosystem-fvps) - - [cmake 3.19.5](https://github.com/Kitware/CMake/releases/) - - [GCC toolchain from Arm(R)](https://developer.arm.com/-/media/Files/downloads/gnu-rm/10-2020q4/gcc-arm-none-eabi-10-2020-q4-major-x86_64-linux.tar.bz2) - - [Arm(R) Ethos(TM)-U NPU driver stack](https://review.mlplatform.org) - - [CMSIS](https://github.com/ARM-software/CMSIS_5) - - [CMSIS NN](https://github.com/ARM-software/CMSIS-NN) -- The python libraries listed in the requirements.txt of this directory - - These can be installed by running the following from the current directory: - ```bash - pip install -r ./requirements.txt - ``` - -You will also need TVM which can either be: - - Built from source (see [Install from Source](https://tvm.apache.org/docs/install/from_source.html)) - - When building from source, the following need to be set in config.cmake: - - set(USE_CMSISNN ON) - - set(USE_MICRO ON) - - set(USE_LLVM ON) - - Installed from TLCPack(see [TLCPack](https://tlcpack.ai/)) - -You will need to update your PATH environment variable to include the path to cmake 3.19.5 and the FVP. -For example if you've installed these in ```/opt/arm``` , then you would do the following: -```bash -export PATH=/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4:/opt/arm/cmake/bin:$PATH -``` - -Running the demo application ----------------------------- -Type the following command to run the bare metal demo application ([src/demo_bare_metal.c](./src/demo_bare_metal.c)): - -```bash -./run_demo.sh -``` - -If the Ethos(TM)-U platform and/or CMSIS have not been installed in /opt/arm/ethosu then -the locations for these can be specified as arguments to run_demo.sh, for example: - -```bash -./run_demo.sh --cmsis_path /home/tvm-user/cmsis \ ---ethosu_platform_path /home/tvm-user/ethosu/core_platform -``` - -This will: -- Download a quantized (int8) person detection model -- Use tvmc to compile the model for Cortex(R)-M55 CPU and CMSIS-NN -- Download an image to run the model on -- Create a C header file inputs.c containing the image data as a C array -- Create a C header file outputs.c containing a C array where the output of inference will be stored -- Build the demo application -- Run the demo application on a Fixed Virtual Platform (FVP) based on Arm(R) Corstone(TM)-300 software -- The application will report whether a person was detected e.g. "Person detected." or "No person detected." - -Using your own image --------------------- -The create_image.py script takes a single argument on the command line which is the path of the -image to be converted into an array of bytes for consumption by the model. - -The demo can be modified to use an image of your choice by changing the following line in run_demo.sh - -```bash -curl -sS https://raw.githubusercontent.com/tensorflow/tflite-micro/main/tensorflow/lite/micro/examples/person_detection/testdata/person.bmp -o input_image.bmp -``` diff --git a/apps/microtvm/cmsisnn/arm-none-eabi-gcc.cmake b/apps/microtvm/cmsisnn/arm-none-eabi-gcc.cmake deleted file mode 100644 index 415b3139be1b..000000000000 --- a/apps/microtvm/cmsisnn/arm-none-eabi-gcc.cmake +++ /dev/null @@ -1,79 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -if (__TOOLCHAIN_LOADED) - return() -endif() -set(__TOOLCHAIN_LOADED TRUE) - -set(CMAKE_SYSTEM_NAME Generic) -set(CMAKE_C_COMPILER "arm-none-eabi-gcc") -set(CMAKE_CXX_COMPILER "arm-none-eabi-g++") -set(CMAKE_SYSTEM_PROCESSOR "cortex-m55" CACHE STRING "Select Arm(R) Cortex(R)-M architecture. (cortex-m0, cortex-m3, cortex-m33, cortex-m4, cortex-m55, cortex-m7, etc)") - -set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) - -SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) -SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) -SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) - -set(CMAKE_C_STANDARD 99) -set(CMAKE_CXX_STANDARD 14) - -# The system processor could for example be set to cortex-m33+nodsp+nofp. -set(__CPU_COMPILE_TARGET ${CMAKE_SYSTEM_PROCESSOR}) -string(REPLACE "+" ";" __CPU_FEATURES ${__CPU_COMPILE_TARGET}) -list(POP_FRONT __CPU_FEATURES CMAKE_SYSTEM_PROCESSOR) - -string(FIND ${__CPU_COMPILE_TARGET} "+" __OFFSET) -if(__OFFSET GREATER_EQUAL 0) - string(SUBSTRING ${__CPU_COMPILE_TARGET} ${__OFFSET} -1 CPU_FEATURES) -endif() - -# Add -mcpu to the compile options to override the -mcpu the CMake toolchain adds -add_compile_options(-mcpu=${__CPU_COMPILE_TARGET}) - -# Set floating point unit -if("${__CPU_COMPILE_TARGET}" MATCHES "\\+fp") - set(FLOAT hard) -elseif("${__CPU_COMPILE_TARGET}" MATCHES "\\+nofp") - set(FLOAT soft) -elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m33" OR - "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m55") - set(FLOAT hard) -else() - set(FLOAT soft) -endif() - -add_compile_options(-mfloat-abi=${FLOAT}) -add_link_options(-mfloat-abi=${FLOAT}) - -# Link target -add_link_options(-mcpu=${__CPU_COMPILE_TARGET}) -add_link_options(-Xlinker -Map=output.map) - -# -# Compile options -# -set(cxx_flags "-fno-unwind-tables;-fno-rtti;-fno-exceptions") - -add_compile_options("-Wall;-Wextra;-Wsign-compare;-Wunused;-Wswitch-default;\ --Wdouble-promotion;-Wredundant-decls;-Wshadow;-Wnull-dereference;\ --Wno-format-extra-args;-Wno-unused-function;-Wno-unused-label;\ --Wno-missing-field-initializers;-Wno-return-type;-Wno-format;-Wno-int-conversion" - "$<$:${cxx_flags}>" -) diff --git a/apps/microtvm/cmsisnn/convert_image.py b/apps/microtvm/cmsisnn/convert_image.py deleted file mode 100755 index 06b23654a0db..000000000000 --- a/apps/microtvm/cmsisnn/convert_image.py +++ /dev/null @@ -1,82 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import os -import pathlib -import re -import sys -from PIL import Image -import numpy as np - -from tvm.micro import copy_crt_config_header - - -def create_header_file(name, tensor_name, tensor_data, output_path): - """ - This function generates a header file containing the data from the numpy array provided. - """ - file_path = pathlib.Path(f"{output_path}/" + name).resolve() - # Create header file with npy_data as a C array - raw_path = file_path.with_suffix(".h").resolve() - with open(raw_path, "w") as header_file: - header_file.write( - "\n" - + f"const size_t {tensor_name}_len = {tensor_data.size};\n" - + f'__attribute__((section(".data.tvm"), aligned(16))) int8_t {tensor_name}[] = "' - ) - - data_hexstr = tensor_data.tobytes().hex() - for i in range(0, len(data_hexstr), 2): - header_file.write(f"\\x{data_hexstr[i:i+2]}") - header_file.write('";\n\n') - - -def create_headers(image_name): - """ - This function generates C header files for the input and output arrays required to run inferences - """ - img_path = os.path.join("./", f"{image_name}") - - # Resize image to 224x224 - resized_image = Image.open(img_path).resize((224, 224)) - img_data = np.asarray(resized_image).astype("float32") - - # # Add the batch dimension, as we are expecting 4-dimensional input: NCHW. - img_data = np.expand_dims(img_data, axis=0) - - # Create input header file - input_data = img_data - 128 - input_data = input_data.astype(np.int8) - create_header_file("inputs", "input", input_data, "./include") - # Create output header file - output_data = np.zeros([2], np.int8) - create_header_file( - "outputs", - "output", - output_data, - "./include", - ) - - -if __name__ == "__main__": - create_headers(sys.argv[1]) - - # Generate crt_config.h - crt_config_output_path = pathlib.Path(__file__).parent.resolve() / "build" / "crt_config" - if not crt_config_output_path.exists(): - crt_config_output_path.mkdir() - copy_crt_config_header("crt", crt_config_output_path) diff --git a/apps/microtvm/cmsisnn/corstone300.ld b/apps/microtvm/cmsisnn/corstone300.ld deleted file mode 100644 index 2c5a0f7ef862..000000000000 --- a/apps/microtvm/cmsisnn/corstone300.ld +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -/*------------------ Reference System Memories ------------- - +===================+============+=======+============+============+ - | Memory | Address | Size | CPU Access | NPU Access | - +===================+============+=======+============+============+ - | ITCM | 0x00000000 | 512KB | Yes (RO) | No | - +-------------------+------------+-------+------------+------------+ - | DTCM | 0x20000000 | 512KB | Yes (R/W) | No | - +-------------------+------------+-------+------------+------------+ - | SSE-300 SRAM | 0x21000000 | 2MB | Yes (R/W) | Yes (R/W) | - +-------------------+------------+-------+------------+------------+ - | Data SRAM | 0x01000000 | 2MB | Yes (R/W) | Yes (R/W) | - +-------------------+------------+-------+------------+------------+ - | DDR | 0x60000000 | 32MB | Yes (R/W) | Yes (R/W) | - +-------------------+------------+-------+------------+------------+ */ - -/*---------------------- ITCM Configuration ---------------------------------- - Flash Configuration - Flash Base Address <0x0-0xFFFFFFFF:8> - Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__ROM_BASE = 0x00000000; -__ROM_SIZE = 0x00080000; - -/*--------------------- DTCM RAM Configuration ---------------------------- - RAM Configuration - RAM Base Address <0x0-0xFFFFFFFF:8> - RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__RAM_BASE = 0x20000000; -__RAM_SIZE = 0x00080000; - -/*----------------------- Data SRAM Configuration ------------------------------ - Data SRAM Configuration - DATA_SRAM Base Address <0x0-0xFFFFFFFF:8> - DATA_SRAM Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__DATA_SRAM_BASE = 0x01000000; -__DATA_SRAM_SIZE = 0x00200000; - -/*--------------------- Embedded SRAM Configuration ---------------------------- - SRAM Configuration - SRAM Base Address <0x0-0xFFFFFFFF:8> - SRAM Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__SRAM_BASE = 0x21000000; -__SRAM_SIZE = 0x00200000; - -/*--------------------- Stack / Heap Configuration ---------------------------- - Stack / Heap Configuration - Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> - Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__STACK_SIZE = 0x00008000; -__HEAP_SIZE = 0x00008000; - -/*--------------------- Embedded RAM Configuration ---------------------------- - DDR Configuration - DDR Base Address <0x0-0xFFFFFFFF:8> - DDR Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__DDR_BASE = 0x60000000; -__DDR_SIZE = 0x02000000; - -/* - *-------------------- <<< end of configuration section >>> ------------------- - */ - -MEMORY -{ - ITCM (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE - DTCM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE - DATA_SRAM (rwx) : ORIGIN = __DATA_SRAM_BASE, LENGTH = __DATA_SRAM_SIZE - SRAM (rwx) : ORIGIN = __SRAM_BASE, LENGTH = __SRAM_SIZE - DDR (rwx) : ORIGIN = __DDR_BASE, LENGTH = __DDR_SIZE -} - -/* Linker script to place sections and symbol values. Should be used together - * with other linker script that defines memory regions ITCM and RAM. 
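The memory map tabulated above is what drives the section placement further down in this linker script: the generated `.rodata.tvm`/`.data.tvm` sections land in DDR, code in ITCM, and data, heap and stack in DTCM. As a quick cross-check of the table, the sketch below re-states the regions and verifies that they do not overlap; the base addresses and sizes are copied from the comment, everything else is purely illustrative and not part of the build.

```python
# Sanity-check the Corstone-300 reference memory map used by corstone300.ld.
# Base addresses and sizes are copied from the table in the linker script;
# the overlap check itself is only an illustration.

REGIONS = {
    "ITCM":      (0x0000_0000, 512 * 1024),
    "DATA_SRAM": (0x0100_0000, 2 * 1024 * 1024),
    "DTCM":      (0x2000_0000, 512 * 1024),
    "SRAM":      (0x2100_0000, 2 * 1024 * 1024),
    "DDR":       (0x6000_0000, 32 * 1024 * 1024),
}

def check_no_overlap(regions):
    spans = sorted((base, base + size, name) for name, (base, size) in regions.items())
    for (_, end_a, name_a), (start_b, _, name_b) in zip(spans, spans[1:]):
        assert end_a <= start_b, f"{name_a} overlaps {name_b}"

check_no_overlap(REGIONS)
for name, (base, size) in REGIONS.items():
    print(f"{name:9s} {base:#010x}..{base + size:#010x} ({size // 1024} KB)")
```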
- * It references following symbols, which must be defined in code: - * Reset_Handler : Entry of reset handler - * - * It defines following symbols, which code can use without definition: - * __exidx_start - * __exidx_end - * __copy_table_start__ - * __copy_table_end__ - * __zero_table_start__ - * __zero_table_end__ - * __etext - * __data_start__ - * __preinit_array_start - * __preinit_array_end - * __init_array_start - * __init_array_end - * __fini_array_start - * __fini_array_end - * __data_end__ - * __bss_start__ - * __bss_end__ - * __end__ - * end - * __HeapLimit - * __StackLimit - * __StackTop - * __stack - */ -ENTRY(Reset_Handler) - -SECTIONS -{ - /* .ddr is placed before .text so that .rodata.tvm is encountered before .rodata* */ - .ddr : - { - . = ALIGN (16); - *(.rodata.tvm) - . = ALIGN (16); - *(.data.tvm); - . = ALIGN(16); - } > DDR - - .text : - { - KEEP(*(.vectors)) - *(.text*) - - KEEP(*(.init)) - KEEP(*(.fini)) - - /* .ctors */ - *crtbegin.o(.ctors) - *crtbegin?.o(.ctors) - *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) - *(SORT(.ctors.*)) - *(.ctors) - - /* .dtors */ - *crtbegin.o(.dtors) - *crtbegin?.o(.dtors) - *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) - *(SORT(.dtors.*)) - *(.dtors) - - *(.rodata*) - - KEEP(*(.eh_frame*)) - } > ITCM - - .ARM.extab : - { - *(.ARM.extab* .gnu.linkonce.armextab.*) - } > ITCM - - __exidx_start = .; - .ARM.exidx : - { - *(.ARM.exidx* .gnu.linkonce.armexidx.*) - } > ITCM - __exidx_end = .; - - .copy.table : - { - . = ALIGN(4); - __copy_table_start__ = .; - LONG (__etext) - LONG (__data_start__) - LONG (__data_end__ - __data_start__) - /* Add each additional data section here */ - __copy_table_end__ = .; - } > ITCM - - .zero.table : - { - . = ALIGN(4); - __zero_table_start__ = .; - __zero_table_end__ = .; - } > ITCM - - /** - * Location counter can end up 2byte aligned with narrow Thumb code but - * __etext is assumed by startup code to be the LMA of a section in DTCM - * which must be 4byte aligned - */ - __etext = ALIGN (4); - - .sram : - { - . = ALIGN(16); - } > SRAM AT > SRAM - - .data : AT (__etext) - { - __data_start__ = .; - *(vtable) - *(.data) - *(.data.*) - - . = ALIGN(4); - /* preinit data */ - PROVIDE_HIDDEN (__preinit_array_start = .); - KEEP(*(.preinit_array)) - PROVIDE_HIDDEN (__preinit_array_end = .); - - . = ALIGN(4); - /* init data */ - PROVIDE_HIDDEN (__init_array_start = .); - KEEP(*(SORT(.init_array.*))) - KEEP(*(.init_array)) - PROVIDE_HIDDEN (__init_array_end = .); - - - . = ALIGN(4); - /* finit data */ - PROVIDE_HIDDEN (__fini_array_start = .); - KEEP(*(SORT(.fini_array.*))) - KEEP(*(.fini_array)) - PROVIDE_HIDDEN (__fini_array_end = .); - - KEEP(*(.jcr*)) - . = ALIGN(4); - /* All data end */ - __data_end__ = .; - - } > DTCM - - .bss.noinit (NOLOAD): - { - . = ALIGN(16); - *(.bss.noinit.*) - . = ALIGN(16); - } > SRAM AT > SRAM - - .bss : - { - . = ALIGN(4); - __bss_start__ = .; - *(.bss) - *(.bss.*) - *(COMMON) - . = ALIGN(4); - __bss_end__ = .; - } > DTCM AT > DTCM - - .data_sram : - { - . = ALIGN(16); - } > DATA_SRAM - - .heap (COPY) : - { - . = ALIGN(8); - __end__ = .; - PROVIDE(end = .); - . = . + __HEAP_SIZE; - . = ALIGN(8); - __HeapLimit = .; - } > DTCM - - .stack (ORIGIN(DTCM) + LENGTH(DTCM) - __STACK_SIZE) (COPY) : - { - . = ALIGN(8); - __StackLimit = .; - . = . + __STACK_SIZE; - . 
= ALIGN(8); - __StackTop = .; - } > DTCM - PROVIDE(__stack = __StackTop); - - /* Check if data + stack exceeds DTCM limit */ - ASSERT(__StackLimit >= __bss_end__, "region DTCM overflowed with stack") -} diff --git a/apps/microtvm/cmsisnn/include/tvm_runtime.h b/apps/microtvm/cmsisnn/include/tvm_runtime.h deleted file mode 100644 index 2b59d9347027..000000000000 --- a/apps/microtvm/cmsisnn/include/tvm_runtime.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -void __attribute__((noreturn)) TVMPlatformAbort(tvm_crt_error_t error_code) { - printf("TVMPlatformAbort: %d\n", error_code); - printf("EXITTHESIM\n"); - exit(-1); -} - -tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { - return kTvmErrorFunctionCallNotImplemented; -} - -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { - return kTvmErrorFunctionCallNotImplemented; -} - -void TVMLogf(const char* msg, ...) 
{ - va_list args; - va_start(args, msg); - vfprintf(stdout, msg, args); - va_end(args); -} - -TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) { return 0; } - -#ifdef __cplusplus -} -#endif diff --git a/apps/microtvm/cmsisnn/requirements.txt b/apps/microtvm/cmsisnn/requirements.txt deleted file mode 100644 index b07c10a050e4..000000000000 --- a/apps/microtvm/cmsisnn/requirements.txt +++ /dev/null @@ -1,241 +0,0 @@ -attrs==21.2.0 \ - --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ - --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb -cloudpickle==2.0.0 \ - --hash=sha256:5cd02f3b417a783ba84a4ec3e290ff7929009fe51f6405423cfccfadd43ba4a4 \ - --hash=sha256:6b2df9741d06f43839a3275c4e6632f7df6487a1f181f5f46a052d3c917c3d11 -decorator==5.1.0 \ - --hash=sha256:7b12e7c3c6ab203a29e157335e9122cb03de9ab7264b137594103fd4a683b374 \ - --hash=sha256:e59913af105b9860aa2c8d3272d9de5a56a4e608db9a2f167a8480b323d529a7 -ethos-u-vela==3.8.0 \ - --hash=sha256:cb0b1f5b1f886242d67ff0072efb88ac90cc87574ebe92fc98db4609f7797acf -flatbuffers==2.0.7 \ - --hash=sha256:0ae7d69c5b82bf41962ca5fde9cc43033bc9501311d975fd5a25e8a7d29c1245 \ - --hash=sha256:71e135d533be527192819aaab757c5e3d109cb10fbb01e687f6bdb7a61ad39d1 -lxml==4.6.3 \ - --hash=sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d \ - --hash=sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3 \ - --hash=sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2 \ - --hash=sha256:1b38116b6e628118dea5b2186ee6820ab138dbb1e24a13e478490c7db2f326ae \ - --hash=sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f \ - --hash=sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927 \ - --hash=sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3 \ - --hash=sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7 \ - --hash=sha256:3082c518be8e97324390614dacd041bb1358c882d77108ca1957ba47738d9d59 \ - --hash=sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f \ - --hash=sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade \ - --hash=sha256:36108c73739985979bf302006527cf8a20515ce444ba916281d1c43938b8bb96 \ - --hash=sha256:39b78571b3b30645ac77b95f7c69d1bffc4cf8c3b157c435a34da72e78c82468 \ - --hash=sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b \ - --hash=sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4 \ - --hash=sha256:4c61b3a0db43a1607d6264166b230438f85bfed02e8cff20c22e564d0faff354 \ - --hash=sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83 \ - --hash=sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04 \ - --hash=sha256:5c8c163396cc0df3fd151b927e74f6e4acd67160d6c33304e805b84293351d16 \ - --hash=sha256:64812391546a18896adaa86c77c59a4998f33c24788cadc35789e55b727a37f4 \ - --hash=sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791 \ - --hash=sha256:6f12e1427285008fd32a6025e38e977d44d6382cf28e7201ed10d6c1698d2a9a \ - --hash=sha256:74f7d8d439b18fa4c385f3f5dfd11144bb87c1da034a466c5b5577d23a1d9b51 \ - --hash=sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1 \ - --hash=sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a \ - --hash=sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f \ - --hash=sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee \ - 
--hash=sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec \ - --hash=sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969 \ - --hash=sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28 \ - --hash=sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a \ - --hash=sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa \ - --hash=sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106 \ - --hash=sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d \ - --hash=sha256:c1a40c06fd5ba37ad39caa0b3144eb3772e813b5fb5b084198a985431c2f1e8d \ - --hash=sha256:c47ff7e0a36d4efac9fd692cfa33fbd0636674c102e9e8d9b26e1b93a94e7617 \ - --hash=sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4 \ - --hash=sha256:cdaf11d2bd275bf391b5308f86731e5194a21af45fbaaaf1d9e8147b9160ea92 \ - --hash=sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0 \ - --hash=sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4 \ - --hash=sha256:d916d31fd85b2f78c76400d625076d9124de3e4bda8b016d25a050cc7d603f24 \ - --hash=sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2 \ - --hash=sha256:e1cbd3f19a61e27e011e02f9600837b921ac661f0c40560eefb366e4e4fb275e \ - --hash=sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0 \ - --hash=sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654 \ - --hash=sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2 \ - --hash=sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23 \ - --hash=sha256:f90ba11136bfdd25cae3951af8da2e95121c9b9b93727b1b896e3fa105b2f586 -nose==1.3.7 \ - --hash=sha256:9ff7c6cc443f8c51994b34a667bbcf45afd6d945be7477b52e97516fd17c53ac \ - --hash=sha256:dadcddc0aefbf99eea214e0f1232b94f2fa9bd98fa8353711dacb112bfcbbb2a \ - --hash=sha256:f1bffef9cbc82628f6e7d7b40d7e255aefaa1adb6a1b1d26c69a8b79e6208a98 -numpy==1.21.3 \ - --hash=sha256:043e83bfc274649c82a6f09836943e4a4aebe5e33656271c7dbf9621dd58b8ec \ - --hash=sha256:160ccc1bed3a8371bf0d760971f09bfe80a3e18646620e9ded0ad159d9749baa \ - --hash=sha256:188031f833bbb623637e66006cf75e933e00e7231f67e2b45cf8189612bb5dc3 \ - --hash=sha256:28f15209fb535dd4c504a7762d3bc440779b0e37d50ed810ced209e5cea60d96 \ - --hash=sha256:29fb3dcd0468b7715f8ce2c0c2d9bbbaf5ae686334951343a41bd8d155c6ea27 \ - --hash=sha256:2a6ee9620061b2a722749b391c0d80a0e2ae97290f1b32e28d5a362e21941ee4 \ - --hash=sha256:300321e3985c968e3ae7fbda187237b225f3ffe6528395a5b7a5407f73cf093e \ - --hash=sha256:32437f0b275c1d09d9c3add782516413e98cd7c09e6baf4715cbce781fc29912 \ - --hash=sha256:3c09418a14471c7ae69ba682e2428cae5b4420a766659605566c0fa6987f6b7e \ - --hash=sha256:49c6249260890e05b8111ebfc391ed58b3cb4b33e63197b2ec7f776e45330721 \ - --hash=sha256:4cc9b512e9fb590797474f58b7f6d1f1b654b3a94f4fa8558b48ca8b3cfc97cf \ - --hash=sha256:508b0b513fa1266875524ba8a9ecc27b02ad771fe1704a16314dc1a816a68737 \ - --hash=sha256:50cd26b0cf6664cb3b3dd161ba0a09c9c1343db064e7c69f9f8b551f5104d654 \ - --hash=sha256:5c4193f70f8069550a1788bd0cd3268ab7d3a2b70583dfe3b2e7f421e9aace06 \ - --hash=sha256:5dfe9d6a4c39b8b6edd7990091fea4f852888e41919d0e6722fe78dd421db0eb \ - --hash=sha256:63571bb7897a584ca3249c86dd01c10bcb5fe4296e3568b2e9c1a55356b6410e \ - --hash=sha256:75621882d2230ab77fb6a03d4cbccd2038511491076e7964ef87306623aa5272 \ - --hash=sha256:75eb7cadc8da49302f5b659d40ba4f6d94d5045fbd9569c9d058e77b0514c9e4 \ - 
--hash=sha256:88a5d6b268e9ad18f3533e184744acdaa2e913b13148160b1152300c949bbb5f \ - --hash=sha256:8a10968963640e75cc0193e1847616ab4c718e83b6938ae74dea44953950f6b7 \ - --hash=sha256:90bec6a86b348b4559b6482e2b684db4a9a7eed1fa054b86115a48d58fbbf62a \ - --hash=sha256:98339aa9911853f131de11010f6dd94c8cec254d3d1f7261528c3b3e3219f139 \ - --hash=sha256:a99a6b067e5190ac6d12005a4d85aa6227c5606fa93211f86b1dafb16233e57d \ - --hash=sha256:bffa2eee3b87376cc6b31eee36d05349571c236d1de1175b804b348dc0941e3f \ - --hash=sha256:c6c2d535a7beb1f8790aaa98fd089ceab2e3dd7ca48aca0af7dc60e6ef93ffe1 \ - --hash=sha256:cc14e7519fab2a4ed87d31f99c31a3796e4e1fe63a86ebdd1c5a1ea78ebd5896 \ - --hash=sha256:dd0482f3fc547f1b1b5d6a8b8e08f63fdc250c58ce688dedd8851e6e26cff0f3 \ - --hash=sha256:dde972a1e11bb7b702ed0e447953e7617723760f420decb97305e66fb4afc54f \ - --hash=sha256:e54af82d68ef8255535a6cdb353f55d6b8cf418a83e2be3569243787a4f4866f \ - --hash=sha256:e606e6316911471c8d9b4618e082635cfe98876007556e89ce03d52ff5e8fcf0 \ - --hash=sha256:f41b018f126aac18583956c54544db437f25c7ee4794bcb23eb38bef8e5e192a \ - --hash=sha256:f8f4625536926a155b80ad2bbff44f8cc59e9f2ad14cdda7acf4c135b4dc8ff2 \ - --hash=sha256:fe52dbe47d9deb69b05084abd4b0df7abb39a3c51957c09f635520abd49b29dd -Pillow==10.3.0 \ - --hash=sha256:048ad577748b9fa4a99a0548c64f2cb8d672d5bf2e643a739ac8faff1164238c \ - --hash=sha256:048eeade4c33fdf7e08da40ef402e748df113fd0b4584e32c4af74fe78baaeb2 \ - --hash=sha256:0ba26351b137ca4e0db0342d5d00d2e355eb29372c05afd544ebf47c0956ffeb \ - --hash=sha256:0ea2a783a2bdf2a561808fe4a7a12e9aa3799b701ba305de596bc48b8bdfce9d \ - --hash=sha256:1530e8f3a4b965eb6a7785cf17a426c779333eb62c9a7d1bbcf3ffd5bf77a4aa \ - --hash=sha256:16563993329b79513f59142a6b02055e10514c1a8e86dca8b48a893e33cf91e3 \ - --hash=sha256:19aeb96d43902f0a783946a0a87dbdad5c84c936025b8419da0a0cd7724356b1 \ - --hash=sha256:1a1d1915db1a4fdb2754b9de292642a39a7fb28f1736699527bb649484fb966a \ - --hash=sha256:1b87bd9d81d179bd8ab871603bd80d8645729939f90b71e62914e816a76fc6bd \ - --hash=sha256:1dfc94946bc60ea375cc39cff0b8da6c7e5f8fcdc1d946beb8da5c216156ddd8 \ - --hash=sha256:2034f6759a722da3a3dbd91a81148cf884e91d1b747992ca288ab88c1de15999 \ - --hash=sha256:261ddb7ca91fcf71757979534fb4c128448b5b4c55cb6152d280312062f69599 \ - --hash=sha256:2ed854e716a89b1afcedea551cd85f2eb2a807613752ab997b9974aaa0d56936 \ - --hash=sha256:3102045a10945173d38336f6e71a8dc71bcaeed55c3123ad4af82c52807b9375 \ - --hash=sha256:339894035d0ede518b16073bdc2feef4c991ee991a29774b33e515f1d308e08d \ - --hash=sha256:412444afb8c4c7a6cc11a47dade32982439925537e483be7c0ae0cf96c4f6a0b \ - --hash=sha256:4203efca580f0dd6f882ca211f923168548f7ba334c189e9eab1178ab840bf60 \ - --hash=sha256:45ebc7b45406febf07fef35d856f0293a92e7417ae7933207e90bf9090b70572 \ - --hash=sha256:4b5ec25d8b17217d635f8935dbc1b9aa5907962fae29dff220f2659487891cd3 \ - --hash=sha256:4c8e73e99da7db1b4cad7f8d682cf6abad7844da39834c288fbfa394a47bbced \ - --hash=sha256:4e6f7d1c414191c1199f8996d3f2282b9ebea0945693fb67392c75a3a320941f \ - --hash=sha256:4eaa22f0d22b1a7e93ff0a596d57fdede2e550aecffb5a1ef1106aaece48e96b \ - --hash=sha256:50b8eae8f7334ec826d6eeffaeeb00e36b5e24aa0b9df322c247539714c6df19 \ - --hash=sha256:50fd3f6b26e3441ae07b7c979309638b72abc1a25da31a81a7fbd9495713ef4f \ - --hash=sha256:51243f1ed5161b9945011a7360e997729776f6e5d7005ba0c6879267d4c5139d \ - --hash=sha256:5d512aafa1d32efa014fa041d38868fda85028e3f930a96f85d49c7d8ddc0383 \ - --hash=sha256:5f77cf66e96ae734717d341c145c5949c63180842a545c47a0ce7ae52ca83795 \ - 
--hash=sha256:6b02471b72526ab8a18c39cb7967b72d194ec53c1fd0a70b050565a0f366d355 \ - --hash=sha256:6fb1b30043271ec92dc65f6d9f0b7a830c210b8a96423074b15c7bc999975f57 \ - --hash=sha256:7161ec49ef0800947dc5570f86568a7bb36fa97dd09e9827dc02b718c5643f09 \ - --hash=sha256:72d622d262e463dfb7595202d229f5f3ab4b852289a1cd09650362db23b9eb0b \ - --hash=sha256:74d28c17412d9caa1066f7a31df8403ec23d5268ba46cd0ad2c50fb82ae40462 \ - --hash=sha256:78618cdbccaa74d3f88d0ad6cb8ac3007f1a6fa5c6f19af64b55ca170bfa1edf \ - --hash=sha256:793b4e24db2e8742ca6423d3fde8396db336698c55cd34b660663ee9e45ed37f \ - --hash=sha256:798232c92e7665fe82ac085f9d8e8ca98826f8e27859d9a96b41d519ecd2e49a \ - --hash=sha256:81d09caa7b27ef4e61cb7d8fbf1714f5aec1c6b6c5270ee53504981e6e9121ad \ - --hash=sha256:8ab74c06ffdab957d7670c2a5a6e1a70181cd10b727cd788c4dd9005b6a8acd9 \ - --hash=sha256:8eb0908e954d093b02a543dc963984d6e99ad2b5e36503d8a0aaf040505f747d \ - --hash=sha256:90b9e29824800e90c84e4022dd5cc16eb2d9605ee13f05d47641eb183cd73d45 \ - --hash=sha256:9797a6c8fe16f25749b371c02e2ade0efb51155e767a971c61734b1bf6293994 \ - --hash=sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d \ - --hash=sha256:9d3bea1c75f8c53ee4d505c3e67d8c158ad4df0d83170605b50b64025917f338 \ - --hash=sha256:9e2ec1e921fd07c7cda7962bad283acc2f2a9ccc1b971ee4b216b75fad6f0463 \ - --hash=sha256:9e91179a242bbc99be65e139e30690e081fe6cb91a8e77faf4c409653de39451 \ - --hash=sha256:a0eaa93d054751ee9964afa21c06247779b90440ca41d184aeb5d410f20ff591 \ - --hash=sha256:a2c405445c79c3f5a124573a051062300936b0281fee57637e706453e452746c \ - --hash=sha256:aa7e402ce11f0885305bfb6afb3434b3cd8f53b563ac065452d9d5654c7b86fd \ - --hash=sha256:aff76a55a8aa8364d25400a210a65ff59d0168e0b4285ba6bf2bd83cf675ba32 \ - --hash=sha256:b09b86b27a064c9624d0a6c54da01c1beaf5b6cadfa609cf63789b1d08a797b9 \ - --hash=sha256:b14f16f94cbc61215115b9b1236f9c18403c15dd3c52cf629072afa9d54c1cbf \ - --hash=sha256:b50811d664d392f02f7761621303eba9d1b056fb1868c8cdf4231279645c25f5 \ - --hash=sha256:b7bc2176354defba3edc2b9a777744462da2f8e921fbaf61e52acb95bafa9828 \ - --hash=sha256:c78e1b00a87ce43bb37642c0812315b411e856a905d58d597750eb79802aaaa3 \ - --hash=sha256:c83341b89884e2b2e55886e8fbbf37c3fa5efd6c8907124aeb72f285ae5696e5 \ - --hash=sha256:ca2870d5d10d8726a27396d3ca4cf7976cec0f3cb706debe88e3a5bd4610f7d2 \ - --hash=sha256:ccce24b7ad89adb5a1e34a6ba96ac2530046763912806ad4c247356a8f33a67b \ - --hash=sha256:cd5e14fbf22a87321b24c88669aad3a51ec052eb145315b3da3b7e3cc105b9a2 \ - --hash=sha256:ce49c67f4ea0609933d01c0731b34b8695a7a748d6c8d186f95e7d085d2fe475 \ - --hash=sha256:d33891be6df59d93df4d846640f0e46f1a807339f09e79a8040bc887bdcd7ed3 \ - --hash=sha256:d3b2348a78bc939b4fed6552abfd2e7988e0f81443ef3911a4b8498ca084f6eb \ - --hash=sha256:d886f5d353333b4771d21267c7ecc75b710f1a73d72d03ca06df49b09015a9ef \ - --hash=sha256:d93480005693d247f8346bc8ee28c72a2191bdf1f6b5db469c096c0c867ac015 \ - --hash=sha256:dc1a390a82755a8c26c9964d457d4c9cbec5405896cba94cf51f36ea0d855002 \ - --hash=sha256:dd78700f5788ae180b5ee8902c6aea5a5726bac7c364b202b4b3e3ba2d293170 \ - --hash=sha256:e46f38133e5a060d46bd630faa4d9fa0202377495df1f068a8299fd78c84de84 \ - --hash=sha256:e4b878386c4bf293578b48fc570b84ecfe477d3b77ba39a6e87150af77f40c57 \ - --hash=sha256:f0d0591a0aeaefdaf9a5e545e7485f89910c977087e7de2b6c388aec32011e9f \ - --hash=sha256:fdcbb4068117dfd9ce0138d068ac512843c52295ed996ae6dd1faf537b6dbc27 \ - --hash=sha256:ff61bfd9253c3915e6d41c651d5f962da23eda633cf02262990094a18a55371a -psutil==5.8.0 \ - 
--hash=sha256:0066a82f7b1b37d334e68697faba68e5ad5e858279fd6351c8ca6024e8d6ba64 \ - --hash=sha256:02b8292609b1f7fcb34173b25e48d0da8667bc85f81d7476584d889c6e0f2131 \ - --hash=sha256:0ae6f386d8d297177fd288be6e8d1afc05966878704dad9847719650e44fc49c \ - --hash=sha256:0c9ccb99ab76025f2f0bbecf341d4656e9c1351db8cc8a03ccd62e318ab4b5c6 \ - --hash=sha256:0dd4465a039d343925cdc29023bb6960ccf4e74a65ad53e768403746a9207023 \ - --hash=sha256:12d844996d6c2b1d3881cfa6fa201fd635971869a9da945cf6756105af73d2df \ - --hash=sha256:1bff0d07e76114ec24ee32e7f7f8d0c4b0514b3fae93e3d2aaafd65d22502394 \ - --hash=sha256:245b5509968ac0bd179287d91210cd3f37add77dad385ef238b275bad35fa1c4 \ - --hash=sha256:28ff7c95293ae74bf1ca1a79e8805fcde005c18a122ca983abf676ea3466362b \ - --hash=sha256:36b3b6c9e2a34b7d7fbae330a85bf72c30b1c827a4366a07443fc4b6270449e2 \ - --hash=sha256:52de075468cd394ac98c66f9ca33b2f54ae1d9bff1ef6b67a212ee8f639ec06d \ - --hash=sha256:5da29e394bdedd9144c7331192e20c1f79283fb03b06e6abd3a8ae45ffecee65 \ - --hash=sha256:61f05864b42fedc0771d6d8e49c35f07efd209ade09a5afe6a5059e7bb7bf83d \ - --hash=sha256:6223d07a1ae93f86451d0198a0c361032c4c93ebd4bf6d25e2fb3edfad9571ef \ - --hash=sha256:6323d5d845c2785efb20aded4726636546b26d3b577aded22492908f7c1bdda7 \ - --hash=sha256:6ffe81843131ee0ffa02c317186ed1e759a145267d54fdef1bc4ea5f5931ab60 \ - --hash=sha256:74f2d0be88db96ada78756cb3a3e1b107ce8ab79f65aa885f76d7664e56928f6 \ - --hash=sha256:74fb2557d1430fff18ff0d72613c5ca30c45cdbfcddd6a5773e9fc1fe9364be8 \ - --hash=sha256:90d4091c2d30ddd0a03e0b97e6a33a48628469b99585e2ad6bf21f17423b112b \ - --hash=sha256:90f31c34d25b1b3ed6c40cdd34ff122b1887a825297c017e4cbd6796dd8b672d \ - --hash=sha256:99de3e8739258b3c3e8669cb9757c9a861b2a25ad0955f8e53ac662d66de61ac \ - --hash=sha256:c6a5fd10ce6b6344e616cf01cc5b849fa8103fbb5ba507b6b2dee4c11e84c935 \ - --hash=sha256:ce8b867423291cb65cfc6d9c4955ee9bfc1e21fe03bb50e177f2b957f1c2469d \ - --hash=sha256:d225cd8319aa1d3c85bf195c4e07d17d3cd68636b8fc97e6cf198f782f99af28 \ - --hash=sha256:ea313bb02e5e25224e518e4352af4bf5e062755160f77e4b1767dd5ccb65f876 \ - --hash=sha256:ea372bcc129394485824ae3e3ddabe67dc0b118d262c568b4d2602a7070afdb0 \ - --hash=sha256:f4634b033faf0d968bb9220dd1c793b897ab7f1189956e1aa9eae752527127d3 \ - --hash=sha256:fcc01e900c1d7bee2a37e5d6e4f9194760a93597c97fee89c4ae51701de03563 -scipy==1.5.4 \ - --hash=sha256:168c45c0c32e23f613db7c9e4e780bc61982d71dcd406ead746c7c7c2f2004ce \ - --hash=sha256:213bc59191da2f479984ad4ec39406bf949a99aba70e9237b916ce7547b6ef42 \ - --hash=sha256:25b241034215247481f53355e05f9e25462682b13bd9191359075682adcd9554 \ - --hash=sha256:2c872de0c69ed20fb1a9b9cf6f77298b04a26f0b8720a5457be08be254366c6e \ - --hash=sha256:3397c129b479846d7eaa18f999369a24322d008fac0782e7828fa567358c36ce \ - --hash=sha256:368c0f69f93186309e1b4beb8e26d51dd6f5010b79264c0f1e9ca00cd92ea8c9 \ - --hash=sha256:3d5db5d815370c28d938cf9b0809dade4acf7aba57eaf7ef733bfedc9b2474c4 \ - --hash=sha256:4598cf03136067000855d6b44d7a1f4f46994164bcd450fb2c3d481afc25dd06 \ - --hash=sha256:4a453d5e5689de62e5d38edf40af3f17560bfd63c9c5bd228c18c1f99afa155b \ - --hash=sha256:4f12d13ffbc16e988fa40809cbbd7a8b45bc05ff6ea0ba8e3e41f6f4db3a9e47 \ - --hash=sha256:634568a3018bc16a83cda28d4f7aed0d803dd5618facb36e977e53b2df868443 \ - --hash=sha256:65923bc3809524e46fb7eb4d6346552cbb6a1ffc41be748535aa502a2e3d3389 \ - --hash=sha256:6b0ceb23560f46dd236a8ad4378fc40bad1783e997604ba845e131d6c680963e \ - --hash=sha256:8c8d6ca19c8497344b810b0b0344f8375af5f6bb9c98bd42e33f747417ab3f57 \ - 
--hash=sha256:9ad4fcddcbf5dc67619379782e6aeef41218a79e17979aaed01ed099876c0e62 \ - --hash=sha256:a254b98dbcc744c723a838c03b74a8a34c0558c9ac5c86d5561703362231107d \ - --hash=sha256:b03c4338d6d3d299e8ca494194c0ae4f611548da59e3c038813f1a43976cb437 \ - --hash=sha256:cc1f78ebc982cd0602c9a7615d878396bec94908db67d4ecddca864d049112f2 \ - --hash=sha256:d6d25c41a009e3c6b7e757338948d0076ee1dd1770d1c09ec131f11946883c54 \ - --hash=sha256:d84cadd7d7998433334c99fa55bcba0d8b4aeff0edb123b2a1dfcface538e474 \ - --hash=sha256:e360cb2299028d0b0d0f65a5c5e51fc16a335f1603aa2357c25766c8dab56938 \ - --hash=sha256:e98d49a5717369d8241d6cf33ecb0ca72deee392414118198a8e5b4c35c56340 \ - --hash=sha256:ed572470af2438b526ea574ff8f05e7f39b44ac37f712105e57fc4d53a6fb660 \ - --hash=sha256:f87b39f4d69cf7d7529d7b1098cb712033b17ea7714aed831b95628f483fd012 \ - --hash=sha256:fa789583fc94a7689b45834453fec095245c7e69c58561dc159b5d5277057e4c -tflite==2.10.0 \ - --hash=sha256:6818a5d7776958b803944ba0a1f4c4395559606d9e795d67ac467a8a3904757d \ - --hash=sha256:89cb9f57df0f5345f8fad1381e0fae6180ded687113eb552cfbb60a05edc002c -tornado==6.3.3 \ - --hash=sha256:1bd19ca6c16882e4d37368e0152f99c099bad93e0950ce55e71daed74045908f \ - --hash=sha256:22d3c2fa10b5793da13c807e6fc38ff49a4f6e1e3868b0a6f4164768bb8e20f5 \ - --hash=sha256:502fba735c84450974fec147340016ad928d29f1e91f49be168c0a4c18181e1d \ - --hash=sha256:65ceca9500383fbdf33a98c0087cb975b2ef3bfb874cb35b8de8740cf7f41bd3 \ - --hash=sha256:71a8db65160a3c55d61839b7302a9a400074c9c753040455494e2af74e2501f2 \ - --hash=sha256:7ac51f42808cca9b3613f51ffe2a965c8525cb1b00b7b2d56828b8045354f76a \ - --hash=sha256:7d01abc57ea0dbb51ddfed477dfe22719d376119844e33c661d873bf9c0e4a16 \ - --hash=sha256:805d507b1f588320c26f7f097108eb4023bbaa984d63176d1652e184ba24270a \ - --hash=sha256:9dc4444c0defcd3929d5c1eb5706cbe1b116e762ff3e0deca8b715d14bf6ec17 \ - --hash=sha256:ceb917a50cd35882b57600709dd5421a418c29ddc852da8bcdab1f0db33406b0 \ - --hash=sha256:e7d8db41c0181c80d76c982aacc442c0783a2c54d6400fe028954201a2e032fe diff --git a/apps/microtvm/cmsisnn/run_demo.sh b/apps/microtvm/cmsisnn/run_demo.sh deleted file mode 100755 index e5d1064e6e65..000000000000 --- a/apps/microtvm/cmsisnn/run_demo.sh +++ /dev/null @@ -1,153 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
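The run_demo.sh body below drives the whole flow; its central step compiles the person-detection model with tvmc for the Cortex(R)-M55 CPU and CMSIS-NN, using the CRT runtime and the AOT executor, and emits a Model Library Format archive. For readers who only want to reproduce that step outside the script, here is a minimal Python sketch of the equivalent invocation: the flags are copied verbatim from the script, the model is assumed to be a local person_detect.tflite, and a TVM build with USE_MICRO, USE_CMSISNN and USE_LLVM enabled (or a TLCPack install) is assumed to be importable.

```python
# Invoke tvmc exactly as the deleted run_demo.sh does (flags copied from the script).
# Assumes ./person_detect.tflite exists and TVM is available to python3.
import subprocess

cmd = [
    "python3", "-m", "tvm.driver.tvmc", "compile",
    "--target=cmsis-nn,c",
    "--target-cmsis-nn-mcpu=cortex-m55",
    "--target-c-mcpu=cortex-m55",
    "--runtime=crt",
    "--executor=aot",
    "--executor-aot-interface-api=c",
    "--executor-aot-unpacked-api=1",
    "--pass-config", "tir.usmp.enable=1",
    "--pass-config", "tir.usmp.algorithm=hill_climb",
    "--pass-config", "tir.disable_storage_rewrite=1",
    "--pass-config", "tir.disable_vectorize=1",
    "--output-format=mlf",          # Model Library Format tarball (module.tar)
    "--module-name=detection",
    "./person_detect.tflite",
]
subprocess.run(cmd, check=True)
```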
- -set -e -set -u -set -o pipefail -set -x - -# Show usage -function show_usage() { - cat <&2 - show_usage >&2 - exit 1 - fi - ;; - - --ethosu_platform_path) - if [ $# -gt 1 ] - then - export ETHOSU_PLATFORM_PATH="$2" - shift 2 - else - echo 'ERROR: --ethosu_platform_path requires a non-empty argument' >&2 - show_usage >&2 - exit 1 - fi - ;; - - --fvp_path) - if [ $# -gt 1 ] - then - export PATH="$2/models/Linux64_GCC-6.4:$PATH" - shift 2 - else - echo 'ERROR: --fvp_path requires a non-empty argument' >&2 - show_usage >&2 - exit 1 - fi - ;; - - --cmake_path) - if [ $# -gt 1 ] - then - export CMAKE="$2" - shift 2 - else - echo 'ERROR: --cmake_path requires a non-empty argument' >&2 - show_usage >&2 - exit 1 - fi - ;; - - -*|--*) - echo "Error: Unknown flag: $1" >&2 - show_usage >&2 - exit 1 - ;; - esac -done - - -# Directories -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" - -# Make build directory -make cleanall -mkdir -p build -cd build - -# Get person_detect model -model_url='https://github.com/tensorflow/tflite-micro/raw/main/tensorflow/lite/micro/models/person_detect.tflite' -curl --retry 64 -sSL ${model_url} -o ./person_detect.tflite - -# Compile model for Arm(R) Cortex(R)-M55 CPU and CMSIS-NN -# An alternative to using "python3 -m tvm.driver.tvmc" is to call -# "tvmc" directly once TVM has been pip installed. -python3 -m tvm.driver.tvmc compile --target=cmsis-nn,c \ - --target-cmsis-nn-mcpu=cortex-m55 \ - --target-c-mcpu=cortex-m55 \ - --runtime=crt \ - --executor=aot \ - --executor-aot-interface-api=c \ - --executor-aot-unpacked-api=1 \ - --pass-config tir.usmp.enable=1 \ - --pass-config tir.usmp.algorithm=hill_climb \ - --pass-config tir.disable_storage_rewrite=1 \ - --pass-config tir.disable_vectorize=1 ./person_detect.tflite \ - --output-format=mlf \ - --module-name=detection -tar -xf module.tar - -# Get input image -curl -sS https://raw.githubusercontent.com/tensorflow/tflite-micro/main/tensorflow/lite/micro/examples/person_detection/testdata/person.bmp -o input_image.bmp -# curl -sS https://raw.githubusercontent.com/tensorflow/tflite-micro/main/tensorflow/lite/micro/examples/person_detection/testdata/no_person.bmp -o input_image.bmp - -# Create C header files -cd .. -python3 ./convert_image.py ./build/input_image.bmp - -# Build demo executable -cd ${script_dir} -make - -# Run demo executable on the FVP -FVP_Corstone_SSE-300_Ethos-U55 -C cpu0.CFGDTCMSZ=15 \ --C cpu0.CFGITCMSZ=15 -C mps3_board.uart0.out_file=\"-\" -C mps3_board.uart0.shutdown_tag=\"EXITTHESIM\" \ --C mps3_board.visualisation.disable-visualisation=1 -C mps3_board.telnetterminal0.start_telnet=0 \ --C mps3_board.telnetterminal1.start_telnet=0 -C mps3_board.telnetterminal2.start_telnet=0 -C mps3_board.telnetterminal5.start_telnet=0 \ -./build/demo diff --git a/apps/microtvm/cmsisnn/src/demo_bare_metal.c b/apps/microtvm/cmsisnn/src/demo_bare_metal.c deleted file mode 100644 index 80b298d8b2d8..000000000000 --- a/apps/microtvm/cmsisnn/src/demo_bare_metal.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include -#include - -#include "uart_stdout.h" - -// Header files generated by convert_image.py -#include "inputs.h" -#include "outputs.h" - -int main(int argc, char** argv) { - UartStdOutInit(); - printf("Starting Demo\n"); - - printf("Running detection inference\n"); - struct tvmgen_detection_outputs detection_outputs = { - .MobilenetV1_Predictions_Reshape_1 = output, - }; - struct tvmgen_detection_inputs detection_inputs = { - .input = input, - }; - - tvmgen_detection_run(&detection_inputs, &detection_outputs); - - // Report result - if (output[1] > output[0]) { - printf("Person detected.\n"); - } else { - printf("No person detected.\n"); - } - - // The FVP will shut down when it receives "EXITTHESIM" on the UART - printf("EXITTHESIM\n"); - while (1 == 1) - ; - return 0; -} diff --git a/apps/microtvm/ethosu/.gitignore b/apps/microtvm/ethosu/.gitignore deleted file mode 100644 index 3ef8f08db900..000000000000 --- a/apps/microtvm/ethosu/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -include/inputs.h -include/outputs.h -include/labels.h diff --git a/apps/microtvm/ethosu/Makefile b/apps/microtvm/ethosu/Makefile deleted file mode 100644 index 4a1d8d563d43..000000000000 --- a/apps/microtvm/ethosu/Makefile +++ /dev/null @@ -1,138 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Makefile to build demo - -# Setup build environment -BUILD_DIR := build - -ARM_CPU = ARMCM55 -ETHOSU_PATH = /opt/arm/ethosu -ETHOSU_DRIVER_PATH ?= ${ETHOSU_PATH}/core_driver -CMSIS_PATH ?= ${ETHOSU_PATH}/cmsis -ETHOSU_PLATFORM_PATH ?= ${ETHOSU_PATH}/core_platform -STANDALONE_CRT_PATH := $(abspath $(BUILD_DIR))/runtime -CORSTONE_300_PATH = ${ETHOSU_PLATFORM_PATH}/targets/corstone-300 -PKG_COMPILE_OPTS = -g -Wall -O2 -Wno-incompatible-pointer-types -Wno-format -mcpu=cortex-m55 -mthumb -mfloat-abi=hard -std=gnu99 -CMAKE ?= cmake -CC = arm-none-eabi-gcc -AR = arm-none-eabi-ar -RANLIB = arm-none-eabi-ranlib -PKG_CFLAGS = ${PKG_COMPILE_OPTS} \ - -I${STANDALONE_CRT_PATH}/include \ - -I${STANDALONE_CRT_PATH}/src/runtime/crt/include \ - -I${PWD}/${BUILD_DIR}/crt_config \ - -I${PWD}/include \ - -I${ETHOSU_DRIVER_PATH}/include \ - -I${ETHOSU_PLATFORM_PATH}/drivers/uart/include \ - -I${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Include/ \ - -I${CMSIS_PATH}/CMSIS/Core/Include \ - -I${CMSIS_PATH}/CMSIS-NN/Include \ - -I${CMSIS_PATH}/CMSIS/DSP/Include \ - -I$(abspath $(BUILD_DIR))/codegen/host/include \ - -DETHOSU_TEST_RUNNER_TOL=${ETHOSU_TEST_RUNNER_TOL} -CMAKE_FLAGS = -DCMAKE_TOOLCHAIN_FILE=$(abspath $(BUILD_DIR))/../arm-none-eabi-gcc.cmake \ - -DTARGET_CPU=cortex-m55 -DRIVER_CMAKE_FLAGS = $(CMAKE_FLAGS) \ - -DETHOSU_LOG_SEVERITY=debug -PKG_LDFLAGS = -lm -specs=nosys.specs -static -T corstone300.ld - -$(ifeq VERBOSE,1) -QUIET ?= -$(else) -QUIET ?= @ -$(endif) - -ifdef FREERTOS_PATH -DEMO_MAIN = src/demo_freertos.c -FREERTOS_KERNEL = $(FREERTOS_PATH)/FreeRTOS/Source -FREERTOS_FLAGS = -I$(FREERTOS_KERNEL)/include/ \ - -I$(FREERTOS_KERNEL)/crt_config/ \ - -I$(FREERTOS_KERNEL)/portable/GCC/ARM_CM33_NTZ/non_secure/ -FREERTOS_SOURCES = $(FREERTOS_KERNEL)/portable/GCC/ARM_CM33_NTZ/non_secure/port.c \ - $(FREERTOS_KERNEL)/portable/GCC/ARM_CM33_NTZ/non_secure/portasm.c \ - $(FREERTOS_KERNEL)/tasks.c \ - $(FREERTOS_KERNEL)/list.c \ - $(FREERTOS_KERNEL)/queue.c \ - $(FREERTOS_KERNEL)/timers.c \ - $(FREERTOS_KERNEL)/event_groups.c \ - $(FREERTOS_KERNEL)/portable/MemMang/heap_3.c -else -DEMO_MAIN = src/demo_bare_metal.c -endif - -CODEGEN_SRCS = $(wildcard $(abspath $(BUILD_DIR))/codegen/host/src/*.c) -CODEGEN_OBJS = $(subst .c,.o,$(CODEGEN_SRCS)) -CMSIS_STARTUP_SRCS = $(wildcard ${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Source/*.c) -CMSIS_NN_SOFTMAX_SRCS = $(shell find ${CMSIS_PATH}/CMSIS-NN/Source/SoftmaxFunctions/*.c) -CORSTONE_300_SRCS = $(wildcard ${CORSTONE_300_PATH}/*.c) - -demo: $(BUILD_DIR)/demo - -$(BUILD_DIR)/stack_allocator.o: $(STANDALONE_CRT_PATH)/src/runtime/crt/memory/stack_allocator.c - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ - -$(BUILD_DIR)/crt_backend_api.o: $(STANDALONE_CRT_PATH)/src/runtime/crt/common/crt_backend_api.c - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ - -# Build generated code -$(BUILD_DIR)/libcodegen.a: $(CODEGEN_SRCS) - $(QUIET)cd $(abspath $(BUILD_DIR)/codegen/host/src) && $(CC) -c $(PKG_CFLAGS) $(CODEGEN_SRCS) - $(QUIET)$(AR) -cr $(abspath $(BUILD_DIR)/libcodegen.a) $(CODEGEN_OBJS) - $(QUIET)$(RANLIB) $(abspath $(BUILD_DIR)/libcodegen.a) - -# Build CMSIS startup code -${BUILD_DIR}/libcmsis_startup.a: $(CMSIS_STARTUP_SRCS) - $(QUIET)mkdir -p $(abspath $(BUILD_DIR)/libcmsis_startup) - $(QUIET)cd $(abspath $(BUILD_DIR)/libcmsis_startup) && $(CC) -c $(PKG_CFLAGS) -D${ARM_CPU} $^ - $(QUIET)$(AR) -cr $(abspath $(BUILD_DIR)/libcmsis_startup.a) $(abspath $(BUILD_DIR))/libcmsis_startup/*.o - $(QUIET)$(RANLIB) $(abspath $(BUILD_DIR)/libcmsis_startup.a) 
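tvmc writes the compiled model as a Model Library Format archive, and the run_demo.sh scripts in this diff unpack it inside build/ (`tar -xf module.tar`) so that the CODEGEN_SRCS wildcard defined above can pick up the generated C sources and archive them into libcodegen.a. A small illustrative sketch of that unpacking step, assuming the default build/ directory and module.tar name used by these demos:

```python
# Unpack the Model Library Format archive produced by tvmc into build/,
# mirroring the "tar -xf module.tar" step in run_demo.sh. Paths are the
# demo defaults and are assumptions, not fixed by TVM itself.
import pathlib
import tarfile

build_dir = pathlib.Path("build")
with tarfile.open(build_dir / "module.tar") as mlf:
    mlf.extractall(build_dir)

# These are the files the Makefile's CODEGEN_SRCS wildcard compiles into libcodegen.a.
for src in sorted((build_dir / "codegen" / "host" / "src").glob("*.c")):
    print(src)
```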
- -# Build Arm(R) Ethos(TM)-U core driver -${BUILD_DIR}/ethosu_core_driver/libethosu_core_driver.a: - $(QUIET)mkdir -p $(@D) - $(QUIET)cd $(ETHOSU_DRIVER_PATH) && $(CMAKE) -B $(abspath $(BUILD_DIR)/ethosu_core_driver) $(DRIVER_CMAKE_FLAGS) - $(QUIET)cd $(abspath $(BUILD_DIR)/ethosu_core_driver) && $(MAKE) - -# Build CMSIS-NN Softmax -${BUILD_DIR}/libcmsis_nn_softmax.a: $(CMSIS_NN_SOFTMAX_SRCS) - $(QUIET)mkdir -p $(abspath $(BUILD_DIR)/libcmsis_nn) - $(QUIET)cd $(abspath $(BUILD_DIR)/libcmsis_nn) && $(CC) -c $(PKG_CFLAGS) -D${ARM_CPU} $^ - $(QUIET)$(AR) -cr $(abspath $(BUILD_DIR)/libcmsis_nn_softmax.a) $(abspath $(BUILD_DIR))/libcmsis_nn/*.o - $(QUIET)$(RANLIB) $(abspath $(BUILD_DIR)/libcmsis_nn_softmax.a) - -# Build UART driver -${BUILD_DIR}/ethosu_core_platform/libethosu_uart_cmsdk_apb.a: - $(QUIET)mkdir -p $(@D) - $(QUIET)cd ${ETHOSU_PLATFORM_PATH}/drivers/uart && $(CMAKE) -B $(abspath $(BUILD_DIR)/ethosu_core_platform) $(CMAKE_FLAGS) - $(QUIET)cd $(abspath $(BUILD_DIR)/ethosu_core_platform) && $(MAKE) - -$(BUILD_DIR)/demo: $(DEMO_MAIN) src/tvm_ethosu_runtime.c $(FREERTOS_SOURCES) $(CORSTONE_300_SRCS) $(BUILD_DIR)/stack_allocator.o $(BUILD_DIR)/crt_backend_api.o ${BUILD_DIR}/libcodegen.a ${BUILD_DIR}/libcmsis_startup.a ${BUILD_DIR}/ethosu_core_driver/libethosu_core_driver.a ${BUILD_DIR}/libcmsis_nn_softmax.a ${BUILD_DIR}/ethosu_core_platform/libethosu_uart_cmsdk_apb.a - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) $(PKG_CFLAGS) $(FREERTOS_FLAGS) -o $@ $^ $(PKG_LDFLAGS) - -clean: - $(QUIET)rm -rf $(BUILD_DIR)/codegen - -cleanall: - $(QUIET)rm -rf $(BUILD_DIR) - -.SUFFIXES: - -.DEFAULT: demo diff --git a/apps/microtvm/ethosu/README.md b/apps/microtvm/ethosu/README.md deleted file mode 100644 index 69834837ac42..000000000000 --- a/apps/microtvm/ethosu/README.md +++ /dev/null @@ -1,102 +0,0 @@ - - - - - - - - - - - - - - - - - - -Running TVM on bare metal Arm(R) Cortex(R)-M55 CPU, Ethos(TM)-U55 NPU and CMSIS-NN -================================================================================== - -This folder contains an example of how to use TVM to run a model -on bare metal Cortex(R)-M55 CPU, Ethos(TM)-U55 NPU and CMSIS-NN. - -Prerequisites -------------- -If the demo is run in the ci_cpu Docker container provided with TVM, then the following -software will already be installed. - -If the demo is not run in the ci_cpu Docker container, then you will need the following: -- Software required to build the Ethos(TM)-U driver stack and run the demo (These can all be - installed by running tvm/docker/install/ubuntu_install_ethosu_driver_stack.sh.) 
- - [Fixed Virtual Platform (FVP) based on Arm(R) Corstone(TM)-300 software](https://developer.arm.com/tools-and-software/open-source-software/arm-platforms-software/arm-ecosystem-fvps) - - [cmake 3.19.5](https://github.com/Kitware/CMake/releases/) - - [GCC toolchain from Arm(R)](https://developer.arm.com/-/media/Files/downloads/gnu-rm/10-2020q4/gcc-arm-none-eabi-10-2020-q4-major-x86_64-linux.tar.bz2) - - [Arm(R) Ethos(TM)-U NPU driver stack](https://review.mlplatform.org) - - [CMSIS](https://github.com/ARM-software/CMSIS_5) - - [CMSIS NN](https://github.com/ARM-software/CMSIS-NN) -- The python libraries listed in the requirements.txt of this directory - - These can be installed by running the following from the current directory: - ```bash - pip install -r ./requirements.txt - ``` - -You will also need TVM which can either be: - - Built from source (see [Install from Source](https://tvm.apache.org/docs/install/from_source.html)) - - When building from source, the following need to be set in config.cmake: - - set(USE_ETHOSU ON) - - set(USE_CMSISNN ON) - - set(USE_MICRO ON) - - set(USE_LLVM ON) - - Installed from TLCPack(see [TLCPack](https://tlcpack.ai/)) - -You will need to update your PATH environment variable to include the path to cmake 3.19.5 and the FVP. -For example if you've installed these in ```/opt/arm``` , then you would do the following: -```bash -export PATH=/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4:/opt/arm/cmake/bin:$PATH -``` - -Running the demo application ----------------------------- -Type the following command to run the bare metal demo application ([src/demo_bare_metal.c](./src/demo_bare_metal.c)): - -```bash -./run_demo.sh -``` - -To run the demo using FreeRTOS task scheduling and queues ([src/demo_freertos.c](./src/demo_freertos.c)), specify the path to FreeRTOS using `--freertos_path`, for example: -``` -./run_demo.sh --freertos_path /opt/freertos/FreeRTOSv202112.00/ -``` - -If the Ethos(TM)-U driver and/or CMSIS have not been installed in /opt/arm/ethosu then -the locations for these can be specified as arguments to run_demo.sh, for example: - -```bash -./run_demo.sh --ethosu_driver_path /home/tvm-user/ethosu/core_driver --cmsis_path /home/tvm-user/cmsis \ ---ethosu_platform_path /home/tvm-user/ethosu/core_platform -``` - -This will: -- Download a quantized (int8) mobilenet v2 model -- Use tvmc to compile the model for Cortex(R)-M55 CPU, Ethos(TM)-U55 NPU and CMSIS-NN -- Download an image of a penguin to run the model on -- Create a C header file inputs.c containing the image data as a C array -- Create a C header file outputs.c containing a C array where the output of inference will be stored -- Build the Ethos(TM)-U55 core driver -- Build the demo application -- Run the demo application on a Fixed Virtual Platform (FVP) based on Arm(R) Corstone(TM)-300 software -- The application will display what the image has been classified as e.g. "The image has been classified as 'king penguin'" - -Using your own image --------------------- -The create_image.py script takes a single argument on the command line which is the path of the -image to be converted into an array of bytes for consumption by the model. 
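As a sketch of what that conversion amounts to (mirroring the convert_image.py script deleted alongside this README: resize to the model's 224x224 input, shift the uint8 pixels into int8 range by subtracting 128, and emit the bytes as a C array), assuming Pillow and NumPy are installed; the file names and the omission of the section attributes are illustrative simplifications.

```python
# Illustrative image-to-C-array conversion, modelled on convert_image.py.
import numpy as np
from PIL import Image

img = Image.open("penguin.jpg").resize((224, 224))
data = np.asarray(img).astype("float32")
data = np.expand_dims(data, axis=0)      # add the batch dimension (NHWC input)
data = (data - 128).astype(np.int8)      # shift into the int8 range the quantized model expects

with open("inputs.h", "w") as header:    # the real script writes into ./include/
    header.write("#include <stddef.h>\n")
    header.write(f"const size_t input_len = {data.size};\n")
    header.write('int8_t input[] = "')
    header.write("".join(f"\\x{b:02x}" for b in data.tobytes()))
    header.write('";\n')
```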
- -The demo can be modified to use an image of your choice by changing the following lines in run_demo.sh - -```bash -curl -sS https://upload.wikimedia.org/wikipedia/commons/1/18/Falkland_Islands_Penguins_29.jpg -o penguin.jpg -python3 ./convert_image.py ./build/penguin.jpg -``` diff --git a/apps/microtvm/ethosu/arm-none-eabi-gcc.cmake b/apps/microtvm/ethosu/arm-none-eabi-gcc.cmake deleted file mode 100644 index 415b3139be1b..000000000000 --- a/apps/microtvm/ethosu/arm-none-eabi-gcc.cmake +++ /dev/null @@ -1,79 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -if (__TOOLCHAIN_LOADED) - return() -endif() -set(__TOOLCHAIN_LOADED TRUE) - -set(CMAKE_SYSTEM_NAME Generic) -set(CMAKE_C_COMPILER "arm-none-eabi-gcc") -set(CMAKE_CXX_COMPILER "arm-none-eabi-g++") -set(CMAKE_SYSTEM_PROCESSOR "cortex-m55" CACHE STRING "Select Arm(R) Cortex(R)-M architecture. (cortex-m0, cortex-m3, cortex-m33, cortex-m4, cortex-m55, cortex-m7, etc)") - -set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) - -SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) -SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) -SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) - -set(CMAKE_C_STANDARD 99) -set(CMAKE_CXX_STANDARD 14) - -# The system processor could for example be set to cortex-m33+nodsp+nofp. 
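The CMake code below splits a value such as cortex-m55+nofp into the base CPU and its feature modifiers, then chooses the float ABI: an explicit +fp or +nofp modifier wins, otherwise cortex-m33 and cortex-m55 default to hard float and everything else to soft. The same selection rules, re-expressed as a small Python sketch for readability (this is only an illustration of the toolchain file's intent, not something the build uses):

```python
# Mirror the float-ABI selection in arm-none-eabi-gcc.cmake:
# explicit +fp/+nofp modifiers win, otherwise cortex-m33/-m55 default to hard float.
def float_abi(system_processor: str) -> str:
    cpu, *features = system_processor.split("+")
    if "fp" in features:
        return "hard"
    if "nofp" in features:
        return "soft"
    if cpu in ("cortex-m33", "cortex-m55"):
        return "hard"
    return "soft"

assert float_abi("cortex-m55") == "hard"
assert float_abi("cortex-m33+nodsp+nofp") == "soft"
assert float_abi("cortex-m4+fp") == "hard"
assert float_abi("cortex-m0") == "soft"
```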
-set(__CPU_COMPILE_TARGET ${CMAKE_SYSTEM_PROCESSOR}) -string(REPLACE "+" ";" __CPU_FEATURES ${__CPU_COMPILE_TARGET}) -list(POP_FRONT __CPU_FEATURES CMAKE_SYSTEM_PROCESSOR) - -string(FIND ${__CPU_COMPILE_TARGET} "+" __OFFSET) -if(__OFFSET GREATER_EQUAL 0) - string(SUBSTRING ${__CPU_COMPILE_TARGET} ${__OFFSET} -1 CPU_FEATURES) -endif() - -# Add -mcpu to the compile options to override the -mcpu the CMake toolchain adds -add_compile_options(-mcpu=${__CPU_COMPILE_TARGET}) - -# Set floating point unit -if("${__CPU_COMPILE_TARGET}" MATCHES "\\+fp") - set(FLOAT hard) -elseif("${__CPU_COMPILE_TARGET}" MATCHES "\\+nofp") - set(FLOAT soft) -elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m33" OR - "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m55") - set(FLOAT hard) -else() - set(FLOAT soft) -endif() - -add_compile_options(-mfloat-abi=${FLOAT}) -add_link_options(-mfloat-abi=${FLOAT}) - -# Link target -add_link_options(-mcpu=${__CPU_COMPILE_TARGET}) -add_link_options(-Xlinker -Map=output.map) - -# -# Compile options -# -set(cxx_flags "-fno-unwind-tables;-fno-rtti;-fno-exceptions") - -add_compile_options("-Wall;-Wextra;-Wsign-compare;-Wunused;-Wswitch-default;\ --Wdouble-promotion;-Wredundant-decls;-Wshadow;-Wnull-dereference;\ --Wno-format-extra-args;-Wno-unused-function;-Wno-unused-label;\ --Wno-missing-field-initializers;-Wno-return-type;-Wno-format;-Wno-int-conversion" - "$<$:${cxx_flags}>" -) diff --git a/apps/microtvm/ethosu/convert_image.py b/apps/microtvm/ethosu/convert_image.py deleted file mode 100755 index 924d4bafdeb0..000000000000 --- a/apps/microtvm/ethosu/convert_image.py +++ /dev/null @@ -1,75 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import os -import pathlib -import re -import sys -from PIL import Image -import numpy as np - - -def create_header_file(name, section, tensor_name, tensor_data, output_path): - """ - This function generates a header file containing the data from the numpy array provided. 
- """ - file_path = pathlib.Path(f"{output_path}/" + name).resolve() - # Create header file with npy_data as a C array - raw_path = file_path.with_suffix(".h").resolve() - with open(raw_path, "w") as header_file: - header_file.write( - "#include \n" - + f"const size_t {tensor_name}_len = {tensor_data.size};\n" - + f'int8_t {tensor_name}[] __attribute__((section("{section}"), aligned(16))) = "' - ) - - data_hexstr = tensor_data.tobytes().hex() - for i in range(0, len(data_hexstr), 2): - header_file.write(f"\\x{data_hexstr[i:i+2]}") - header_file.write('";\n\n') - - -def create_headers(image_name): - """ - This function generates C header files for the input and output arrays required to run inferences - """ - img_path = os.path.join("./", f"{image_name}") - - # Resize image to 224x224 - resized_image = Image.open(img_path).resize((224, 224)) - img_data = np.asarray(resized_image).astype("float32") - - # # Add the batch dimension, as we are expecting 4-dimensional input: NCHW. - img_data = np.expand_dims(img_data, axis=0) - - # Create input header file - input_data = img_data - 128 - input_data = input_data.astype(np.int8) - create_header_file("inputs", "ethosu_scratch", "input", input_data, "./include") - # Create output header file - output_data = np.zeros([1001], np.int8) - create_header_file( - "outputs", - "output_data_sec", - "output", - output_data, - "./include", - ) - - -if __name__ == "__main__": - create_headers(sys.argv[1]) diff --git a/apps/microtvm/ethosu/convert_labels.py b/apps/microtvm/ethosu/convert_labels.py deleted file mode 100755 index c17cdb99fdac..000000000000 --- a/apps/microtvm/ethosu/convert_labels.py +++ /dev/null @@ -1,50 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import os -import pathlib -import sys - -from tvm.micro import copy_crt_config_header - - -def create_labels_header(labels_file, section, output_path): - """ - This function generates a header file containing the ImageNet labels as an array of strings - """ - labels_path = pathlib.Path(labels_file).resolve() - file_path = pathlib.Path(f"{output_path}/labels.h").resolve() - - with open(labels_path) as f: - labels = f.readlines() - - with open(file_path, "w") as header_file: - header_file.write(f'char* labels[] __attribute__((section("{section}"), aligned(16))) = {{') - - for _, label in enumerate(labels): - header_file.write(f'"{label.rstrip()}",') - - header_file.write("};\n") - - -if __name__ == "__main__": - create_labels_header(sys.argv[1], "ethosu_scratch", "./include") - - crt_config_output_path = pathlib.Path(__file__).parent.resolve() / "build" / "crt_config" - if not crt_config_output_path.exists(): - crt_config_output_path.mkdir() - copy_crt_config_header("crt", crt_config_output_path) diff --git a/apps/microtvm/ethosu/corstone300.ld b/apps/microtvm/ethosu/corstone300.ld deleted file mode 100644 index d073ea329ed8..000000000000 --- a/apps/microtvm/ethosu/corstone300.ld +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -/*------------------ Reference System Memories ------------- - +===================+============+=======+============+============+ - | Memory | Address | Size | CPU Access | NPU Access | - +===================+============+=======+============+============+ - | ITCM | 0x00000000 | 512KB | Yes (RO) | No | - +-------------------+------------+-------+------------+------------+ - | DTCM | 0x20000000 | 512KB | Yes (R/W) | No | - +-------------------+------------+-------+------------+------------+ - | SSE-300 SRAM | 0x21000000 | 2MB | Yes (R/W) | Yes (R/W) | - +-------------------+------------+-------+------------+------------+ - | Data SRAM | 0x01000000 | 2MB | Yes (R/W) | Yes (R/W) | - +-------------------+------------+-------+------------+------------+ - | DDR | 0x60000000 | 32MB | Yes (R/W) | Yes (R/W) | - +-------------------+------------+-------+------------+------------+ */ - -/*---------------------- ITCM Configuration ---------------------------------- - Flash Configuration - Flash Base Address <0x0-0xFFFFFFFF:8> - Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__ROM_BASE = 0x00000000; -__ROM_SIZE = 0x00080000; - -/*--------------------- DTCM RAM Configuration ---------------------------- - RAM Configuration - RAM Base Address <0x0-0xFFFFFFFF:8> - RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__RAM_BASE = 0x20000000; -__RAM_SIZE = 0x00080000; - -/*----------------------- Data SRAM Configuration ------------------------------ - Data SRAM Configuration - DATA_SRAM Base Address <0x0-0xFFFFFFFF:8> - DATA_SRAM Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__DATA_SRAM_BASE = 0x01000000; -__DATA_SRAM_SIZE = 0x00200000; - -/*--------------------- Embedded SRAM Configuration ---------------------------- - SRAM Configuration - SRAM Base Address <0x0-0xFFFFFFFF:8> - SRAM Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__SRAM_BASE = 0x21000000; -__SRAM_SIZE = 0x00200000; - -/*--------------------- Stack / Heap Configuration ---------------------------- - Stack / Heap Configuration - Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> - Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__STACK_SIZE = 0x00008000; -__HEAP_SIZE = 0x00008000; - -/*--------------------- Embedded RAM Configuration ---------------------------- - DDR Configuration - DDR Base Address <0x0-0xFFFFFFFF:8> - DDR Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__DDR_BASE = 0x60000000; -__DDR_SIZE = 0x02000000; - -/* - *-------------------- <<< end of configuration section >>> ------------------- - */ - -MEMORY -{ - ITCM (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE - DTCM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE - DATA_SRAM (rwx) : ORIGIN = __DATA_SRAM_BASE, LENGTH = __DATA_SRAM_SIZE - SRAM (rwx) : ORIGIN = __SRAM_BASE, LENGTH = __SRAM_SIZE - DDR (rwx) : ORIGIN = __DDR_BASE, LENGTH = __DDR_SIZE -} - -/* Linker script to place sections and symbol values. Should be used together - * with other linker script that defines memory regions ITCM and RAM. 
- * It references following symbols, which must be defined in code: - * Reset_Handler : Entry of reset handler - * - * It defines following symbols, which code can use without definition: - * __exidx_start - * __exidx_end - * __copy_table_start__ - * __copy_table_end__ - * __zero_table_start__ - * __zero_table_end__ - * __etext - * __data_start__ - * __preinit_array_start - * __preinit_array_end - * __init_array_start - * __init_array_end - * __fini_array_start - * __fini_array_end - * __data_end__ - * __bss_start__ - * __bss_end__ - * __end__ - * end - * __HeapLimit - * __StackLimit - * __StackTop - * __stack - */ -ENTRY(Reset_Handler) - -SECTIONS -{ - /* .ddr is placed before .text so that .rodata.tvm is encountered before .rodata* */ - .ddr : - { - . = ALIGN(16); - *(ethosu_scratch) - *(output_data_sec) - . = ALIGN (16); - *(.rodata.tvm) - . = ALIGN (16); - } > DDR - - .text : - { - KEEP(*(.vectors)) - *(.text*) - - KEEP(*(.init)) - KEEP(*(.fini)) - - /* .ctors */ - *crtbegin.o(.ctors) - *crtbegin?.o(.ctors) - *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) - *(SORT(.ctors.*)) - *(.ctors) - - /* .dtors */ - *crtbegin.o(.dtors) - *crtbegin?.o(.dtors) - *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) - *(SORT(.dtors.*)) - *(.dtors) - - *(.rodata*) - - KEEP(*(.eh_frame*)) - } > ITCM - - .ARM.extab : - { - *(.ARM.extab* .gnu.linkonce.armextab.*) - } > ITCM - - __exidx_start = .; - .ARM.exidx : - { - *(.ARM.exidx* .gnu.linkonce.armexidx.*) - } > ITCM - __exidx_end = .; - - .copy.table : - { - . = ALIGN(4); - __copy_table_start__ = .; - LONG (__etext) - LONG (__data_start__) - LONG (__data_end__ - __data_start__) - /* Add each additional data section here */ - __copy_table_end__ = .; - } > ITCM - - .zero.table : - { - . = ALIGN(4); - __zero_table_start__ = .; - __zero_table_end__ = .; - } > ITCM - - /** - * Location counter can end up 2byte aligned with narrow Thumb code but - * __etext is assumed by startup code to be the LMA of a section in DTCM - * which must be 4byte aligned - */ - __etext = ALIGN (4); - - .sram : - { - . = ALIGN(16); - *(.bss.ethosu_fast_memory); - . = ALIGN(16); - *(.data.tvm); - . = ALIGN(16); - } > SRAM AT > SRAM - - .data : AT (__etext) - { - __data_start__ = .; - *(vtable) - *(.data) - *(.data.*) - - . = ALIGN(4); - /* preinit data */ - PROVIDE_HIDDEN (__preinit_array_start = .); - KEEP(*(.preinit_array)) - PROVIDE_HIDDEN (__preinit_array_end = .); - - . = ALIGN(4); - /* init data */ - PROVIDE_HIDDEN (__init_array_start = .); - KEEP(*(SORT(.init_array.*))) - KEEP(*(.init_array)) - PROVIDE_HIDDEN (__init_array_end = .); - - - . = ALIGN(4); - /* finit data */ - PROVIDE_HIDDEN (__fini_array_start = .); - KEEP(*(SORT(.fini_array.*))) - KEEP(*(.fini_array)) - PROVIDE_HIDDEN (__fini_array_end = .); - - KEEP(*(.jcr*)) - . = ALIGN(4); - /* All data end */ - __data_end__ = .; - - } > DTCM - - .bss.noinit (NOLOAD): - { - . = ALIGN(16); - *(.bss.noinit.*) - . = ALIGN(16); - } > SRAM AT > SRAM - - .bss : - { - . = ALIGN(4); - __bss_start__ = .; - *(.bss) - *(.bss.*) - *(COMMON) - . = ALIGN(4); - __bss_end__ = .; - } > DTCM AT > DTCM - - .data_sram : - { - . = ALIGN(16); - } > DATA_SRAM - - .heap (COPY) : - { - . = ALIGN(8); - __end__ = .; - PROVIDE(end = .); - . = . + __HEAP_SIZE; - . = ALIGN(8); - __HeapLimit = .; - } > DTCM - - .stack (ORIGIN(DTCM) + LENGTH(DTCM) - __STACK_SIZE) (COPY) : - { - . = ALIGN(8); - __StackLimit = .; - . = . + __STACK_SIZE; - . 
= ALIGN(8); - __StackTop = .; - } > DTCM - PROVIDE(__stack = __StackTop); - - /* Check if data + stack exceeds DTCM limit */ - ASSERT(__StackLimit >= __bss_end__, "region DTCM overflowed with stack") -} diff --git a/apps/microtvm/ethosu/include/FreeRTOSConfig.h b/apps/microtvm/ethosu/include/FreeRTOSConfig.h deleted file mode 100644 index a123581d3b77..000000000000 --- a/apps/microtvm/ethosu/include/FreeRTOSConfig.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* Please refer to http://www.freertos.org/a00110.html for refernce. */ -#ifndef FREERTOS_CONFIG_H -#define FREERTOS_CONFIG_H -/****************************************************************************** - * Defines - **********SYSTEM_CORE_CLOCK********************************************************************/ -/* Hardware features */ -#define configENABLE_MPU 0 -#define configENABLE_FPU 0 -#define configENABLE_TRUSTZONE 0 -/* Scheduling */ -#define configCPU_CLOCK_HZ 25000000 -#define configUSE_PORT_OPTIMISED_TASK_SELECTION 0 -#define configUSE_PREEMPTION 1 -#define configUSE_TIME_SLICING 0 -#define configMAX_PRIORITIES 5 -#define configIDLE_SHOULD_YIELD 1 -#define configUSE_16_BIT_TICKS 0 -#define configRUN_FREERTOS_SECURE_ONLY 1 -/* Stack and heap */ -#define configMINIMAL_STACK_SIZE (uint16_t)128 -#define configMINIMAL_SECURE_STACK_SIZE 1024 -#define configTOTAL_HEAP_SIZE (size_t)(50 * 1024) -#define configMAX_TASK_NAME_LEN 12 -/* OS features */ -#define configUSE_MUTEXES 1 -#define configUSE_TICKLESS_IDLE 1 -#define configUSE_APPLICATION_TASK_TAG 0 -#define configUSE_NEWLIB_REENTRANT 0 -#define configUSE_CO_ROUTINES 0 -#define configUSE_COUNTING_SEMAPHORES 1 -#define configUSE_RECURSIVE_MUTEXES 1 -#define configUSE_QUEUE_SETS 0 -#define configUSE_TASK_NOTIFICATIONS 1 -#define configUSE_TRACE_FACILITY 1 -/* Hooks */ -#define configUSE_IDLE_HOOK 0 -#define configUSE_TICK_HOOK 0 -#define configUSE_MALLOC_FAILED_HOOK 0 -/* Debug features */ -#define configCHECK_FOR_STACK_OVERFLOW 0 -#define configASSERT(x) \ - if ((x) == 0) { \ - taskDISABLE_INTERRUPTS(); \ - for (;;) \ - ; \ - } -#define configQUEUE_REGISTRY_SIZE 0 -/* Timers and queues */ -#define configUSE_TIMERS 1 -#define configTIMER_TASK_PRIORITY (configMAX_PRIORITIES - 1) -#define configTIMER_TASK_STACK_DEPTH configMINIMAL_STACK_SIZE -#define configTIMER_QUEUE_LENGTH 5 -/* Task settings */ -#define INCLUDE_vTaskPrioritySet 1 -#define INCLUDE_uxTaskPriorityGet 1 -#define INCLUDE_vTaskDelete 1 -#define INCLUDE_vTaskCleanUpResources 0 -#define INCLUDE_vTaskSuspend 1 -#define INCLUDE_vTaskDelayUntil 1 -#define INCLUDE_vTaskDelay 1 -#define INCLUDE_uxTaskGetStackHighWaterMark 0 -#define INCLUDE_xTaskGetIdleTaskHandle 0 -#define INCLUDE_eTaskGetState 1 -#define 
INCLUDE_xTaskResumeFromISR 0 -#define INCLUDE_xTaskGetCurrentTaskHandle 1 -#define INCLUDE_xTaskGetSchedulerState 0 -#define INCLUDE_xSemaphoreGetMutexHolder 0 -#define INCLUDE_xTimerPendFunctionCall 1 -#define configUSE_STATS_FORMATTING_FUNCTIONS 1 -#define configCOMMAND_INT_MAX_OUTPUT_SIZE 2048 -#ifdef __NVIC_PRIO_BITS -#define configPRIO_BITS __NVIC_PRIO_BITS -#else -#define configPRIO_BITS 3 -#endif -/* Interrupt settings */ -#define configLIBRARY_LOWEST_INTERRUPT_PRIORITY 0x07 -#define configLIBRARY_MAX_SYSCALL_INTERRUPT_PRIORITY 5 -#define configKERNEL_INTERRUPT_PRIORITY \ - (configLIBRARY_LOWEST_INTERRUPT_PRIORITY << (8 - configPRIO_BITS)) -#define configMAX_SYSCALL_INTERRUPT_PRIORITY \ - (configLIBRARY_MAX_SYSCALL_INTERRUPT_PRIORITY << (8 - configPRIO_BITS)) -#ifndef __IASMARM__ -#define configGENERATE_RUN_TIME_STATS 0 -#define portCONFIGURE_TIMER_FOR_RUN_TIME_STATS() -#define portGET_RUN_TIME_COUNTER_VALUE() 0 -#define configTICK_RATE_HZ (TickType_t)1000 -#endif /* __IASMARM__ */ -#define xPortPendSVHandler PendSV_Handler -#define vPortSVCHandler SVC_Handler -#define xPortSysTickHandler SysTick_Handler -#endif /* FREERTOS_CONFIG_H */ diff --git a/apps/microtvm/ethosu/include/ethosu_55.h b/apps/microtvm/ethosu/include/ethosu_55.h deleted file mode 100644 index a6c45643a238..000000000000 --- a/apps/microtvm/ethosu/include/ethosu_55.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#ifndef TVM_APPS_MICROTVM_ETHOS_U_ETHOSU_55_H_ -#define TVM_APPS_MICROTVM_ETHOS_U_ETHOSU_55_H_ - -/* Define Arm(R) Ethos(TM)-U55 specific IRQs & base address */ -#define ETHOSU_NPU_FAIL (1 << 4) -#define ETHOSU_IRQ ((IRQn_Type)56) -#define ETHOSU_BASE_ADDRESS ((void*)0x48102000) - -#endif // TVM_APPS_MICROTVM_ETHOS_U_ETHOSU_55_H_ diff --git a/apps/microtvm/ethosu/include/ethosu_mod.h b/apps/microtvm/ethosu/include/ethosu_mod.h deleted file mode 100644 index 9f058bbef4e1..000000000000 --- a/apps/microtvm/ethosu/include/ethosu_mod.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#ifndef TVM_APPS_MICROTVM_ETHOS_U_ETHOSU_MOD_H_ -#define TVM_APPS_MICROTVM_ETHOS_U_ETHOSU_MOD_H_ - -#include -// TODO: Remove device specific information once RTOS support is available -#include -#include - -#include "ethosu_55.h" - -struct ethosu_driver ethosu0_driver; - -void ethosuIrqHandler0() { ethosu_irq_handler(ðosu0_driver); } - -// Initialize Arm(R) Ethos(TM)-U NPU driver -int EthosuInit() { - if (ethosu_init(ðosu0_driver, (void*)ETHOSU_BASE_ADDRESS, NULL, 0, 1, 1)) { - printf("Failed to initialize NPU.\n"); - return -1; - } - - // Assumes SCB->VTOR points to RW memory - NVIC_SetVector(ETHOSU_IRQ, (uint32_t)ðosuIrqHandler0); - NVIC_EnableIRQ(ETHOSU_IRQ); - - return 0; -} - -#endif // TVM_APPS_MICROTVM_ETHOS_U_ETHOSU_MOD_H_ diff --git a/apps/microtvm/ethosu/include/tvm_ethosu_runtime.h b/apps/microtvm/ethosu/include/tvm_ethosu_runtime.h deleted file mode 100644 index 8352fa56981e..000000000000 --- a/apps/microtvm/ethosu/include/tvm_ethosu_runtime.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef TVM_RUNTIME_CONTRIB_ETHOSU_ETHOSU_RUNTIME_H_ -#define TVM_RUNTIME_CONTRIB_ETHOSU_ETHOSU_RUNTIME_H_ - -#include -#include -#include - -typedef void tvm_device_ethos_u_t; - -int32_t TVMEthosULaunch(tvm_device_ethos_u_t* resource_handle, void* cms_data, size_t cms_data_size, - uint64_t* base_addrs, size_t* base_addrs_size, int num_tensors); - -int32_t TVMDeviceEthosUActivate(tvm_device_ethos_u_t* context); -int32_t TVMDeviceEthosUOpen(tvm_device_ethos_u_t* context); -int32_t TVMDeviceEthosUClose(tvm_device_ethos_u_t* context); -int32_t TVMDeviceEthosUDeactivate(tvm_device_ethos_u_t* context); - -#endif // TVM_RUNTIME_CONTRIB_ETHOSU_ETHOSU_RUNTIME_H_ diff --git a/apps/microtvm/ethosu/include/tvm_runtime.h b/apps/microtvm/ethosu/include/tvm_runtime.h deleted file mode 100644 index 2b59d9347027..000000000000 --- a/apps/microtvm/ethosu/include/tvm_runtime.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -void __attribute__((noreturn)) TVMPlatformAbort(tvm_crt_error_t error_code) { - printf("TVMPlatformAbort: %d\n", error_code); - printf("EXITTHESIM\n"); - exit(-1); -} - -tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { - return kTvmErrorFunctionCallNotImplemented; -} - -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { - return kTvmErrorFunctionCallNotImplemented; -} - -void TVMLogf(const char* msg, ...) { - va_list args; - va_start(args, msg); - vfprintf(stdout, msg, args); - va_end(args); -} - -TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) { return 0; } - -#ifdef __cplusplus -} -#endif diff --git a/apps/microtvm/ethosu/requirements.txt b/apps/microtvm/ethosu/requirements.txt deleted file mode 100644 index 29ae75b38b1a..000000000000 --- a/apps/microtvm/ethosu/requirements.txt +++ /dev/null @@ -1,241 +0,0 @@ -attrs==21.2.0 \ - --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ - --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb -cloudpickle==2.0.0 \ - --hash=sha256:5cd02f3b417a783ba84a4ec3e290ff7929009fe51f6405423cfccfadd43ba4a4 \ - --hash=sha256:6b2df9741d06f43839a3275c4e6632f7df6487a1f181f5f46a052d3c917c3d11 -decorator==5.1.0 \ - --hash=sha256:7b12e7c3c6ab203a29e157335e9122cb03de9ab7264b137594103fd4a683b374 \ - --hash=sha256:e59913af105b9860aa2c8d3272d9de5a56a4e608db9a2f167a8480b323d529a7 -ethos-u-vela==3.8.0 \ - --hash=sha256:cb0b1f5b1f886242d67ff0072efb88ac90cc87574ebe92fc98db4609f7797acf -flatbuffers==2.0.7 \ - --hash=sha256:0ae7d69c5b82bf41962ca5fde9cc43033bc9501311d975fd5a25e8a7d29c1245 \ - --hash=sha256:71e135d533be527192819aaab757c5e3d109cb10fbb01e687f6bdb7a61ad39d1 -lxml==4.6.3 \ - --hash=sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d \ - --hash=sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3 \ - --hash=sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2 \ - --hash=sha256:1b38116b6e628118dea5b2186ee6820ab138dbb1e24a13e478490c7db2f326ae \ - --hash=sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f \ - --hash=sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927 \ - --hash=sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3 \ - --hash=sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7 \ - --hash=sha256:3082c518be8e97324390614dacd041bb1358c882d77108ca1957ba47738d9d59 \ - --hash=sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f \ - --hash=sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade \ - --hash=sha256:36108c73739985979bf302006527cf8a20515ce444ba916281d1c43938b8bb96 \ - --hash=sha256:39b78571b3b30645ac77b95f7c69d1bffc4cf8c3b157c435a34da72e78c82468 \ - --hash=sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b \ - --hash=sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4 \ - --hash=sha256:4c61b3a0db43a1607d6264166b230438f85bfed02e8cff20c22e564d0faff354 \ - --hash=sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83 \ - --hash=sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04 \ - 
--hash=sha256:5c8c163396cc0df3fd151b927e74f6e4acd67160d6c33304e805b84293351d16 \ - --hash=sha256:64812391546a18896adaa86c77c59a4998f33c24788cadc35789e55b727a37f4 \ - --hash=sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791 \ - --hash=sha256:6f12e1427285008fd32a6025e38e977d44d6382cf28e7201ed10d6c1698d2a9a \ - --hash=sha256:74f7d8d439b18fa4c385f3f5dfd11144bb87c1da034a466c5b5577d23a1d9b51 \ - --hash=sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1 \ - --hash=sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a \ - --hash=sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f \ - --hash=sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee \ - --hash=sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec \ - --hash=sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969 \ - --hash=sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28 \ - --hash=sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a \ - --hash=sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa \ - --hash=sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106 \ - --hash=sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d \ - --hash=sha256:c1a40c06fd5ba37ad39caa0b3144eb3772e813b5fb5b084198a985431c2f1e8d \ - --hash=sha256:c47ff7e0a36d4efac9fd692cfa33fbd0636674c102e9e8d9b26e1b93a94e7617 \ - --hash=sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4 \ - --hash=sha256:cdaf11d2bd275bf391b5308f86731e5194a21af45fbaaaf1d9e8147b9160ea92 \ - --hash=sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0 \ - --hash=sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4 \ - --hash=sha256:d916d31fd85b2f78c76400d625076d9124de3e4bda8b016d25a050cc7d603f24 \ - --hash=sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2 \ - --hash=sha256:e1cbd3f19a61e27e011e02f9600837b921ac661f0c40560eefb366e4e4fb275e \ - --hash=sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0 \ - --hash=sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654 \ - --hash=sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2 \ - --hash=sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23 \ - --hash=sha256:f90ba11136bfdd25cae3951af8da2e95121c9b9b93727b1b896e3fa105b2f586 -nose==1.3.7 \ - --hash=sha256:9ff7c6cc443f8c51994b34a667bbcf45afd6d945be7477b52e97516fd17c53ac \ - --hash=sha256:dadcddc0aefbf99eea214e0f1232b94f2fa9bd98fa8353711dacb112bfcbbb2a \ - --hash=sha256:f1bffef9cbc82628f6e7d7b40d7e255aefaa1adb6a1b1d26c69a8b79e6208a98 -numpy==1.21.3 \ - --hash=sha256:043e83bfc274649c82a6f09836943e4a4aebe5e33656271c7dbf9621dd58b8ec \ - --hash=sha256:160ccc1bed3a8371bf0d760971f09bfe80a3e18646620e9ded0ad159d9749baa \ - --hash=sha256:188031f833bbb623637e66006cf75e933e00e7231f67e2b45cf8189612bb5dc3 \ - --hash=sha256:28f15209fb535dd4c504a7762d3bc440779b0e37d50ed810ced209e5cea60d96 \ - --hash=sha256:29fb3dcd0468b7715f8ce2c0c2d9bbbaf5ae686334951343a41bd8d155c6ea27 \ - --hash=sha256:2a6ee9620061b2a722749b391c0d80a0e2ae97290f1b32e28d5a362e21941ee4 \ - --hash=sha256:300321e3985c968e3ae7fbda187237b225f3ffe6528395a5b7a5407f73cf093e \ - --hash=sha256:32437f0b275c1d09d9c3add782516413e98cd7c09e6baf4715cbce781fc29912 \ - --hash=sha256:3c09418a14471c7ae69ba682e2428cae5b4420a766659605566c0fa6987f6b7e \ - 
--hash=sha256:49c6249260890e05b8111ebfc391ed58b3cb4b33e63197b2ec7f776e45330721 \ - --hash=sha256:4cc9b512e9fb590797474f58b7f6d1f1b654b3a94f4fa8558b48ca8b3cfc97cf \ - --hash=sha256:508b0b513fa1266875524ba8a9ecc27b02ad771fe1704a16314dc1a816a68737 \ - --hash=sha256:50cd26b0cf6664cb3b3dd161ba0a09c9c1343db064e7c69f9f8b551f5104d654 \ - --hash=sha256:5c4193f70f8069550a1788bd0cd3268ab7d3a2b70583dfe3b2e7f421e9aace06 \ - --hash=sha256:5dfe9d6a4c39b8b6edd7990091fea4f852888e41919d0e6722fe78dd421db0eb \ - --hash=sha256:63571bb7897a584ca3249c86dd01c10bcb5fe4296e3568b2e9c1a55356b6410e \ - --hash=sha256:75621882d2230ab77fb6a03d4cbccd2038511491076e7964ef87306623aa5272 \ - --hash=sha256:75eb7cadc8da49302f5b659d40ba4f6d94d5045fbd9569c9d058e77b0514c9e4 \ - --hash=sha256:88a5d6b268e9ad18f3533e184744acdaa2e913b13148160b1152300c949bbb5f \ - --hash=sha256:8a10968963640e75cc0193e1847616ab4c718e83b6938ae74dea44953950f6b7 \ - --hash=sha256:90bec6a86b348b4559b6482e2b684db4a9a7eed1fa054b86115a48d58fbbf62a \ - --hash=sha256:98339aa9911853f131de11010f6dd94c8cec254d3d1f7261528c3b3e3219f139 \ - --hash=sha256:a99a6b067e5190ac6d12005a4d85aa6227c5606fa93211f86b1dafb16233e57d \ - --hash=sha256:bffa2eee3b87376cc6b31eee36d05349571c236d1de1175b804b348dc0941e3f \ - --hash=sha256:c6c2d535a7beb1f8790aaa98fd089ceab2e3dd7ca48aca0af7dc60e6ef93ffe1 \ - --hash=sha256:cc14e7519fab2a4ed87d31f99c31a3796e4e1fe63a86ebdd1c5a1ea78ebd5896 \ - --hash=sha256:dd0482f3fc547f1b1b5d6a8b8e08f63fdc250c58ce688dedd8851e6e26cff0f3 \ - --hash=sha256:dde972a1e11bb7b702ed0e447953e7617723760f420decb97305e66fb4afc54f \ - --hash=sha256:e54af82d68ef8255535a6cdb353f55d6b8cf418a83e2be3569243787a4f4866f \ - --hash=sha256:e606e6316911471c8d9b4618e082635cfe98876007556e89ce03d52ff5e8fcf0 \ - --hash=sha256:f41b018f126aac18583956c54544db437f25c7ee4794bcb23eb38bef8e5e192a \ - --hash=sha256:f8f4625536926a155b80ad2bbff44f8cc59e9f2ad14cdda7acf4c135b4dc8ff2 \ - --hash=sha256:fe52dbe47d9deb69b05084abd4b0df7abb39a3c51957c09f635520abd49b29dd -Pillow==10.3.0 \ - --hash=sha256:048ad577748b9fa4a99a0548c64f2cb8d672d5bf2e643a739ac8faff1164238c \ - --hash=sha256:048eeade4c33fdf7e08da40ef402e748df113fd0b4584e32c4af74fe78baaeb2 \ - --hash=sha256:0ba26351b137ca4e0db0342d5d00d2e355eb29372c05afd544ebf47c0956ffeb \ - --hash=sha256:0ea2a783a2bdf2a561808fe4a7a12e9aa3799b701ba305de596bc48b8bdfce9d \ - --hash=sha256:1530e8f3a4b965eb6a7785cf17a426c779333eb62c9a7d1bbcf3ffd5bf77a4aa \ - --hash=sha256:16563993329b79513f59142a6b02055e10514c1a8e86dca8b48a893e33cf91e3 \ - --hash=sha256:19aeb96d43902f0a783946a0a87dbdad5c84c936025b8419da0a0cd7724356b1 \ - --hash=sha256:1a1d1915db1a4fdb2754b9de292642a39a7fb28f1736699527bb649484fb966a \ - --hash=sha256:1b87bd9d81d179bd8ab871603bd80d8645729939f90b71e62914e816a76fc6bd \ - --hash=sha256:1dfc94946bc60ea375cc39cff0b8da6c7e5f8fcdc1d946beb8da5c216156ddd8 \ - --hash=sha256:2034f6759a722da3a3dbd91a81148cf884e91d1b747992ca288ab88c1de15999 \ - --hash=sha256:261ddb7ca91fcf71757979534fb4c128448b5b4c55cb6152d280312062f69599 \ - --hash=sha256:2ed854e716a89b1afcedea551cd85f2eb2a807613752ab997b9974aaa0d56936 \ - --hash=sha256:3102045a10945173d38336f6e71a8dc71bcaeed55c3123ad4af82c52807b9375 \ - --hash=sha256:339894035d0ede518b16073bdc2feef4c991ee991a29774b33e515f1d308e08d \ - --hash=sha256:412444afb8c4c7a6cc11a47dade32982439925537e483be7c0ae0cf96c4f6a0b \ - --hash=sha256:4203efca580f0dd6f882ca211f923168548f7ba334c189e9eab1178ab840bf60 \ - --hash=sha256:45ebc7b45406febf07fef35d856f0293a92e7417ae7933207e90bf9090b70572 \ - 
--hash=sha256:4b5ec25d8b17217d635f8935dbc1b9aa5907962fae29dff220f2659487891cd3 \ - --hash=sha256:4c8e73e99da7db1b4cad7f8d682cf6abad7844da39834c288fbfa394a47bbced \ - --hash=sha256:4e6f7d1c414191c1199f8996d3f2282b9ebea0945693fb67392c75a3a320941f \ - --hash=sha256:4eaa22f0d22b1a7e93ff0a596d57fdede2e550aecffb5a1ef1106aaece48e96b \ - --hash=sha256:50b8eae8f7334ec826d6eeffaeeb00e36b5e24aa0b9df322c247539714c6df19 \ - --hash=sha256:50fd3f6b26e3441ae07b7c979309638b72abc1a25da31a81a7fbd9495713ef4f \ - --hash=sha256:51243f1ed5161b9945011a7360e997729776f6e5d7005ba0c6879267d4c5139d \ - --hash=sha256:5d512aafa1d32efa014fa041d38868fda85028e3f930a96f85d49c7d8ddc0383 \ - --hash=sha256:5f77cf66e96ae734717d341c145c5949c63180842a545c47a0ce7ae52ca83795 \ - --hash=sha256:6b02471b72526ab8a18c39cb7967b72d194ec53c1fd0a70b050565a0f366d355 \ - --hash=sha256:6fb1b30043271ec92dc65f6d9f0b7a830c210b8a96423074b15c7bc999975f57 \ - --hash=sha256:7161ec49ef0800947dc5570f86568a7bb36fa97dd09e9827dc02b718c5643f09 \ - --hash=sha256:72d622d262e463dfb7595202d229f5f3ab4b852289a1cd09650362db23b9eb0b \ - --hash=sha256:74d28c17412d9caa1066f7a31df8403ec23d5268ba46cd0ad2c50fb82ae40462 \ - --hash=sha256:78618cdbccaa74d3f88d0ad6cb8ac3007f1a6fa5c6f19af64b55ca170bfa1edf \ - --hash=sha256:793b4e24db2e8742ca6423d3fde8396db336698c55cd34b660663ee9e45ed37f \ - --hash=sha256:798232c92e7665fe82ac085f9d8e8ca98826f8e27859d9a96b41d519ecd2e49a \ - --hash=sha256:81d09caa7b27ef4e61cb7d8fbf1714f5aec1c6b6c5270ee53504981e6e9121ad \ - --hash=sha256:8ab74c06ffdab957d7670c2a5a6e1a70181cd10b727cd788c4dd9005b6a8acd9 \ - --hash=sha256:8eb0908e954d093b02a543dc963984d6e99ad2b5e36503d8a0aaf040505f747d \ - --hash=sha256:90b9e29824800e90c84e4022dd5cc16eb2d9605ee13f05d47641eb183cd73d45 \ - --hash=sha256:9797a6c8fe16f25749b371c02e2ade0efb51155e767a971c61734b1bf6293994 \ - --hash=sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d \ - --hash=sha256:9d3bea1c75f8c53ee4d505c3e67d8c158ad4df0d83170605b50b64025917f338 \ - --hash=sha256:9e2ec1e921fd07c7cda7962bad283acc2f2a9ccc1b971ee4b216b75fad6f0463 \ - --hash=sha256:9e91179a242bbc99be65e139e30690e081fe6cb91a8e77faf4c409653de39451 \ - --hash=sha256:a0eaa93d054751ee9964afa21c06247779b90440ca41d184aeb5d410f20ff591 \ - --hash=sha256:a2c405445c79c3f5a124573a051062300936b0281fee57637e706453e452746c \ - --hash=sha256:aa7e402ce11f0885305bfb6afb3434b3cd8f53b563ac065452d9d5654c7b86fd \ - --hash=sha256:aff76a55a8aa8364d25400a210a65ff59d0168e0b4285ba6bf2bd83cf675ba32 \ - --hash=sha256:b09b86b27a064c9624d0a6c54da01c1beaf5b6cadfa609cf63789b1d08a797b9 \ - --hash=sha256:b14f16f94cbc61215115b9b1236f9c18403c15dd3c52cf629072afa9d54c1cbf \ - --hash=sha256:b50811d664d392f02f7761621303eba9d1b056fb1868c8cdf4231279645c25f5 \ - --hash=sha256:b7bc2176354defba3edc2b9a777744462da2f8e921fbaf61e52acb95bafa9828 \ - --hash=sha256:c78e1b00a87ce43bb37642c0812315b411e856a905d58d597750eb79802aaaa3 \ - --hash=sha256:c83341b89884e2b2e55886e8fbbf37c3fa5efd6c8907124aeb72f285ae5696e5 \ - --hash=sha256:ca2870d5d10d8726a27396d3ca4cf7976cec0f3cb706debe88e3a5bd4610f7d2 \ - --hash=sha256:ccce24b7ad89adb5a1e34a6ba96ac2530046763912806ad4c247356a8f33a67b \ - --hash=sha256:cd5e14fbf22a87321b24c88669aad3a51ec052eb145315b3da3b7e3cc105b9a2 \ - --hash=sha256:ce49c67f4ea0609933d01c0731b34b8695a7a748d6c8d186f95e7d085d2fe475 \ - --hash=sha256:d33891be6df59d93df4d846640f0e46f1a807339f09e79a8040bc887bdcd7ed3 \ - --hash=sha256:d3b2348a78bc939b4fed6552abfd2e7988e0f81443ef3911a4b8498ca084f6eb \ - 
--hash=sha256:d886f5d353333b4771d21267c7ecc75b710f1a73d72d03ca06df49b09015a9ef \ - --hash=sha256:d93480005693d247f8346bc8ee28c72a2191bdf1f6b5db469c096c0c867ac015 \ - --hash=sha256:dc1a390a82755a8c26c9964d457d4c9cbec5405896cba94cf51f36ea0d855002 \ - --hash=sha256:dd78700f5788ae180b5ee8902c6aea5a5726bac7c364b202b4b3e3ba2d293170 \ - --hash=sha256:e46f38133e5a060d46bd630faa4d9fa0202377495df1f068a8299fd78c84de84 \ - --hash=sha256:e4b878386c4bf293578b48fc570b84ecfe477d3b77ba39a6e87150af77f40c57 \ - --hash=sha256:f0d0591a0aeaefdaf9a5e545e7485f89910c977087e7de2b6c388aec32011e9f \ - --hash=sha256:fdcbb4068117dfd9ce0138d068ac512843c52295ed996ae6dd1faf537b6dbc27 \ - --hash=sha256:ff61bfd9253c3915e6d41c651d5f962da23eda633cf02262990094a18a55371a -psutil==5.8.0 \ - --hash=sha256:0066a82f7b1b37d334e68697faba68e5ad5e858279fd6351c8ca6024e8d6ba64 \ - --hash=sha256:02b8292609b1f7fcb34173b25e48d0da8667bc85f81d7476584d889c6e0f2131 \ - --hash=sha256:0ae6f386d8d297177fd288be6e8d1afc05966878704dad9847719650e44fc49c \ - --hash=sha256:0c9ccb99ab76025f2f0bbecf341d4656e9c1351db8cc8a03ccd62e318ab4b5c6 \ - --hash=sha256:0dd4465a039d343925cdc29023bb6960ccf4e74a65ad53e768403746a9207023 \ - --hash=sha256:12d844996d6c2b1d3881cfa6fa201fd635971869a9da945cf6756105af73d2df \ - --hash=sha256:1bff0d07e76114ec24ee32e7f7f8d0c4b0514b3fae93e3d2aaafd65d22502394 \ - --hash=sha256:245b5509968ac0bd179287d91210cd3f37add77dad385ef238b275bad35fa1c4 \ - --hash=sha256:28ff7c95293ae74bf1ca1a79e8805fcde005c18a122ca983abf676ea3466362b \ - --hash=sha256:36b3b6c9e2a34b7d7fbae330a85bf72c30b1c827a4366a07443fc4b6270449e2 \ - --hash=sha256:52de075468cd394ac98c66f9ca33b2f54ae1d9bff1ef6b67a212ee8f639ec06d \ - --hash=sha256:5da29e394bdedd9144c7331192e20c1f79283fb03b06e6abd3a8ae45ffecee65 \ - --hash=sha256:61f05864b42fedc0771d6d8e49c35f07efd209ade09a5afe6a5059e7bb7bf83d \ - --hash=sha256:6223d07a1ae93f86451d0198a0c361032c4c93ebd4bf6d25e2fb3edfad9571ef \ - --hash=sha256:6323d5d845c2785efb20aded4726636546b26d3b577aded22492908f7c1bdda7 \ - --hash=sha256:6ffe81843131ee0ffa02c317186ed1e759a145267d54fdef1bc4ea5f5931ab60 \ - --hash=sha256:74f2d0be88db96ada78756cb3a3e1b107ce8ab79f65aa885f76d7664e56928f6 \ - --hash=sha256:74fb2557d1430fff18ff0d72613c5ca30c45cdbfcddd6a5773e9fc1fe9364be8 \ - --hash=sha256:90d4091c2d30ddd0a03e0b97e6a33a48628469b99585e2ad6bf21f17423b112b \ - --hash=sha256:90f31c34d25b1b3ed6c40cdd34ff122b1887a825297c017e4cbd6796dd8b672d \ - --hash=sha256:99de3e8739258b3c3e8669cb9757c9a861b2a25ad0955f8e53ac662d66de61ac \ - --hash=sha256:c6a5fd10ce6b6344e616cf01cc5b849fa8103fbb5ba507b6b2dee4c11e84c935 \ - --hash=sha256:ce8b867423291cb65cfc6d9c4955ee9bfc1e21fe03bb50e177f2b957f1c2469d \ - --hash=sha256:d225cd8319aa1d3c85bf195c4e07d17d3cd68636b8fc97e6cf198f782f99af28 \ - --hash=sha256:ea313bb02e5e25224e518e4352af4bf5e062755160f77e4b1767dd5ccb65f876 \ - --hash=sha256:ea372bcc129394485824ae3e3ddabe67dc0b118d262c568b4d2602a7070afdb0 \ - --hash=sha256:f4634b033faf0d968bb9220dd1c793b897ab7f1189956e1aa9eae752527127d3 \ - --hash=sha256:fcc01e900c1d7bee2a37e5d6e4f9194760a93597c97fee89c4ae51701de03563 -scipy==1.5.4 \ - --hash=sha256:168c45c0c32e23f613db7c9e4e780bc61982d71dcd406ead746c7c7c2f2004ce \ - --hash=sha256:213bc59191da2f479984ad4ec39406bf949a99aba70e9237b916ce7547b6ef42 \ - --hash=sha256:25b241034215247481f53355e05f9e25462682b13bd9191359075682adcd9554 \ - --hash=sha256:2c872de0c69ed20fb1a9b9cf6f77298b04a26f0b8720a5457be08be254366c6e \ - --hash=sha256:3397c129b479846d7eaa18f999369a24322d008fac0782e7828fa567358c36ce \ - 
--hash=sha256:368c0f69f93186309e1b4beb8e26d51dd6f5010b79264c0f1e9ca00cd92ea8c9 \ - --hash=sha256:3d5db5d815370c28d938cf9b0809dade4acf7aba57eaf7ef733bfedc9b2474c4 \ - --hash=sha256:4598cf03136067000855d6b44d7a1f4f46994164bcd450fb2c3d481afc25dd06 \ - --hash=sha256:4a453d5e5689de62e5d38edf40af3f17560bfd63c9c5bd228c18c1f99afa155b \ - --hash=sha256:4f12d13ffbc16e988fa40809cbbd7a8b45bc05ff6ea0ba8e3e41f6f4db3a9e47 \ - --hash=sha256:634568a3018bc16a83cda28d4f7aed0d803dd5618facb36e977e53b2df868443 \ - --hash=sha256:65923bc3809524e46fb7eb4d6346552cbb6a1ffc41be748535aa502a2e3d3389 \ - --hash=sha256:6b0ceb23560f46dd236a8ad4378fc40bad1783e997604ba845e131d6c680963e \ - --hash=sha256:8c8d6ca19c8497344b810b0b0344f8375af5f6bb9c98bd42e33f747417ab3f57 \ - --hash=sha256:9ad4fcddcbf5dc67619379782e6aeef41218a79e17979aaed01ed099876c0e62 \ - --hash=sha256:a254b98dbcc744c723a838c03b74a8a34c0558c9ac5c86d5561703362231107d \ - --hash=sha256:b03c4338d6d3d299e8ca494194c0ae4f611548da59e3c038813f1a43976cb437 \ - --hash=sha256:cc1f78ebc982cd0602c9a7615d878396bec94908db67d4ecddca864d049112f2 \ - --hash=sha256:d6d25c41a009e3c6b7e757338948d0076ee1dd1770d1c09ec131f11946883c54 \ - --hash=sha256:d84cadd7d7998433334c99fa55bcba0d8b4aeff0edb123b2a1dfcface538e474 \ - --hash=sha256:e360cb2299028d0b0d0f65a5c5e51fc16a335f1603aa2357c25766c8dab56938 \ - --hash=sha256:e98d49a5717369d8241d6cf33ecb0ca72deee392414118198a8e5b4c35c56340 \ - --hash=sha256:ed572470af2438b526ea574ff8f05e7f39b44ac37f712105e57fc4d53a6fb660 \ - --hash=sha256:f87b39f4d69cf7d7529d7b1098cb712033b17ea7714aed831b95628f483fd012 \ - --hash=sha256:fa789583fc94a7689b45834453fec095245c7e69c58561dc159b5d5277057e4c -tflite==2.4.0 \ - --hash=sha256:0510db1b48a3eec86bf9bb8d2749cd9d6d26d6a4fb329fd141bde5b4404932d1 \ - --hash=sha256:0796f6ce6eb2aef4a318f5509e5fb0ce808e29cd3094801b4abbb1d8575a28cd -tornado==6.3.3 \ - --hash=sha256:1bd19ca6c16882e4d37368e0152f99c099bad93e0950ce55e71daed74045908f \ - --hash=sha256:22d3c2fa10b5793da13c807e6fc38ff49a4f6e1e3868b0a6f4164768bb8e20f5 \ - --hash=sha256:502fba735c84450974fec147340016ad928d29f1e91f49be168c0a4c18181e1d \ - --hash=sha256:65ceca9500383fbdf33a98c0087cb975b2ef3bfb874cb35b8de8740cf7f41bd3 \ - --hash=sha256:71a8db65160a3c55d61839b7302a9a400074c9c753040455494e2af74e2501f2 \ - --hash=sha256:7ac51f42808cca9b3613f51ffe2a965c8525cb1b00b7b2d56828b8045354f76a \ - --hash=sha256:7d01abc57ea0dbb51ddfed477dfe22719d376119844e33c661d873bf9c0e4a16 \ - --hash=sha256:805d507b1f588320c26f7f097108eb4023bbaa984d63176d1652e184ba24270a \ - --hash=sha256:9dc4444c0defcd3929d5c1eb5706cbe1b116e762ff3e0deca8b715d14bf6ec17 \ - --hash=sha256:ceb917a50cd35882b57600709dd5421a418c29ddc852da8bcdab1f0db33406b0 \ - --hash=sha256:e7d8db41c0181c80d76c982aacc442c0783a2c54d6400fe028954201a2e032fe diff --git a/apps/microtvm/ethosu/run_demo.sh b/apps/microtvm/ethosu/run_demo.sh deleted file mode 100755 index 7490f979b834..000000000000 --- a/apps/microtvm/ethosu/run_demo.sh +++ /dev/null @@ -1,183 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e -set -u -set -o pipefail -set -x - -# Show usage -function show_usage() { - cat <&2 - show_usage >&2 - exit 1 - fi - ;; - - --cmsis_path) - if [ $# -gt 1 ] - then - export CMSIS_PATH="$2" - shift 2 - else - echo 'ERROR: --cmsis_path requires a non-empty argument' >&2 - show_usage >&2 - exit 1 - fi - ;; - - --freertos_path) - if [ $# -gt 1 ] - then - export FREERTOS_PATH="$2" - shift 2 - else - echo 'ERROR: --freertos_path requires a non-empty argument' >&2 - show_usage >&2 - exit 1 - fi - ;; - - --ethosu_platform_path) - if [ $# -gt 1 ] - then - export ETHOSU_PLATFORM_PATH="$2" - shift 2 - else - echo 'ERROR: --ethosu_platform_path requires a non-empty argument' >&2 - show_usage >&2 - exit 1 - fi - ;; - - --fvp_path) - if [ $# -gt 1 ] - then - export PATH="$2/models/Linux64_GCC-6.4:$PATH" - shift 2 - else - echo 'ERROR: --fvp_path requires a non-empty argument' >&2 - show_usage >&2 - exit 1 - fi - ;; - - --cmake_path) - if [ $# -gt 1 ] - then - export CMAKE="$2" - shift 2 - else - echo 'ERROR: --cmake_path requires a non-empty argument' >&2 - show_usage >&2 - exit 1 - fi - ;; - - -*|--*) - echo "Error: Unknown flag: $1" >&2 - show_usage >&2 - exit 1 - ;; - esac -done - - -# Directories -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" - -# Make build directory -make cleanall -mkdir -p build -cd build - -# Get mobilenet_v2 tflite model -mobilenet_url='https://github.com/ARM-software/ML-zoo/raw/b9e26e662c00e0c0b23587888e75ac1205a99b6e/models/image_classification/mobilenet_v2_1.0_224/tflite_int8/mobilenet_v2_1.0_224_INT8.tflite' -curl --retry 64 -sSL ${mobilenet_url} -o ./mobilenet_v2_1.0_224_INT8.tflite - -# Compile model for Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU -# An alternative to using "python3 -m tvm.driver.tvmc" is to call -# "tvmc" directly once TVM has been pip installed. -python3 -m tvm.driver.tvmc compile --target=ethos-u,cmsis-nn,c \ - --target-ethos-u-accelerator_config=ethos-u55-256 \ - --target-cmsis-nn-mcpu=cortex-m55 \ - --target-c-mcpu=cortex-m55 \ - --runtime=crt \ - --executor=aot \ - --executor-aot-interface-api=c \ - --executor-aot-unpacked-api=1 \ - --pass-config tir.usmp.enable=1 \ - --pass-config tir.usmp.algorithm=hill_climb \ - --pass-config tir.disable_storage_rewrite=1 \ - --pass-config tir.disable_vectorize=1 ./mobilenet_v2_1.0_224_INT8.tflite --output-format=mlf -tar -xf module.tar - -# Get ImageNet labels -curl -sS https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/lite/java/demo/app/src/main/assets/labels_mobilenet_quant_v1_224.txt \ - -o ./labels_mobilenet_quant_v1_224.txt - -# Get input image -curl -sS https://s3.amazonaws.com/model-server/inputs/kitten.jpg -o kitten.jpg - -# Create C header files -cd .. 
-python3 ./convert_image.py ./build/kitten.jpg -python3 ./convert_labels.py ./build/labels_mobilenet_quant_v1_224.txt - -# Build demo executable -cd ${script_dir} -make - -# Run demo executable on the FVP -FVP_Corstone_SSE-300_Ethos-U55 -C cpu0.CFGDTCMSZ=15 \ --C cpu0.CFGITCMSZ=15 -C mps3_board.uart0.out_file=\"-\" -C mps3_board.uart0.shutdown_tag=\"EXITTHESIM\" \ --C mps3_board.visualisation.disable-visualisation=1 -C mps3_board.telnetterminal0.start_telnet=0 \ --C mps3_board.telnetterminal1.start_telnet=0 -C mps3_board.telnetterminal2.start_telnet=0 -C mps3_board.telnetterminal5.start_telnet=0 \ --C ethosu.extra_args="--fast" \ --C ethosu.num_macs=256 ./build/demo diff --git a/apps/microtvm/ethosu/src/demo_bare_metal.c b/apps/microtvm/ethosu/src/demo_bare_metal.c deleted file mode 100644 index 1bef90cfb301..000000000000 --- a/apps/microtvm/ethosu/src/demo_bare_metal.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include - -#include "ethosu_mod.h" -#include "uart_stdout.h" - -// Header files generated by convert_image.py and convert_labels.py -#include "inputs.h" -#include "labels.h" -#include "outputs.h" - -int abs(int v) { return v * ((v > 0) - (v < 0)); } - -int main(int argc, char** argv) { - UartStdOutInit(); - printf("Starting Demo\n"); - EthosuInit(); - - printf("Running inference\n"); - struct tvmgen_default_outputs outputs = { - .MobilenetV2_Predictions_Reshape_11 = output, - }; - struct tvmgen_default_inputs inputs = { - .tfl_quantize = input, - }; - struct ethosu_driver* driver = ethosu_reserve_driver(); - struct tvmgen_default_devices devices = { - .ethos_u = driver, - }; - tvmgen_default_run(&inputs, &outputs, &devices); - ethosu_release_driver(driver); - - // Calculate index of max value - int8_t max_value = -128; - int32_t max_index = -1; - for (unsigned int i = 0; i < output_len; ++i) { - if (output[i] > max_value) { - max_value = output[i]; - max_index = i; - } - } - printf("The image has been classified as '%s'\n", labels[max_index]); - - // The FVP will shut down when it receives "EXITTHESIM" on the UART - printf("EXITTHESIM\n"); - while (1 == 1) - ; - return 0; -} diff --git a/apps/microtvm/ethosu/src/demo_freertos.c b/apps/microtvm/ethosu/src/demo_freertos.c deleted file mode 100644 index e59d7aeaccf5..000000000000 --- a/apps/microtvm/ethosu/src/demo_freertos.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include -#include -#include -#include - -#include "ethosu_mod.h" -#include "uart_stdout.h" - -// Header files generated by convert_image.py and convert_labels.py -#include "inputs.h" -#include "labels.h" -#include "outputs.h" - -static void prvInferenceTask(void* pvParameters); -static void prvDataCollectionTask(void* pvParameters); - -#define mainQUEUE_INFERENCE_TASK_PRIORITY (tskIDLE_PRIORITY + 3) -#define mainQUEUE_INFERENCE_TASK_STACK_SIZE 4096 -#define mainQUEUE_DATA_TASK_PRIORITY (tskIDLE_PRIORITY + 2) -#define mainQUEUE_DATA_TASK_STACK_SIZE configMINIMAL_STACK_SIZE -#define mainQUEUE_LENGTH (1) -#define mainQUEUE_SEND_FREQUENCY_MS (100 / portTICK_PERIOD_MS) - -/* The queue used to pass data to run through our model */ -static QueueHandle_t xQueue = NULL; - -int main(void) { - // Platform UART - UartStdOutInit(); - // NPU - EthosuInit(); - - // Queue for inferences - xQueue = xQueueCreate(mainQUEUE_LENGTH, sizeof(uint8_t*)); - - if (xQueue != NULL) { - // Inference task - xTaskCreate(prvInferenceTask, "Inference", mainQUEUE_INFERENCE_TASK_STACK_SIZE, NULL, - mainQUEUE_INFERENCE_TASK_PRIORITY, NULL); - - // Data collector task - xTaskCreate(prvDataCollectionTask, "Data", mainQUEUE_DATA_TASK_STACK_SIZE, NULL, - mainQUEUE_DATA_TASK_PRIORITY, NULL); - - // Start the task scheduling - vTaskStartScheduler(); - } - - // The task scheduler should take over before this is reached - printf("Unreachable code reached!\n"); -} - -/* - * This task emulates collection of data and sending it to another inference task - * for processing - */ -static void prvDataCollectionTask(void* pvParameters) { - // Unused - (void)pvParameters; - - // Working - vTaskDelay(mainQUEUE_SEND_FREQUENCY_MS); - - // Construct pointer to copy to queue - uint8_t** pucInputData = &input; - xQueueSend(xQueue, &pucInputData, 0U); -} - -/* - * This task emulates the inference of data sent by the collector task - */ -static void prvInferenceTask(void* pvParameters) { - uint8_t* pucReceivedData; - - // Unused - (void)pvParameters; - - // Wait for data collection - xQueueReceive(xQueue, &pucReceivedData, portMAX_DELAY); - - // Print output of inference and exit task - printf("Running inference\n"); - struct tvmgen_default_inputs xInputs = { - .tfl_quantize = pucReceivedData, - }; - struct tvmgen_default_outputs xOutputs = { - .MobilenetV2_Predictions_Reshape_11 = output, - }; - struct ethosu_driver* xDriver = ethosu_reserve_driver(); - struct tvmgen_default_devices xDevices = { - .ethos_u = xDriver, - }; - tvmgen_default_run(&xInputs, &xOutputs, &xDevices); - ethosu_release_driver(xDriver); - - // Calculate index of max value - int8_t ucMaxValue = -128; - int32_t lMaxIndex = -1; - for (unsigned int i = 0; i < output_len; ++i) { - if (output[i] > ucMaxValue) { - ucMaxValue = output[i]; - lMaxIndex = i; - } - } - printf("The image has been classified as '%s'\n", labels[lMaxIndex]); - - // The FVP will shut down when it receives 
"EXITTHESIM" on the UART - printf("EXITTHESIM\n"); -} diff --git a/apps/microtvm/ethosu/src/tvm_ethosu_runtime.c b/apps/microtvm/ethosu/src/tvm_ethosu_runtime.c deleted file mode 100644 index 2f8f7ec7c1dc..000000000000 --- a/apps/microtvm/ethosu/src/tvm_ethosu_runtime.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "tvm_ethosu_runtime.h" - -#include - -int32_t TVMEthosULaunch(tvm_device_ethos_u_t* context, void* cms_data, size_t cms_data_size, - uint64_t* base_addrs, size_t* base_addrs_size, int num_tensors) { - struct ethosu_driver* driver = (struct ethosu_driver*)context; - int32_t result = - ethosu_invoke(driver, cms_data, cms_data_size, base_addrs, base_addrs_size, num_tensors); - - // Map errors in invoke to TVM errors - if (result != 0) { - return -1; - } - return 0; -} - -int32_t TVMDeviceEthosUActivate(tvm_device_ethos_u_t* context) { return 0; } -int32_t TVMDeviceEthosUOpen(tvm_device_ethos_u_t* context) { return 0; } -int32_t TVMDeviceEthosUClose(tvm_device_ethos_u_t* context) { return 0; } -int32_t TVMDeviceEthosUDeactivate(tvm_device_ethos_u_t* context) { return 0; } diff --git a/apps/microtvm/poetry.lock b/apps/microtvm/poetry.lock deleted file mode 100644 index 16ff114dcd90..000000000000 --- a/apps/microtvm/poetry.lock +++ /dev/null @@ -1,2982 +0,0 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. - -[[package]] -name = "absl-py" -version = "1.4.0" -description = "Abseil Python Common Libraries, see https://github.com/abseil/abseil-py." -optional = true -python-versions = ">=3.6" -files = [ - {file = "absl-py-1.4.0.tar.gz", hash = "sha256:d2c244d01048ba476e7c080bd2c6df5e141d211de80223460d5b3b8a2a58433d"}, - {file = "absl_py-1.4.0-py3-none-any.whl", hash = "sha256:0d3fe606adfa4f7db64792dd4c7aee4ee0c38ab75dfd353b7a83ed3e957fcb47"}, -] - -[[package]] -name = "alabaster" -version = "0.7.13" -description = "A configurable sidebar-enabled Sphinx theme" -optional = false -python-versions = ">=3.6" -files = [ - {file = "alabaster-0.7.13-py3-none-any.whl", hash = "sha256:1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3"}, - {file = "alabaster-0.7.13.tar.gz", hash = "sha256:a27a4a084d5e690e16e01e03ad2b2e552c61a65469419b907243193de1a84ae2"}, -] - -[[package]] -name = "appdirs" -version = "1.4.4" -description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
-optional = false -python-versions = "*" -files = [ - {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"}, - {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, -] - -[[package]] -name = "asgiref" -version = "3.6.0" -description = "ASGI specs, helper code, and adapters" -optional = false -python-versions = ">=3.7" -files = [ - {file = "asgiref-3.6.0-py3-none-any.whl", hash = "sha256:71e68008da809b957b7ee4b43dbccff33d1b23519fb8344e33f049897077afac"}, - {file = "asgiref-3.6.0.tar.gz", hash = "sha256:9567dfe7bd8d3c8c892227827c41cce860b368104c3431da67a0c5a65a949506"}, -] - -[package.extras] -tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] - -[[package]] -name = "astroid" -version = "2.15.1" -description = "An abstract syntax tree for Python with inference support." -optional = false -python-versions = ">=3.7.2" -files = [ - {file = "astroid-2.15.1-py3-none-any.whl", hash = "sha256:89860bda98fe2bbd1f5d262229be7629d778ce280de68d95d4a73d1f592ad268"}, - {file = "astroid-2.15.1.tar.gz", hash = "sha256:af4e0aff46e2868218502789898269ed95b663fba49e65d91c1e09c966266c34"}, -] - -[package.dependencies] -lazy-object-proxy = ">=1.4.0" -typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} -wrapt = {version = ">=1.11,<2", markers = "python_version < \"3.11\""} - -[[package]] -name = "astunparse" -version = "1.6.3" -description = "An AST unparser for Python" -optional = true -python-versions = "*" -files = [ - {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, - {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, -] - -[package.dependencies] -six = ">=1.6.1,<2.0" -wheel = ">=0.23.0,<1.0" - -[[package]] -name = "attrs" -version = "22.2.0" -description = "Classes Without Boilerplate" -optional = false -python-versions = ">=3.6" -files = [ - {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, - {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, -] - -[package.extras] -cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] -dev = ["attrs[docs,tests]"] -docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"] -tests = ["attrs[tests-no-zope]", "zope.interface"] -tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"] - -[[package]] -name = "autodocsumm" -version = "0.1.13" -description = "Extended sphinx autodoc including automatic autosummaries" -optional = false -python-versions = "*" -files = [ - {file = "autodocsumm-0.1.13.tar.gz", hash = "sha256:02cabadf090ed0e6de166709ef18c796536b3ed40607ff96c776884fe6aa1f75"}, -] - -[package.dependencies] -sphinx = "*" - -[[package]] -name = "autoflake" -version = "2.0.2" -description = "Removes unused imports and unused variables" -optional = false -python-versions = ">=3.7" -files = [ - {file = "autoflake-2.0.2-py3-none-any.whl", hash = "sha256:a82d8efdcbbb7129a8a23238c529fb9d9919c562e26bb7963ea6890fbfff7d02"}, - 
{file = "autoflake-2.0.2.tar.gz", hash = "sha256:e0164421ff13f805f08a023e249d84200bd00463d213b490906bfefa67e83830"}, -] - -[package.dependencies] -pyflakes = ">=3.0.0" -tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} - -[[package]] -name = "autopep8" -version = "2.0.2" -description = "A tool that automatically formats Python code to conform to the PEP 8 style guide" -optional = false -python-versions = ">=3.6" -files = [ - {file = "autopep8-2.0.2-py2.py3-none-any.whl", hash = "sha256:86e9303b5e5c8160872b2f5ef611161b2893e9bfe8ccc7e2f76385947d57a2f1"}, - {file = "autopep8-2.0.2.tar.gz", hash = "sha256:f9849cdd62108cb739dbcdbfb7fdcc9a30d1b63c4cc3e1c1f893b5360941b61c"}, -] - -[package.dependencies] -pycodestyle = ">=2.10.0" -tomli = {version = "*", markers = "python_version < \"3.11\""} - -[[package]] -name = "babel" -version = "2.12.1" -description = "Internationalization utilities" -optional = false -python-versions = ">=3.7" -files = [ - {file = "Babel-2.12.1-py3-none-any.whl", hash = "sha256:b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610"}, - {file = "Babel-2.12.1.tar.gz", hash = "sha256:cc2d99999cd01d44420ae725a21c9e3711b3aadc7976d6147f622d8581963455"}, -] - -[package.dependencies] -pytz = {version = ">=2015.7", markers = "python_version < \"3.9\""} - -[[package]] -name = "backports-zoneinfo" -version = "0.2.1" -description = "Backport of the standard library zoneinfo module" -optional = false -python-versions = ">=3.6" -files = [ - {file = "backports.zoneinfo-0.2.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc"}, - {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722"}, - {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1c5742112073a563c81f786e77514969acb58649bcdf6cdf0b4ed31a348d4546"}, - {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win32.whl", hash = "sha256:e8236383a20872c0cdf5a62b554b27538db7fa1bbec52429d8d106effbaeca08"}, - {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win_amd64.whl", hash = "sha256:8439c030a11780786a2002261569bdf362264f605dfa4d65090b64b05c9f79a7"}, - {file = "backports.zoneinfo-0.2.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:f04e857b59d9d1ccc39ce2da1021d196e47234873820cbeaad210724b1ee28ac"}, - {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:17746bd546106fa389c51dbea67c8b7c8f0d14b5526a579ca6ccf5ed72c526cf"}, - {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5c144945a7752ca544b4b78c8c41544cdfaf9786f25fe5ffb10e838e19a27570"}, - {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win32.whl", hash = "sha256:e55b384612d93be96506932a786bbcde5a2db7a9e6a4bb4bffe8b733f5b9036b"}, - {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a76b38c52400b762e48131494ba26be363491ac4f9a04c1b7e92483d169f6582"}, - {file = "backports.zoneinfo-0.2.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:8961c0f32cd0336fb8e8ead11a1f8cd99ec07145ec2931122faaac1c8f7fd987"}, - {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:e81b76cace8eda1fca50e345242ba977f9be6ae3945af8d46326d776b4cf78d1"}, - {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7b0a64cda4145548fed9efc10322770f929b944ce5cee6c0dfe0c87bf4c0c8c9"}, - {file = "backports.zoneinfo-0.2.1-cp38-cp38-win32.whl", hash = 
"sha256:1b13e654a55cd45672cb54ed12148cd33628f672548f373963b0bff67b217328"}, - {file = "backports.zoneinfo-0.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:4a0f800587060bf8880f954dbef70de6c11bbe59c673c3d818921f042f9954a6"}, - {file = "backports.zoneinfo-0.2.1.tar.gz", hash = "sha256:fadbfe37f74051d024037f223b8e001611eac868b5c5b06144ef4d8b799862f2"}, -] - -[package.extras] -tzdata = ["tzdata"] - -[[package]] -name = "black" -version = "19.10b0" -description = "The uncompromising code formatter." -optional = false -python-versions = ">=3.6" -files = [ - {file = "black-19.10b0-py36-none-any.whl", hash = "sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b"}, - {file = "black-19.10b0.tar.gz", hash = "sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539"}, -] - -[package.dependencies] -appdirs = "*" -attrs = ">=18.1.0" -click = ">=6.5" -pathspec = ">=0.6,<1" -regex = "*" -toml = ">=0.9.4" -typed-ast = ">=1.4.0" - -[package.extras] -d = ["aiohttp (>=3.3.2)", "aiohttp-cors"] - -[[package]] -name = "cachetools" -version = "5.3.0" -description = "Extensible memoizing collections and decorators" -optional = true -python-versions = "~=3.7" -files = [ - {file = "cachetools-5.3.0-py3-none-any.whl", hash = "sha256:429e1a1e845c008ea6c85aa35d4b98b65d6a9763eeef3e37e92728a12d1de9d4"}, - {file = "cachetools-5.3.0.tar.gz", hash = "sha256:13dfddc7b8df938c21a940dfa6557ce6e94a2f1cdfa58eb90c805721d58f2c14"}, -] - -[[package]] -name = "certifi" -version = "2022.12.7" -description = "Python package for providing Mozilla's CA Bundle." -optional = false -python-versions = ">=3.6" -files = [ - {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"}, - {file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"}, -] - -[[package]] -name = "cffi" -version = "1.15.1" -description = "Foreign Function Interface for Python calling C code." 
-optional = true -python-versions = "*" -files = [ - {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"}, - {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"}, - {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"}, - {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"}, - {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"}, - {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"}, - {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"}, - {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"}, - {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"}, - {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"}, - {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"}, - {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"}, - {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"}, - {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"}, - {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"}, - 
{file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"}, - {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"}, - {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"}, - {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"}, - {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"}, - {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"}, - {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"}, - {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"}, - {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"}, - {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"}, - {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"}, - {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"}, - 
{file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"}, - {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"}, - {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"}, - {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"}, - {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"}, - {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"}, - {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"}, - {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"}, - {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"}, - {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"}, -] - -[package.dependencies] -pycparser = "*" - -[[package]] -name = "charset-normalizer" -version = "2.1.1" -description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
-optional = false -python-versions = ">=3.6.0" -files = [ - {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, - {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, -] - -[package.extras] -unicode-backport = ["unicodedata2"] - -[[package]] -name = "click" -version = "8.1.3" -description = "Composable command line interface toolkit" -optional = false -python-versions = ">=3.7" -files = [ - {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, - {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[[package]] -name = "cloudpickle" -version = "1.6.0" -description = "Extended pickling support for Python objects" -optional = false -python-versions = ">=3.5" -files = [ - {file = "cloudpickle-1.6.0-py3-none-any.whl", hash = "sha256:3a32d0eb0bc6f4d0c57fbc4f3e3780f7a81e6fee0fa935072884d58ae8e1cc7c"}, - {file = "cloudpickle-1.6.0.tar.gz", hash = "sha256:9bc994f9e9447593bd0a45371f0e7ac7333710fcf64a4eb9834bf149f4ef2f32"}, -] - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -files = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] - -[[package]] -name = "coloredlogs" -version = "15.0.1" -description = "Colored terminal output for Python's logging module" -optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, - {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, -] - -[package.dependencies] -humanfriendly = ">=9.1" - -[package.extras] -cron = ["capturer (>=2.4)"] - -[[package]] -name = "commonmark" -version = "0.9.1" -description = "Python parser for the CommonMark Markdown spec" -optional = false -python-versions = "*" -files = [ - {file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"}, - {file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"}, -] - -[package.extras] -test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] - -[[package]] -name = "contourpy" -version = "1.0.7" -description = "Python library for calculating contours of 2D quadrilateral grids" -optional = false -python-versions = ">=3.8" -files = [ - {file = "contourpy-1.0.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:95c3acddf921944f241b6773b767f1cbce71d03307270e2d769fd584d5d1092d"}, - {file = "contourpy-1.0.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fc1464c97579da9f3ab16763c32e5c5d5bb5fa1ec7ce509a4ca6108b61b84fab"}, - {file = "contourpy-1.0.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8acf74b5d383414401926c1598ed77825cd530ac7b463ebc2e4f46638f56cce6"}, - {file = 
"contourpy-1.0.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c71fdd8f1c0f84ffd58fca37d00ca4ebaa9e502fb49825484da075ac0b0b803"}, - {file = "contourpy-1.0.7-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f99e9486bf1bb979d95d5cffed40689cb595abb2b841f2991fc894b3452290e8"}, - {file = "contourpy-1.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87f4d8941a9564cda3f7fa6a6cd9b32ec575830780677932abdec7bcb61717b0"}, - {file = "contourpy-1.0.7-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9e20e5a1908e18aaa60d9077a6d8753090e3f85ca25da6e25d30dc0a9e84c2c6"}, - {file = "contourpy-1.0.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a877ada905f7d69b2a31796c4b66e31a8068b37aa9b78832d41c82fc3e056ddd"}, - {file = "contourpy-1.0.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6381fa66866b0ea35e15d197fc06ac3840a9b2643a6475c8fff267db8b9f1e69"}, - {file = "contourpy-1.0.7-cp310-cp310-win32.whl", hash = "sha256:3c184ad2433635f216645fdf0493011a4667e8d46b34082f5a3de702b6ec42e3"}, - {file = "contourpy-1.0.7-cp310-cp310-win_amd64.whl", hash = "sha256:3caea6365b13119626ee996711ab63e0c9d7496f65641f4459c60a009a1f3e80"}, - {file = "contourpy-1.0.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ed33433fc3820263a6368e532f19ddb4c5990855e4886088ad84fd7c4e561c71"}, - {file = "contourpy-1.0.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:38e2e577f0f092b8e6774459317c05a69935a1755ecfb621c0a98f0e3c09c9a5"}, - {file = "contourpy-1.0.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ae90d5a8590e5310c32a7630b4b8618cef7563cebf649011da80874d0aa8f414"}, - {file = "contourpy-1.0.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:130230b7e49825c98edf0b428b7aa1125503d91732735ef897786fe5452b1ec2"}, - {file = "contourpy-1.0.7-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58569c491e7f7e874f11519ef46737cea1d6eda1b514e4eb5ac7dab6aa864d02"}, - {file = "contourpy-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54d43960d809c4c12508a60b66cb936e7ed57d51fb5e30b513934a4a23874fae"}, - {file = "contourpy-1.0.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:152fd8f730c31fd67fe0ffebe1df38ab6a669403da93df218801a893645c6ccc"}, - {file = "contourpy-1.0.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9056c5310eb1daa33fc234ef39ebfb8c8e2533f088bbf0bc7350f70a29bde1ac"}, - {file = "contourpy-1.0.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a9d7587d2fdc820cc9177139b56795c39fb8560f540bba9ceea215f1f66e1566"}, - {file = "contourpy-1.0.7-cp311-cp311-win32.whl", hash = "sha256:4ee3ee247f795a69e53cd91d927146fb16c4e803c7ac86c84104940c7d2cabf0"}, - {file = "contourpy-1.0.7-cp311-cp311-win_amd64.whl", hash = "sha256:5caeacc68642e5f19d707471890f037a13007feba8427eb7f2a60811a1fc1350"}, - {file = "contourpy-1.0.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fd7dc0e6812b799a34f6d12fcb1000539098c249c8da54f3566c6a6461d0dbad"}, - {file = "contourpy-1.0.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0f9d350b639db6c2c233d92c7f213d94d2e444d8e8fc5ca44c9706cf72193772"}, - {file = "contourpy-1.0.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e96a08b62bb8de960d3a6afbc5ed8421bf1a2d9c85cc4ea73f4bc81b4910500f"}, - {file = "contourpy-1.0.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:031154ed61f7328ad7f97662e48660a150ef84ee1bc8876b6472af88bf5a9b98"}, - {file = 
"contourpy-1.0.7-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e9ebb4425fc1b658e13bace354c48a933b842d53c458f02c86f371cecbedecc"}, - {file = "contourpy-1.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efb8f6d08ca7998cf59eaf50c9d60717f29a1a0a09caa46460d33b2924839dbd"}, - {file = "contourpy-1.0.7-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6c180d89a28787e4b73b07e9b0e2dac7741261dbdca95f2b489c4f8f887dd810"}, - {file = "contourpy-1.0.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b8d587cc39057d0afd4166083d289bdeff221ac6d3ee5046aef2d480dc4b503c"}, - {file = "contourpy-1.0.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:769eef00437edf115e24d87f8926955f00f7704bede656ce605097584f9966dc"}, - {file = "contourpy-1.0.7-cp38-cp38-win32.whl", hash = "sha256:62398c80ef57589bdbe1eb8537127321c1abcfdf8c5f14f479dbbe27d0322e66"}, - {file = "contourpy-1.0.7-cp38-cp38-win_amd64.whl", hash = "sha256:57119b0116e3f408acbdccf9eb6ef19d7fe7baf0d1e9aaa5381489bc1aa56556"}, - {file = "contourpy-1.0.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:30676ca45084ee61e9c3da589042c24a57592e375d4b138bd84d8709893a1ba4"}, - {file = "contourpy-1.0.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e927b3868bd1e12acee7cc8f3747d815b4ab3e445a28d2e5373a7f4a6e76ba1"}, - {file = "contourpy-1.0.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:366a0cf0fc079af5204801786ad7a1c007714ee3909e364dbac1729f5b0849e5"}, - {file = "contourpy-1.0.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89ba9bb365446a22411f0673abf6ee1fea3b2cf47b37533b970904880ceb72f3"}, - {file = "contourpy-1.0.7-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:71b0bf0c30d432278793d2141362ac853859e87de0a7dee24a1cea35231f0d50"}, - {file = "contourpy-1.0.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7281244c99fd7c6f27c1c6bfafba878517b0b62925a09b586d88ce750a016d2"}, - {file = "contourpy-1.0.7-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b6d0f9e1d39dbfb3977f9dd79f156c86eb03e57a7face96f199e02b18e58d32a"}, - {file = "contourpy-1.0.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7f6979d20ee5693a1057ab53e043adffa1e7418d734c1532e2d9e915b08d8ec2"}, - {file = "contourpy-1.0.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5dd34c1ae752515318224cba7fc62b53130c45ac6a1040c8b7c1a223c46e8967"}, - {file = "contourpy-1.0.7-cp39-cp39-win32.whl", hash = "sha256:c5210e5d5117e9aec8c47d9156d1d3835570dd909a899171b9535cb4a3f32693"}, - {file = "contourpy-1.0.7-cp39-cp39-win_amd64.whl", hash = "sha256:60835badb5ed5f4e194a6f21c09283dd6e007664a86101431bf870d9e86266c4"}, - {file = "contourpy-1.0.7-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ce41676b3d0dd16dbcfabcc1dc46090aaf4688fd6e819ef343dbda5a57ef0161"}, - {file = "contourpy-1.0.7-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a011cf354107b47c58ea932d13b04d93c6d1d69b8b6dce885e642531f847566"}, - {file = "contourpy-1.0.7-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:31a55dccc8426e71817e3fe09b37d6d48ae40aae4ecbc8c7ad59d6893569c436"}, - {file = "contourpy-1.0.7-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69f8ff4db108815addd900a74df665e135dbbd6547a8a69333a68e1f6e368ac2"}, - {file = "contourpy-1.0.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:efe99298ba37e37787f6a2ea868265465410822f7bea163edcc1bd3903354ea9"}, - {file = 
"contourpy-1.0.7-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a1e97b86f73715e8670ef45292d7cc033548266f07d54e2183ecb3c87598888f"}, - {file = "contourpy-1.0.7-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc331c13902d0f50845099434cd936d49d7a2ca76cb654b39691974cb1e4812d"}, - {file = "contourpy-1.0.7-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24847601071f740837aefb730e01bd169fbcaa610209779a78db7ebb6e6a7051"}, - {file = "contourpy-1.0.7-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abf298af1e7ad44eeb93501e40eb5a67abbf93b5d90e468d01fc0c4451971afa"}, - {file = "contourpy-1.0.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:64757f6460fc55d7e16ed4f1de193f362104285c667c112b50a804d482777edd"}, - {file = "contourpy-1.0.7.tar.gz", hash = "sha256:d8165a088d31798b59e91117d1f5fc3df8168d8b48c4acc10fc0df0d0bdbcc5e"}, -] - -[package.dependencies] -numpy = ">=1.16" - -[package.extras] -bokeh = ["bokeh", "chromedriver", "selenium"] -docs = ["furo", "sphinx-copybutton"] -mypy = ["contourpy[bokeh]", "docutils-stubs", "mypy (==0.991)", "types-Pillow"] -test = ["Pillow", "matplotlib", "pytest"] -test-no-images = ["pytest"] - -[[package]] -name = "coremltools" -version = "3.4" -description = "Community Tools for Core ML" -optional = true -python-versions = "*" -files = [ - {file = "coremltools-3.4-cp27-none-macosx_10_12_intel.whl", hash = "sha256:f9acf53823f503fba468eedf7a1e67788bbfa8b77316ddce0f6f8196cc411056"}, - {file = "coremltools-3.4-cp27-none-macosx_10_13_intel.whl", hash = "sha256:961cabd211350125dec02d5deb26f322468bd887d280514df3f8c40ab92aa47a"}, - {file = "coremltools-3.4-cp27-none-macosx_10_14_intel.whl", hash = "sha256:5dd4211a55e5ed86bf595d9ff1bd69cc2cf72b09947e7d68ca5aac28416caa08"}, - {file = "coremltools-3.4-cp27-none-macosx_10_15_intel.whl", hash = "sha256:dfae3ad3542d40dbd9ff566f64186d6f8031bbd08b42333044928436e8b526c1"}, - {file = "coremltools-3.4-cp27-none-manylinux1_x86_64.whl", hash = "sha256:13d60a56eeae28661061c0f439677346ae02593c2946c69d7b703e5b26695729"}, - {file = "coremltools-3.4-cp35-none-macosx_10_12_intel.whl", hash = "sha256:1f64825bfe4bc13add097a24ac52f0822721ffc781e18062017ff415a043250f"}, - {file = "coremltools-3.4-cp35-none-macosx_10_13_intel.whl", hash = "sha256:38e047109518efc4469cf9e3fed2b3ff213672d5591772b061186362ba0c3853"}, - {file = "coremltools-3.4-cp35-none-macosx_10_14_intel.whl", hash = "sha256:71f520c8b9310f3a1ee8b2b676dcc2c26b445cdfb4835a3c31e51eb7c1b92bcf"}, - {file = "coremltools-3.4-cp35-none-macosx_10_15_intel.whl", hash = "sha256:fa5b95a6514fa8dfc2dfaa9e02165db22cc5fd0746fceccf9432e85e21a26cc6"}, - {file = "coremltools-3.4-cp35-none-manylinux1_x86_64.whl", hash = "sha256:9c9795187fbfe39d188efa3b5cc3d83d3c8d190ea490b00a0dad7fd81f8d00ed"}, - {file = "coremltools-3.4-cp36-none-macosx_10_12_intel.whl", hash = "sha256:edd619372e83240dac810aeda2dbdf7c0177fd8c4617ecbbb6abdc286aa3e0af"}, - {file = "coremltools-3.4-cp36-none-macosx_10_13_intel.whl", hash = "sha256:6b09d631d9e0963a76245c9b086bf328bc0f56ad477c7bd43fe92271f28af8a2"}, - {file = "coremltools-3.4-cp36-none-macosx_10_14_intel.whl", hash = "sha256:f72b8d963890d728aefc85286f3a0d59f62a7464cdee8fd8f4d9a6a31c328ba9"}, - {file = "coremltools-3.4-cp36-none-macosx_10_15_intel.whl", hash = "sha256:a6dfc9dbc1921219b231f98d4e03f3e2ec1e5be100ba0379d3dfd46606903cbb"}, - {file = "coremltools-3.4-cp36-none-manylinux1_x86_64.whl", hash = 
"sha256:eabc5b20e1ab9e6f16ed6a55a5b0a9df154e46e84a06219625c12a9ff9d4bb86"}, - {file = "coremltools-3.4-cp37-none-macosx_10_12_intel.whl", hash = "sha256:ba5ceb45dac4136b2969fa9af1fa992c6f54e535cfd479ad3153861b470662b6"}, - {file = "coremltools-3.4-cp37-none-macosx_10_13_intel.whl", hash = "sha256:5c7056ffff1076fd2a627b0bfb6931a7302f80e3432a383dbdb1021af9af9533"}, - {file = "coremltools-3.4-cp37-none-macosx_10_14_intel.whl", hash = "sha256:b6eee32f3bb3739861702ac487083a9598fb111de337def2abf7c2c00fc101d0"}, - {file = "coremltools-3.4-cp37-none-macosx_10_15_intel.whl", hash = "sha256:4bba322462dd389f743ac6dc59a5ae8d3d564ff93863ee0873dcf86676b477a2"}, - {file = "coremltools-3.4-cp37-none-manylinux1_x86_64.whl", hash = "sha256:3276fe8064048caa719061735bf1dfc1e5a793ec13ff2252e3f1065fa07d4918"}, -] - -[package.dependencies] -numpy = ">=1.14.5" -protobuf = ">=3.1.0" -six = ">=1.10.0" - -[[package]] -name = "cycler" -version = "0.11.0" -description = "Composable style cycles" -optional = false -python-versions = ">=3.6" -files = [ - {file = "cycler-0.11.0-py3-none-any.whl", hash = "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3"}, - {file = "cycler-0.11.0.tar.gz", hash = "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"}, -] - -[[package]] -name = "decorator" -version = "5.1.1" -description = "Decorators for Humans" -optional = false -python-versions = ">=3.5" -files = [ - {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, - {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, -] - -[[package]] -name = "dill" -version = "0.3.6" -description = "serialize all of python" -optional = false -python-versions = ">=3.7" -files = [ - {file = "dill-0.3.6-py3-none-any.whl", hash = "sha256:a07ffd2351b8c678dfc4a856a3005f8067aea51d6ba6c700796a4d9e280f39f0"}, - {file = "dill-0.3.6.tar.gz", hash = "sha256:e5db55f3687856d8fbdab002ed78544e1c4559a130302693d839dfe8f93f2373"}, -] - -[package.extras] -graph = ["objgraph (>=1.7.2)"] - -[[package]] -name = "django" -version = "4.1.7" -description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "Django-4.1.7-py3-none-any.whl", hash = "sha256:f2f431e75adc40039ace496ad3b9f17227022e8b11566f4b363da44c7e44761e"}, - {file = "Django-4.1.7.tar.gz", hash = "sha256:44f714b81c5f190d9d2ddad01a532fe502fa01c4cb8faf1d081f4264ed15dcd8"}, -] - -[package.dependencies] -asgiref = ">=3.5.2,<4" -"backports.zoneinfo" = {version = "*", markers = "python_version < \"3.9\""} -sqlparse = ">=0.2.2" -tzdata = {version = "*", markers = "sys_platform == \"win32\""} - -[package.extras] -argon2 = ["argon2-cffi (>=19.1.0)"] -bcrypt = ["bcrypt"] - -[[package]] -name = "docformatter" -version = "1.5.1" -description = "Formats docstrings to follow PEP 257" -optional = false -python-versions = ">=3.6,<4.0" -files = [ - {file = "docformatter-1.5.1-py3-none-any.whl", hash = "sha256:05d6e4c528278b3a54000e08695822617a38963a380f5aef19e12dd0e630f19a"}, - {file = "docformatter-1.5.1.tar.gz", hash = "sha256:3fa3cdb90cdbcdee82747c58410e47fc7e2e8c352b82bed80767915eb03f2e43"}, -] - -[package.dependencies] -charset_normalizer = ">=2.0.0,<3.0.0" -tomli = {version = ">=2.0.0,<3.0.0", markers = "python_version >= \"3.7\""} -untokenize = ">=0.1.1,<0.2.0" - -[package.extras] -tomli = ["tomli (<2.0.0)"] - -[[package]] -name = "docutils" -version = "0.19" -description = "Docutils -- Python Documentation Utilities" -optional = false -python-versions = ">=3.7" -files = [ - {file = "docutils-0.19-py3-none-any.whl", hash = "sha256:5e1de4d849fee02c63b040a4a3fd567f4ab104defd8a5511fbbc24a8a017efbc"}, - {file = "docutils-0.19.tar.gz", hash = "sha256:33995a6753c30b7f577febfc2c50411fec6aac7f7ffeb7c4cfe5991072dcf9e6"}, -] - -[[package]] -name = "exceptiongroup" -version = "1.1.1" -description = "Backport of PEP 654 (exception groups)" -optional = false -python-versions = ">=3.7" -files = [ - {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, - {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, -] - -[package.extras] -test = ["pytest (>=6)"] - -[[package]] -name = "execnet" -version = "1.9.0" -description = "execnet: rapid multi-Python deployment" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "execnet-1.9.0-py2.py3-none-any.whl", hash = "sha256:a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142"}, - {file = "execnet-1.9.0.tar.gz", hash = "sha256:8f694f3ba9cc92cab508b152dcfe322153975c29bda272e2fd7f3f00f36e47c5"}, -] - -[package.extras] -testing = ["pre-commit"] - -[[package]] -name = "flatbuffers" -version = "23.3.3" -description = "The FlatBuffers serialization format for Python" -optional = true -python-versions = "*" -files = [ - {file = "flatbuffers-23.3.3-py2.py3-none-any.whl", hash = "sha256:5ad36d376240090757e8f0a2cfaf6abcc81c6536c0dc988060375fd0899121f8"}, - {file = "flatbuffers-23.3.3.tar.gz", hash = "sha256:cabd87c4882f37840f6081f094b2c5bc28cefc2a6357732746936d055ab45c3d"}, -] - -[[package]] -name = "fonttools" -version = "4.39.3" -description = "Tools to manipulate font files" -optional = false -python-versions = ">=3.8" -files = [ - {file = "fonttools-4.39.3-py3-none-any.whl", hash = "sha256:64c0c05c337f826183637570ac5ab49ee220eec66cf50248e8df527edfa95aeb"}, - {file = "fonttools-4.39.3.zip", hash = "sha256:9234b9f57b74e31b192c3fc32ef1a40750a8fbc1cd9837a7b7bfc4ca4a5c51d7"}, -] - -[package.extras] -all = ["brotli (>=1.0.1)", "brotlicffi 
(>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.0.0)", "xattr", "zopfli (>=0.1.4)"] -graphite = ["lz4 (>=1.7.4.2)"] -interpolatable = ["munkres", "scipy"] -lxml = ["lxml (>=4.0,<5)"] -pathops = ["skia-pathops (>=0.5.0)"] -plot = ["matplotlib"] -repacker = ["uharfbuzz (>=0.23.0)"] -symfont = ["sympy"] -type1 = ["xattr"] -ufo = ["fs (>=2.2.0,<3)"] -unicode = ["unicodedata2 (>=15.0.0)"] -woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] - -[[package]] -name = "future" -version = "0.18.3" -description = "Clean single-source support for Python 3 and 2" -optional = true -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "future-0.18.3.tar.gz", hash = "sha256:34a17436ed1e96697a86f9de3d15a3b0be01d8bc8de9c1dffd59fb8234ed5307"}, -] - -[[package]] -name = "gast" -version = "0.4.0" -description = "Python AST that abstracts the underlying Python version" -optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "gast-0.4.0-py3-none-any.whl", hash = "sha256:b7adcdd5adbebf1adf17378da5ba3f543684dbec47b1cda1f3997e573cd542c4"}, - {file = "gast-0.4.0.tar.gz", hash = "sha256:40feb7b8b8434785585ab224d1568b857edb18297e5a3047f1ba012bc83b42c1"}, -] - -[[package]] -name = "google-auth" -version = "2.17.0" -description = "Google Authentication Library" -optional = true -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" -files = [ - {file = "google-auth-2.17.0.tar.gz", hash = "sha256:f51d26ebb3e5d723b9a7dbd310b6c88654ef1ad1fc35750d1fdba48ca4d82f52"}, - {file = "google_auth-2.17.0-py2.py3-none-any.whl", hash = "sha256:45ba9b4b3e49406de3c5451697820694b2f6ce8a6b75bb187852fdae231dab94"}, -] - -[package.dependencies] -cachetools = ">=2.0.0,<6.0" -pyasn1-modules = ">=0.2.1" -rsa = {version = ">=3.1.4,<5", markers = "python_version >= \"3.6\""} -six = ">=1.9.0" - -[package.extras] -aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "requests (>=2.20.0,<3.0.0dev)"] -enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] -pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] -reauth = ["pyu2f (>=0.1.5)"] -requests = ["requests (>=2.20.0,<3.0.0dev)"] - -[[package]] -name = "google-auth-oauthlib" -version = "0.4.6" -description = "Google Authentication Library" -optional = true -python-versions = ">=3.6" -files = [ - {file = "google-auth-oauthlib-0.4.6.tar.gz", hash = "sha256:a90a072f6993f2c327067bf65270046384cda5a8ecb20b94ea9a687f1f233a7a"}, - {file = "google_auth_oauthlib-0.4.6-py2.py3-none-any.whl", hash = "sha256:3f2a6e802eebbb6fb736a370fbf3b055edcb6b52878bf2f26330b5e041316c73"}, -] - -[package.dependencies] -google-auth = ">=1.0.0" -requests-oauthlib = ">=0.7.0" - -[package.extras] -tool = ["click (>=6.0.0)"] - -[[package]] -name = "google-pasta" -version = "0.2.0" -description = "pasta is an AST-based Python refactoring library" -optional = true -python-versions = "*" -files = [ - {file = "google-pasta-0.2.0.tar.gz", hash = "sha256:c9f2c8dfc8f96d0d5808299920721be30c9eec37f2389f28904f454565c8a16e"}, - {file = "google_pasta-0.2.0-py2-none-any.whl", hash = "sha256:4612951da876b1a10fe3960d7226f0c7682cf901e16ac06e473b267a5afa8954"}, - {file = "google_pasta-0.2.0-py3-none-any.whl", hash = "sha256:b32482794a366b5366a32c92a9a9201b107821889935a02b3e51f6b432ea84ed"}, -] - -[package.dependencies] -six = "*" - -[[package]] -name = "graphviz" -version = "0.8.4" -description = 
"Simple Python interface for Graphviz" -optional = true -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" -files = [ - {file = "graphviz-0.8.4-py2.py3-none-any.whl", hash = "sha256:7caa53f0b0be42c5f2eaa3f3d71dcc863b15bacceb5d531c2ad7519e1980ff82"}, - {file = "graphviz-0.8.4.zip", hash = "sha256:4958a19cbd8461757a08db308a4a15c3d586660417e1e364f0107d2fe481689f"}, -] - -[package.extras] -dev = ["flake8", "pep8-naming", "tox (>=3.0)", "twine", "wheel"] -docs = ["sphinx (>=1.3)", "sphinx-rtd-theme"] -test = ["mock (>=2)", "pytest (>=3.4)", "pytest-cov", "pytest-mock (>=1.8)"] - -[[package]] -name = "grpcio" -version = "1.53.0" -description = "HTTP/2-based RPC framework" -optional = true -python-versions = ">=3.7" -files = [ - {file = "grpcio-1.53.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:752d2949b40e12e6ad3ed8cc552a65b54d226504f6b1fb67cab2ccee502cc06f"}, - {file = "grpcio-1.53.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:8a48fd3a7222be226bb86b7b413ad248f17f3101a524018cdc4562eeae1eb2a3"}, - {file = "grpcio-1.53.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:f3e837d29f0e1b9d6e7b29d569e2e9b0da61889e41879832ea15569c251c303a"}, - {file = "grpcio-1.53.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aef7d30242409c3aa5839b501e877e453a2c8d3759ca8230dd5a21cda029f046"}, - {file = "grpcio-1.53.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6f90698b5d1c5dd7b3236cd1fa959d7b80e17923f918d5be020b65f1c78b173"}, - {file = "grpcio-1.53.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a96c3c7f564b263c5d7c0e49a337166c8611e89c4c919f66dba7b9a84abad137"}, - {file = "grpcio-1.53.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ee81349411648d1abc94095c68cd25e3c2812e4e0367f9a9355be1e804a5135c"}, - {file = "grpcio-1.53.0-cp310-cp310-win32.whl", hash = "sha256:fdc6191587de410a184550d4143e2b24a14df495c86ca15e59508710681690ac"}, - {file = "grpcio-1.53.0-cp310-cp310-win_amd64.whl", hash = "sha256:658ffe1e39171be00490db5bd3b966f79634ac4215a1eb9a85c6cd6783bf7f6e"}, - {file = "grpcio-1.53.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:1b172e6d497191940c4b8d75b53de82dc252e15b61de2951d577ec5b43316b29"}, - {file = "grpcio-1.53.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:82434ba3a5935e47908bc861ce1ebc43c2edfc1001d235d6e31e5d3ed55815f7"}, - {file = "grpcio-1.53.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:1c734a2d4843e4e14ececf5600c3c4750990ec319e1299db7e4f0d02c25c1467"}, - {file = "grpcio-1.53.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b6a2ead3de3b2d53119d473aa2f224030257ef33af1e4ddabd4afee1dea5f04c"}, - {file = "grpcio-1.53.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a34d6e905f071f9b945cabbcc776e2055de1fdb59cd13683d9aa0a8f265b5bf9"}, - {file = "grpcio-1.53.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eaf8e3b97caaf9415227a3c6ca5aa8d800fecadd526538d2bf8f11af783f1550"}, - {file = "grpcio-1.53.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:da95778d37be8e4e9afca771a83424f892296f5dfb2a100eda2571a1d8bbc0dc"}, - {file = "grpcio-1.53.0-cp311-cp311-win32.whl", hash = "sha256:e4f513d63df6336fd84b74b701f17d1bb3b64e9d78a6ed5b5e8a198bbbe8bbfa"}, - {file = "grpcio-1.53.0-cp311-cp311-win_amd64.whl", hash = "sha256:ddb2511fbbb440ed9e5c9a4b9b870f2ed649b7715859fd6f2ebc585ee85c0364"}, - {file = "grpcio-1.53.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:2a912397eb8d23c177d6d64e3c8bc46b8a1c7680b090d9f13a640b104aaec77c"}, - {file = 
"grpcio-1.53.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:55930c56b8f5b347d6c8c609cc341949a97e176c90f5cbb01d148d778f3bbd23"}, - {file = "grpcio-1.53.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:6601d812105583948ab9c6e403a7e2dba6e387cc678c010e74f2d6d589d1d1b3"}, - {file = "grpcio-1.53.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c705e0c21acb0e8478a00e7e773ad0ecdb34bd0e4adc282d3d2f51ba3961aac7"}, - {file = "grpcio-1.53.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba074af9ca268ad7b05d3fc2b920b5fb3c083da94ab63637aaf67f4f71ecb755"}, - {file = "grpcio-1.53.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:14817de09317dd7d3fbc8272864288320739973ef0f4b56bf2c0032349da8cdf"}, - {file = "grpcio-1.53.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c7ad9fbedb93f331c2e9054e202e95cf825b885811f1bcbbdfdc301e451442db"}, - {file = "grpcio-1.53.0-cp37-cp37m-win_amd64.whl", hash = "sha256:dad5b302a4c21c604d88a5d441973f320134e6ff6a84ecef9c1139e5ffd466f6"}, - {file = "grpcio-1.53.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:fa8eaac75d3107e3f5465f2c9e3bbd13db21790c6e45b7de1756eba16b050aca"}, - {file = "grpcio-1.53.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:104a2210edd3776c38448b4f76c2f16e527adafbde171fc72a8a32976c20abc7"}, - {file = "grpcio-1.53.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:dbc1ba968639c1d23476f75c356e549e7bbf2d8d6688717dcab5290e88e8482b"}, - {file = "grpcio-1.53.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:95952d3fe795b06af29bb8ec7bbf3342cdd867fc17b77cc25e6733d23fa6c519"}, - {file = "grpcio-1.53.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f144a790f14c51b8a8e591eb5af40507ffee45ea6b818c2482f0457fec2e1a2e"}, - {file = "grpcio-1.53.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0698c094688a2dd4c7c2f2c0e3e142cac439a64d1cef6904c97f6cde38ba422f"}, - {file = "grpcio-1.53.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6b6d60b0958be711bab047e9f4df5dbbc40367955f8651232bfdcdd21450b9ab"}, - {file = "grpcio-1.53.0-cp38-cp38-win32.whl", hash = "sha256:1948539ce78805d4e6256ab0e048ec793956d54787dc9d6777df71c1d19c7f81"}, - {file = "grpcio-1.53.0-cp38-cp38-win_amd64.whl", hash = "sha256:df9ba1183b3f649210788cf80c239041dddcb375d6142d8bccafcfdf549522cd"}, - {file = "grpcio-1.53.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:19caa5b7282a89b799e63776ff602bb39604f7ca98db6df27e2de06756ae86c3"}, - {file = "grpcio-1.53.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:b5bd026ac928c96cc23149e6ef79183125542062eb6d1ccec34c0a37e02255e7"}, - {file = "grpcio-1.53.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:7dc8584ca6c015ad82e186e82f4c0fe977394588f66b8ecfc4ec873285314619"}, - {file = "grpcio-1.53.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2eddaae8af625e45b5c8500dcca1043264d751a6872cde2eda5022df8a336959"}, - {file = "grpcio-1.53.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5fb6f3d7824696c1c9f2ad36ddb080ba5a86f2d929ef712d511b4d9972d3d27"}, - {file = "grpcio-1.53.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8270d1dc2c98ab57e6dbf36fa187db8df4c036f04a398e5d5e25b4e01a766d70"}, - {file = "grpcio-1.53.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:976a7f24eb213e8429cab78d5e120500dfcdeb01041f1f5a77b17b9101902615"}, - {file = "grpcio-1.53.0-cp39-cp39-win32.whl", hash = "sha256:9c84a481451e7174f3a764a44150f93b041ab51045aa33d7b5b68b6979114e48"}, - {file = 
"grpcio-1.53.0-cp39-cp39-win_amd64.whl", hash = "sha256:6beb84f83360ff29a3654f43f251ec11b809dcb5524b698d711550243debd289"}, - {file = "grpcio-1.53.0.tar.gz", hash = "sha256:a4952899b4931a6ba12951f9a141ef3e74ff8a6ec9aa2dc602afa40f63595e33"}, -] - -[package.extras] -protobuf = ["grpcio-tools (>=1.53.0)"] - -[[package]] -name = "h5py" -version = "3.8.0" -description = "Read and write HDF5 files from Python" -optional = true -python-versions = ">=3.7" -files = [ - {file = "h5py-3.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:533d7dad466ddb7e3b30af274b630eb7c1a6e4ddf01d1c373a0334dc2152110a"}, - {file = "h5py-3.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c873ba9fd4fa875ad62ce0e4891725e257a8fe7f5abdbc17e51a5d54819be55c"}, - {file = "h5py-3.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98a240cd4c1bfd568aaa52ec42d263131a2582dab82d74d3d42a0d954cac12be"}, - {file = "h5py-3.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3389b63222b1c7a158bb7fe69d11ca00066740ec5574596d47a2fe5317f563a"}, - {file = "h5py-3.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:7f3350fc0a8407d668b13247861c2acd23f7f5fe7d060a3ad9b0820f5fcbcae0"}, - {file = "h5py-3.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:db03e3f2c716205fbdabb34d0848459840585225eb97b4f08998c743821ca323"}, - {file = "h5py-3.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:36761693efbe53df179627a775476dcbc37727d6e920958277a7efbc18f1fb73"}, - {file = "h5py-3.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a506fc223def428f4329e7e1f9fe1c8c593eab226e7c0942c8d75308ad49950"}, - {file = "h5py-3.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33b15aae79e9147aebe1d0e54099cbcde8d65e3e227cd5b59e49b1272aa0e09d"}, - {file = "h5py-3.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:9f6f6ffadd6bfa9b2c5b334805eb4b19ca0a5620433659d8f7fb86692c40a359"}, - {file = "h5py-3.8.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8f55d9c6c84d7d09c79fb85979e97b81ec6071cc776a97eb6b96f8f6ec767323"}, - {file = "h5py-3.8.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b685453e538b2b5934c58a644ac3f3b3d0cec1a01b6fb26d57388e9f9b674ad0"}, - {file = "h5py-3.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:377865821fe80ad984d003723d6f8890bd54ceeb5981b43c0313b9df95411b30"}, - {file = "h5py-3.8.0-cp37-cp37m-win_amd64.whl", hash = "sha256:0fef76e10b9216657fa37e7edff6d8be0709b25bd5066474c229b56cf0098df9"}, - {file = "h5py-3.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:26ffc344ec9984d2cd3ca0265007299a8bac8d85c1ad48f4639d8d3aed2af171"}, - {file = "h5py-3.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bacaa1c16810dd2b3e4417f8e730971b7c4d53d234de61fe4a918db78e80e1e4"}, - {file = "h5py-3.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bae730580ae928de409d63cbe4fdca4c82c3ad2bed30511d19d34e995d63c77e"}, - {file = "h5py-3.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f47f757d1b76f0ecb8aa0508ec8d1b390df67a8b67ee2515dc1b046f3a1596ea"}, - {file = "h5py-3.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:f891b17e3a3e974e93f9e34e7cca9f530806543571ce078998676a555837d91d"}, - {file = "h5py-3.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:290e00fa2de74a10688d1bac98d5a9cdd43f14f58e562c580b5b3dfbd358ecae"}, - {file = "h5py-3.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:03890b1c123d024fb0239a3279737d5432498c1901c354f8b10d8221d1d16235"}, - {file = "h5py-3.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7865de06779b14d98068da387333ad9bf2756b5b579cc887fac169bc08f87c3"}, - {file = "h5py-3.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49bc857635f935fa30e92e61ac1e87496df8f260a6945a3235e43a9890426866"}, - {file = "h5py-3.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:5fd2252d1fc364ba0e93dd0b7089f4906b66805cb4e6aca7fa8874ac08649647"}, - {file = "h5py-3.8.0.tar.gz", hash = "sha256:6fead82f0c4000cf38d53f9c030780d81bfa0220218aee13b90b7701c937d95f"}, -] - -[package.dependencies] -numpy = ">=1.14.5" - -[[package]] -name = "humanfriendly" -version = "10.0" -description = "Human friendly output for text interfaces using Python" -optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, - {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, -] - -[package.dependencies] -pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_version >= \"3.8\""} - -[[package]] -name = "idna" -version = "3.4" -description = "Internationalized Domain Names in Applications (IDNA)" -optional = false -python-versions = ">=3.5" -files = [ - {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, - {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, -] - -[[package]] -name = "image" -version = "1.5.33" -description = "Django application that provides cropping, resizing, thumbnailing, overlays and masking for images and videos with the ability to set the center of attention," -optional = false -python-versions = "*" -files = [ - {file = "image-1.5.33.tar.gz", hash = "sha256:baa2e09178277daa50f22fd6d1d51ec78f19c12688921cb9ab5808743f097126"}, -] - -[package.dependencies] -django = "*" -pillow = "*" -six = "*" - -[[package]] -name = "imagesize" -version = "1.4.1" -description = "Getting image size from png/jpeg/jpeg2000/gif file" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"}, - {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, -] - -[[package]] -name = "importlib-metadata" -version = "6.1.0" -description = "Read metadata from Python packages" -optional = false -python-versions = ">=3.7" -files = [ - {file = "importlib_metadata-6.1.0-py3-none-any.whl", hash = "sha256:ff80f3b5394912eb1b108fcfd444dc78b7f1f3e16b16188054bd01cb9cb86f09"}, - {file = "importlib_metadata-6.1.0.tar.gz", hash = "sha256:43ce9281e097583d758c2c708c4376371261a02c34682491a8e98352365aad20"}, -] - -[package.dependencies] -zipp = ">=0.5" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -perf = ["ipython"] -testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy 
(>=0.9.1)", "pytest-perf (>=0.9.2)"] - -[[package]] -name = "importlib-resources" -version = "5.12.0" -description = "Read resources from Python packages" -optional = false -python-versions = ">=3.7" -files = [ - {file = "importlib_resources-5.12.0-py3-none-any.whl", hash = "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"}, - {file = "importlib_resources-5.12.0.tar.gz", hash = "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6"}, -] - -[package.dependencies] -zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] - -[[package]] -name = "iniconfig" -version = "2.0.0" -description = "brain-dead simple config-ini parsing" -optional = false -python-versions = ">=3.7" -files = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, -] - -[[package]] -name = "isort" -version = "5.12.0" -description = "A Python utility / library to sort Python imports." -optional = false -python-versions = ">=3.8.0" -files = [ - {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, - {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, -] - -[package.extras] -colors = ["colorama (>=0.4.3)"] -pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"] -plugins = ["setuptools"] -requirements-deprecated-finder = ["pip-api", "pipreqs"] - -[[package]] -name = "jax" -version = "0.4.8" -description = "Differentiate, compile, and transform Numpy code." -optional = true -python-versions = ">=3.8" -files = [ - {file = "jax-0.4.8.tar.gz", hash = "sha256:08116481f7336db16c24812bfb5e6f9786915f4c2f6ff4028331fa69e7535202"}, -] - -[package.dependencies] -ml_dtypes = ">=0.0.3" -numpy = ">=1.21" -opt_einsum = "*" -scipy = ">=1.7" - -[package.extras] -australis = ["protobuf (>=3.13,<4)"] -ci = ["jaxlib (==0.4.7)"] -cpu = ["jaxlib (==0.4.7)"] -cuda = ["jaxlib (==0.4.7+cuda11.cudnn86)"] -cuda11-cudnn82 = ["jaxlib (==0.4.7+cuda11.cudnn82)"] -cuda11-cudnn86 = ["jaxlib (==0.4.7+cuda11.cudnn86)"] -cuda11-local = ["jaxlib (==0.4.7+cuda11.cudnn86)"] -cuda11-pip = ["jaxlib (==0.4.7+cuda11.cudnn86)", "nvidia-cublas-cu11 (>=11.11)", "nvidia-cuda-nvcc-cu11 (>=11.8)", "nvidia-cuda-runtime-cu11 (>=11.8)", "nvidia-cudnn-cu11 (>=8.6)", "nvidia-cufft-cu11 (>=10.9)", "nvidia-cusolver-cu11 (>=11.4)", "nvidia-cusparse-cu11 (>=11.7)"] -cuda12-local = ["jaxlib (==0.4.7+cuda12.cudnn88)"] -cuda12-pip = ["jaxlib (==0.4.7+cuda12.cudnn88)", "nvidia-cublas-cu12", "nvidia-cuda-nvcc-cu12", "nvidia-cuda-runtime-cu12", "nvidia-cudnn-cu12", "nvidia-cufft-cu12", "nvidia-cusolver-cu12", "nvidia-cusparse-cu12"] -minimum-jaxlib = ["jaxlib (==0.4.7)"] -tpu = ["jaxlib (==0.4.7)", "libtpu-nightly (==0.1.dev20230327)", "requests"] - -[[package]] -name = "jinja2" -version = "3.1.2" -description = "A very fast and expressive template engine." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, - {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, -] - -[package.dependencies] -MarkupSafe = ">=2.0" - -[package.extras] -i18n = ["Babel (>=2.7)"] - -[[package]] -name = "keras" -version = "2.12.0" -description = "Deep learning for humans." -optional = true -python-versions = ">=3.8" -files = [ - {file = "keras-2.12.0-py2.py3-none-any.whl", hash = "sha256:35c39534011e909645fb93515452e98e1a0ce23727b55d4918b9c58b2308c15e"}, -] - -[[package]] -name = "kiwisolver" -version = "1.4.4" -description = "A fast implementation of the Cassowary constraint solver" -optional = false -python-versions = ">=3.7" -files = [ - {file = "kiwisolver-1.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2f5e60fabb7343a836360c4f0919b8cd0d6dbf08ad2ca6b9cf90bf0c76a3c4f6"}, - {file = "kiwisolver-1.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:10ee06759482c78bdb864f4109886dff7b8a56529bc1609d4f1112b93fe6423c"}, - {file = "kiwisolver-1.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c79ebe8f3676a4c6630fd3f777f3cfecf9289666c84e775a67d1d358578dc2e3"}, - {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:abbe9fa13da955feb8202e215c4018f4bb57469b1b78c7a4c5c7b93001699938"}, - {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7577c1987baa3adc4b3c62c33bd1118c3ef5c8ddef36f0f2c950ae0b199e100d"}, - {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8ad8285b01b0d4695102546b342b493b3ccc6781fc28c8c6a1bb63e95d22f09"}, - {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ed58b8acf29798b036d347791141767ccf65eee7f26bde03a71c944449e53de"}, - {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a68b62a02953b9841730db7797422f983935aeefceb1679f0fc85cbfbd311c32"}, - {file = "kiwisolver-1.4.4-cp310-cp310-win32.whl", hash = "sha256:e92a513161077b53447160b9bd8f522edfbed4bd9759e4c18ab05d7ef7e49408"}, - {file = "kiwisolver-1.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:3fe20f63c9ecee44560d0e7f116b3a747a5d7203376abeea292ab3152334d004"}, - {file = "kiwisolver-1.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e0ea21f66820452a3f5d1655f8704a60d66ba1191359b96541eaf457710a5fc6"}, - {file = "kiwisolver-1.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bc9db8a3efb3e403e4ecc6cd9489ea2bac94244f80c78e27c31dcc00d2790ac2"}, - {file = "kiwisolver-1.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d5b61785a9ce44e5a4b880272baa7cf6c8f48a5180c3e81c59553ba0cb0821ca"}, - {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c2dbb44c3f7e6c4d3487b31037b1bdbf424d97687c1747ce4ff2895795c9bf69"}, - {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6295ecd49304dcf3bfbfa45d9a081c96509e95f4b9d0eb7ee4ec0530c4a96514"}, - {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bd472dbe5e136f96a4b18f295d159d7f26fd399136f5b17b08c4e5f498cd494"}, - {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:bf7d9fce9bcc4752ca4a1b80aabd38f6d19009ea5cbda0e0856983cf6d0023f5"}, - {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d6601aed50c74e0ef02f4204da1816147a6d3fbdc8b3872d263338a9052c51"}, - {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:877272cf6b4b7e94c9614f9b10140e198d2186363728ed0f701c6eee1baec1da"}, - {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:db608a6757adabb32f1cfe6066e39b3706d8c3aa69bbc353a5b61edad36a5cb4"}, - {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:5853eb494c71e267912275e5586fe281444eb5e722de4e131cddf9d442615626"}, - {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:f0a1dbdb5ecbef0d34eb77e56fcb3e95bbd7e50835d9782a45df81cc46949750"}, - {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:283dffbf061a4ec60391d51e6155e372a1f7a4f5b15d59c8505339454f8989e4"}, - {file = "kiwisolver-1.4.4-cp311-cp311-win32.whl", hash = "sha256:d06adcfa62a4431d404c31216f0f8ac97397d799cd53800e9d3efc2fbb3cf14e"}, - {file = "kiwisolver-1.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:e7da3fec7408813a7cebc9e4ec55afed2d0fd65c4754bc376bf03498d4e92686"}, - {file = "kiwisolver-1.4.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:62ac9cc684da4cf1778d07a89bf5f81b35834cb96ca523d3a7fb32509380cbf6"}, - {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41dae968a94b1ef1897cb322b39360a0812661dba7c682aa45098eb8e193dbdf"}, - {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02f79693ec433cb4b5f51694e8477ae83b3205768a6fb48ffba60549080e295b"}, - {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0611a0a2a518464c05ddd5a3a1a0e856ccc10e67079bb17f265ad19ab3c7597"}, - {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:db5283d90da4174865d520e7366801a93777201e91e79bacbac6e6927cbceede"}, - {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1041feb4cda8708ce73bb4dcb9ce1ccf49d553bf87c3954bdfa46f0c3f77252c"}, - {file = "kiwisolver-1.4.4-cp37-cp37m-win32.whl", hash = "sha256:a553dadda40fef6bfa1456dc4be49b113aa92c2a9a9e8711e955618cd69622e3"}, - {file = "kiwisolver-1.4.4-cp37-cp37m-win_amd64.whl", hash = "sha256:03baab2d6b4a54ddbb43bba1a3a2d1627e82d205c5cf8f4c924dc49284b87166"}, - {file = "kiwisolver-1.4.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:841293b17ad704d70c578f1f0013c890e219952169ce8a24ebc063eecf775454"}, - {file = "kiwisolver-1.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f4f270de01dd3e129a72efad823da90cc4d6aafb64c410c9033aba70db9f1ff0"}, - {file = "kiwisolver-1.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f9f39e2f049db33a908319cf46624a569b36983c7c78318e9726a4cb8923b26c"}, - {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c97528e64cb9ebeff9701e7938653a9951922f2a38bd847787d4a8e498cc83ae"}, - {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d1573129aa0fd901076e2bfb4275a35f5b7aa60fbfb984499d661ec950320b0"}, - {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ad881edc7ccb9d65b0224f4e4d05a1e85cf62d73aab798943df6d48ab0cd79a1"}, - {file = 
"kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b428ef021242344340460fa4c9185d0b1f66fbdbfecc6c63eff4b7c29fad429d"}, - {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2e407cb4bd5a13984a6c2c0fe1845e4e41e96f183e5e5cd4d77a857d9693494c"}, - {file = "kiwisolver-1.4.4-cp38-cp38-win32.whl", hash = "sha256:75facbe9606748f43428fc91a43edb46c7ff68889b91fa31f53b58894503a191"}, - {file = "kiwisolver-1.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:5bce61af018b0cb2055e0e72e7d65290d822d3feee430b7b8203d8a855e78766"}, - {file = "kiwisolver-1.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8c808594c88a025d4e322d5bb549282c93c8e1ba71b790f539567932722d7bd8"}, - {file = "kiwisolver-1.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f0a71d85ecdd570ded8ac3d1c0f480842f49a40beb423bb8014539a9f32a5897"}, - {file = "kiwisolver-1.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b533558eae785e33e8c148a8d9921692a9fe5aa516efbdff8606e7d87b9d5824"}, - {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:efda5fc8cc1c61e4f639b8067d118e742b812c930f708e6667a5ce0d13499e29"}, - {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7c43e1e1206cd421cd92e6b3280d4385d41d7166b3ed577ac20444b6995a445f"}, - {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc8d3bd6c72b2dd9decf16ce70e20abcb3274ba01b4e1c96031e0c4067d1e7cd"}, - {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ea39b0ccc4f5d803e3337dd46bcce60b702be4d86fd0b3d7531ef10fd99a1ac"}, - {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:968f44fdbf6dd757d12920d63b566eeb4d5b395fd2d00d29d7ef00a00582aac9"}, - {file = "kiwisolver-1.4.4-cp39-cp39-win32.whl", hash = "sha256:da7e547706e69e45d95e116e6939488d62174e033b763ab1496b4c29b76fabea"}, - {file = "kiwisolver-1.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:ba59c92039ec0a66103b1d5fe588fa546373587a7d68f5c96f743c3396afc04b"}, - {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:91672bacaa030f92fc2f43b620d7b337fd9a5af28b0d6ed3f77afc43c4a64b5a"}, - {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:787518a6789009c159453da4d6b683f468ef7a65bbde796bcea803ccf191058d"}, - {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da152d8cdcab0e56e4f45eb08b9aea6455845ec83172092f09b0e077ece2cf7a"}, - {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:ecb1fa0db7bf4cff9dac752abb19505a233c7f16684c5826d1f11ebd9472b871"}, - {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:28bc5b299f48150b5f822ce68624e445040595a4ac3d59251703779836eceff9"}, - {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:81e38381b782cc7e1e46c4e14cd997ee6040768101aefc8fa3c24a4cc58e98f8"}, - {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2a66fdfb34e05b705620dd567f5a03f239a088d5a3f321e7b6ac3239d22aa286"}, - {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:872b8ca05c40d309ed13eb2e582cab0c5a05e81e987ab9c521bf05ad1d5cf5cb"}, - {file = 
"kiwisolver-1.4.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:70e7c2e7b750585569564e2e5ca9845acfaa5da56ac46df68414f29fea97be9f"}, - {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9f85003f5dfa867e86d53fac6f7e6f30c045673fa27b603c397753bebadc3008"}, - {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e307eb9bd99801f82789b44bb45e9f541961831c7311521b13a6c85afc09767"}, - {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1792d939ec70abe76f5054d3f36ed5656021dcad1322d1cc996d4e54165cef9"}, - {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6cb459eea32a4e2cf18ba5fcece2dbdf496384413bc1bae15583f19e567f3b2"}, - {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:36dafec3d6d6088d34e2de6b85f9d8e2324eb734162fba59d2ba9ed7a2043d5b"}, - {file = "kiwisolver-1.4.4.tar.gz", hash = "sha256:d41997519fcba4a1e46eb4a2fe31bc12f0ff957b2b81bac28db24744f333e955"}, -] - -[[package]] -name = "lazy-object-proxy" -version = "1.9.0" -description = "A fast and thorough lazy object proxy." -optional = false -python-versions = ">=3.7" -files = [ - {file = "lazy-object-proxy-1.9.0.tar.gz", hash = "sha256:659fb5809fa4629b8a1ac5106f669cfc7bef26fbb389dda53b3e010d1ac4ebae"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b40387277b0ed2d0602b8293b94d7257e17d1479e257b4de114ea11a8cb7f2d7"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8c6cfb338b133fbdbc5cfaa10fe3c6aeea827db80c978dbd13bc9dd8526b7d4"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:721532711daa7db0d8b779b0bb0318fa87af1c10d7fe5e52ef30f8eff254d0cd"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66a3de4a3ec06cd8af3f61b8e1ec67614fbb7c995d02fa224813cb7afefee701"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1aa3de4088c89a1b69f8ec0dcc169aa725b0ff017899ac568fe44ddc1396df46"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-win32.whl", hash = "sha256:f0705c376533ed2a9e5e97aacdbfe04cecd71e0aa84c7c0595d02ef93b6e4455"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:ea806fd4c37bf7e7ad82537b0757999264d5f70c45468447bb2b91afdbe73a6e"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:946d27deaff6cf8452ed0dba83ba38839a87f4f7a9732e8f9fd4107b21e6ff07"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79a31b086e7e68b24b99b23d57723ef7e2c6d81ed21007b6281ebcd1688acb0a"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f699ac1c768270c9e384e4cbd268d6e67aebcfae6cd623b4d7c3bfde5a35db59"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bfb38f9ffb53b942f2b5954e0f610f1e721ccebe9cce9025a38c8ccf4a5183a4"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:189bbd5d41ae7a498397287c408617fe5c48633e7755287b21d741f7db2706a9"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-win32.whl", hash = 
"sha256:81fc4d08b062b535d95c9ea70dbe8a335c45c04029878e62d744bdced5141586"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:f2457189d8257dd41ae9b434ba33298aec198e30adf2dcdaaa3a28b9994f6adb"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d9e25ef10a39e8afe59a5c348a4dbf29b4868ab76269f81ce1674494e2565a6e"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cbf9b082426036e19c6924a9ce90c740a9861e2bdc27a4834fd0a910742ac1e8"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f5fa4a61ce2438267163891961cfd5e32ec97a2c444e5b842d574251ade27d2"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:8fa02eaab317b1e9e03f69aab1f91e120e7899b392c4fc19807a8278a07a97e8"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e7c21c95cae3c05c14aafffe2865bbd5e377cfc1348c4f7751d9dc9a48ca4bda"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win32.whl", hash = "sha256:f12ad7126ae0c98d601a7ee504c1122bcef553d1d5e0c3bfa77b16b3968d2734"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:edd20c5a55acb67c7ed471fa2b5fb66cb17f61430b7a6b9c3b4a1e40293b1671"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0daa332786cf3bb49e10dc6a17a52f6a8f9601b4cf5c295a4f85854d61de63"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cd077f3d04a58e83d04b20e334f678c2b0ff9879b9375ed107d5d07ff160171"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:660c94ea760b3ce47d1855a30984c78327500493d396eac4dfd8bd82041b22be"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:212774e4dfa851e74d393a2370871e174d7ff0ebc980907723bb67d25c8a7c30"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f0117049dd1d5635bbff65444496c90e0baa48ea405125c088e93d9cf4525b11"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-win32.whl", hash = "sha256:0a891e4e41b54fd5b8313b96399f8b0e173bbbfc03c7631f01efbe29bb0bcf82"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:9990d8e71b9f6488e91ad25f322898c136b008d87bf852ff65391b004da5e17b"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9e7551208b2aded9c1447453ee366f1c4070602b3d932ace044715d89666899b"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f83ac4d83ef0ab017683d715ed356e30dd48a93746309c8f3517e1287523ef4"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7322c3d6f1766d4ef1e51a465f47955f1e8123caee67dd641e67d539a534d006"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:18b78ec83edbbeb69efdc0e9c1cb41a3b1b1ed11ddd8ded602464c3fc6020494"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:09763491ce220c0299688940f8dc2c5d05fd1f45af1e42e636b2e8b2303e4382"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-win32.whl", hash = "sha256:9090d8e53235aa280fc9239a86ae3ea8ac58eff66a705fa6aa2ec4968b95c821"}, - {file = 
"lazy_object_proxy-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:db1c1722726f47e10e0b5fdbf15ac3b8adb58c091d12b3ab713965795036985f"}, -] - -[[package]] -name = "libclang" -version = "16.0.0" -description = "Clang Python Bindings, mirrored from the official LLVM repo: https://github.com/llvm/llvm-project/tree/main/clang/bindings/python, to make the installation process easier." -optional = true -python-versions = "*" -files = [ - {file = "libclang-16.0.0-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:65258a6bb3e7dc31dc9b26f8d42f53c9d3b959643ade291fcd1aef4855303ca6"}, - {file = "libclang-16.0.0-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:af55a4aa86fdfe6b2ec68bc8cfe5fdac6c448d591ca7648be86ca17099b41ca8"}, - {file = "libclang-16.0.0-py2.py3-none-manylinux2010_x86_64.whl", hash = "sha256:a043138caaf2cb076ebb060c6281ec95612926645d425c691991fc9df00e8a24"}, - {file = "libclang-16.0.0-py2.py3-none-manylinux2014_aarch64.whl", hash = "sha256:eb59652cb0559c0e71784ff4c8ba24c14644becc907b1446563ecfaa622d523b"}, - {file = "libclang-16.0.0-py2.py3-none-manylinux2014_armv7l.whl", hash = "sha256:7b6686b67a0daa84b4c614bcc119578329fc4fbb52b919565b7376b507c4793b"}, - {file = "libclang-16.0.0-py2.py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:2adce42ae652f312245b8f4eda6f30b4076fb61f7619f2dfd0a0c31dee4c32b9"}, - {file = "libclang-16.0.0-py2.py3-none-win_amd64.whl", hash = "sha256:ee20bf93e3dd330f71fc50cdbf13b92ced0aec8e540be64251db53502a9b33f7"}, - {file = "libclang-16.0.0-py2.py3-none-win_arm64.whl", hash = "sha256:bf4628fc4da7a1dd06a244f9b8e121c5ec68076a763c59d6b13cbb103acc935b"}, -] - -[[package]] -name = "markdown" -version = "3.4.3" -description = "Python implementation of John Gruber's Markdown." -optional = true -python-versions = ">=3.7" -files = [ - {file = "Markdown-3.4.3-py3-none-any.whl", hash = "sha256:065fd4df22da73a625f14890dd77eb8040edcbd68794bcd35943be14490608b2"}, - {file = "Markdown-3.4.3.tar.gz", hash = "sha256:8bf101198e004dc93e84a12a7395e31aac6a9c9942848ae1d99b9d72cf9b3520"}, -] - -[package.dependencies] -importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} - -[package.extras] -testing = ["coverage", "pyyaml"] - -[[package]] -name = "markupsafe" -version = "2.1.2" -description = "Safely add untrusted strings to HTML/XML markup." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "MarkupSafe-2.1.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:665a36ae6f8f20a4676b53224e33d456a6f5a72657d9c83c2aa00765072f31f7"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:340bea174e9761308703ae988e982005aedf427de816d1afe98147668cc03036"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22152d00bf4a9c7c83960521fc558f55a1adbc0631fbb00a9471e097b19d72e1"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28057e985dace2f478e042eaa15606c7efccb700797660629da387eb289b9323"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca244fa73f50a800cf8c3ebf7fd93149ec37f5cb9596aa8873ae2c1d23498601"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9d971ec1e79906046aa3ca266de79eac42f1dbf3612a05dc9368125952bd1a1"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7e007132af78ea9df29495dbf7b5824cb71648d7133cf7848a2a5dd00d36f9ff"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7313ce6a199651c4ed9d7e4cfb4aa56fe923b1adf9af3b420ee14e6d9a73df65"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-win32.whl", hash = "sha256:c4a549890a45f57f1ebf99c067a4ad0cb423a05544accaf2b065246827ed9603"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:835fb5e38fd89328e9c81067fd642b3593c33e1e17e2fdbf77f5676abb14a156"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2ec4f2d48ae59bbb9d1f9d7efb9236ab81429a764dedca114f5fdabbc3788013"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:608e7073dfa9e38a85d38474c082d4281f4ce276ac0010224eaba11e929dd53a"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:65608c35bfb8a76763f37036547f7adfd09270fbdbf96608be2bead319728fcd"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2bfb563d0211ce16b63c7cb9395d2c682a23187f54c3d79bfec33e6705473c6"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da25303d91526aac3672ee6d49a2f3db2d9502a4a60b55519feb1a4c7714e07d"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9cad97ab29dfc3f0249b483412c85c8ef4766d96cdf9dcf5a1e3caa3f3661cf1"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:085fd3201e7b12809f9e6e9bc1e5c96a368c8523fad5afb02afe3c051ae4afcc"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1bea30e9bf331f3fef67e0a3877b2288593c98a21ccb2cf29b74c581a4eb3af0"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-win32.whl", hash = "sha256:7df70907e00c970c60b9ef2938d894a9381f38e6b9db73c5be35e59d92e06625"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:e55e40ff0cc8cc5c07996915ad367fa47da6b3fc091fdadca7f5403239c5fec3"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a6e40afa7f45939ca356f348c8e23048e02cb109ced1eb8420961b2f40fb373a"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf877ab4ed6e302ec1d04952ca358b381a882fbd9d1b07cccbfd61783561f98a"}, - {file = 
"MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63ba06c9941e46fa389d389644e2d8225e0e3e5ebcc4ff1ea8506dce646f8c8a"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1cd098434e83e656abf198f103a8207a8187c0fc110306691a2e94a78d0abb2"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:55f44b440d491028addb3b88f72207d71eeebfb7b5dbf0643f7c023ae1fba619"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a6f2fcca746e8d5910e18782f976489939d54a91f9411c32051b4aab2bd7c513"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0b462104ba25f1ac006fdab8b6a01ebbfbce9ed37fd37fd4acd70c67c973e460"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-win32.whl", hash = "sha256:7668b52e102d0ed87cb082380a7e2e1e78737ddecdde129acadb0eccc5423859"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6d6607f98fcf17e534162f0709aaad3ab7a96032723d8ac8750ffe17ae5a0666"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a806db027852538d2ad7555b203300173dd1b77ba116de92da9afbc3a3be3eed"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a4abaec6ca3ad8660690236d11bfe28dfd707778e2442b45addd2f086d6ef094"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f03a532d7dee1bed20bc4884194a16160a2de9ffc6354b3878ec9682bb623c54"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cf06cdc1dda95223e9d2d3c58d3b178aa5dacb35ee7e3bbac10e4e1faacb419"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22731d79ed2eb25059ae3df1dfc9cb1546691cc41f4e3130fe6bfbc3ecbbecfa"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f8ffb705ffcf5ddd0e80b65ddf7bed7ee4f5a441ea7d3419e861a12eaf41af58"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8db032bf0ce9022a8e41a22598eefc802314e81b879ae093f36ce9ddf39ab1ba"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2298c859cfc5463f1b64bd55cb3e602528db6fa0f3cfd568d3605c50678f8f03"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-win32.whl", hash = "sha256:50c42830a633fa0cf9e7d27664637532791bfc31c731a87b202d2d8ac40c3ea2"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:bb06feb762bade6bf3c8b844462274db0c76acc95c52abe8dbed28ae3d44a147"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99625a92da8229df6d44335e6fcc558a5037dd0a760e11d84be2260e6f37002f"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8bca7e26c1dd751236cfb0c6c72d4ad61d986e9a41bbf76cb445f69488b2a2bd"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40627dcf047dadb22cd25ea7ecfe9cbf3bbbad0482ee5920b582f3809c97654f"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40dfd3fefbef579ee058f139733ac336312663c6706d1163b82b3003fb1925c4"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:090376d812fb6ac5f171e5938e82e7f2d7adc2b629101cec0db8b267815c85e2"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:2e7821bffe00aa6bd07a23913b7f4e01328c3d5cc0b40b36c0bd81d362faeb65"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c0a33bc9f02c2b17c3ea382f91b4db0e6cde90b63b296422a939886a7a80de1c"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b8526c6d437855442cdd3d87eede9c425c4445ea011ca38d937db299382e6fa3"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-win32.whl", hash = "sha256:137678c63c977754abe9086a3ec011e8fd985ab90631145dfb9294ad09c102a7"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:0576fe974b40a400449768941d5d0858cc624e3249dfd1e0c33674e5c7ca7aed"}, - {file = "MarkupSafe-2.1.2.tar.gz", hash = "sha256:abcabc8c2b26036d62d4c746381a6f7cf60aafcc653198ad678306986b09450d"}, -] - -[[package]] -name = "matplotlib" -version = "3.7.1" -description = "Python plotting package" -optional = false -python-versions = ">=3.8" -files = [ - {file = "matplotlib-3.7.1-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:95cbc13c1fc6844ab8812a525bbc237fa1470863ff3dace7352e910519e194b1"}, - {file = "matplotlib-3.7.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:08308bae9e91aca1ec6fd6dda66237eef9f6294ddb17f0d0b3c863169bf82353"}, - {file = "matplotlib-3.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:544764ba51900da4639c0f983b323d288f94f65f4024dc40ecb1542d74dc0500"}, - {file = "matplotlib-3.7.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56d94989191de3fcc4e002f93f7f1be5da476385dde410ddafbb70686acf00ea"}, - {file = "matplotlib-3.7.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e99bc9e65901bb9a7ce5e7bb24af03675cbd7c70b30ac670aa263240635999a4"}, - {file = "matplotlib-3.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb7d248c34a341cd4c31a06fd34d64306624c8cd8d0def7abb08792a5abfd556"}, - {file = "matplotlib-3.7.1-cp310-cp310-win32.whl", hash = "sha256:ce463ce590f3825b52e9fe5c19a3c6a69fd7675a39d589e8b5fbe772272b3a24"}, - {file = "matplotlib-3.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:3d7bc90727351fb841e4d8ae620d2d86d8ed92b50473cd2b42ce9186104ecbba"}, - {file = "matplotlib-3.7.1-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:770a205966d641627fd5cf9d3cb4b6280a716522cd36b8b284a8eb1581310f61"}, - {file = "matplotlib-3.7.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f67bfdb83a8232cb7a92b869f9355d677bce24485c460b19d01970b64b2ed476"}, - {file = "matplotlib-3.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2bf092f9210e105f414a043b92af583c98f50050559616930d884387d0772aba"}, - {file = "matplotlib-3.7.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89768d84187f31717349c6bfadc0e0d8c321e8eb34522acec8a67b1236a66332"}, - {file = "matplotlib-3.7.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83111e6388dec67822e2534e13b243cc644c7494a4bb60584edbff91585a83c6"}, - {file = "matplotlib-3.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a867bf73a7eb808ef2afbca03bcdb785dae09595fbe550e1bab0cd023eba3de0"}, - {file = "matplotlib-3.7.1-cp311-cp311-win32.whl", hash = "sha256:fbdeeb58c0cf0595efe89c05c224e0a502d1aa6a8696e68a73c3efc6bc354304"}, - {file = "matplotlib-3.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:c0bd19c72ae53e6ab979f0ac6a3fafceb02d2ecafa023c5cca47acd934d10be7"}, - {file = "matplotlib-3.7.1-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:6eb88d87cb2c49af00d3bbc33a003f89fd9f78d318848da029383bfc08ecfbfb"}, - {file = 
"matplotlib-3.7.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:cf0e4f727534b7b1457898c4f4ae838af1ef87c359b76dcd5330fa31893a3ac7"}, - {file = "matplotlib-3.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:46a561d23b91f30bccfd25429c3c706afe7d73a5cc64ef2dfaf2b2ac47c1a5dc"}, - {file = "matplotlib-3.7.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8704726d33e9aa8a6d5215044b8d00804561971163563e6e6591f9dcf64340cc"}, - {file = "matplotlib-3.7.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4cf327e98ecf08fcbb82685acaf1939d3338548620ab8dfa02828706402c34de"}, - {file = "matplotlib-3.7.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:617f14ae9d53292ece33f45cba8503494ee199a75b44de7717964f70637a36aa"}, - {file = "matplotlib-3.7.1-cp38-cp38-win32.whl", hash = "sha256:7c9a4b2da6fac77bcc41b1ea95fadb314e92508bf5493ceff058e727e7ecf5b0"}, - {file = "matplotlib-3.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:14645aad967684e92fc349493fa10c08a6da514b3d03a5931a1bac26e6792bd1"}, - {file = "matplotlib-3.7.1-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:81a6b377ea444336538638d31fdb39af6be1a043ca5e343fe18d0f17e098770b"}, - {file = "matplotlib-3.7.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:28506a03bd7f3fe59cd3cd4ceb2a8d8a2b1db41afede01f66c42561b9be7b4b7"}, - {file = "matplotlib-3.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8c587963b85ce41e0a8af53b9b2de8dddbf5ece4c34553f7bd9d066148dc719c"}, - {file = "matplotlib-3.7.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8bf26ade3ff0f27668989d98c8435ce9327d24cffb7f07d24ef609e33d582439"}, - {file = "matplotlib-3.7.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:def58098f96a05f90af7e92fd127d21a287068202aa43b2a93476170ebd99e87"}, - {file = "matplotlib-3.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f883a22a56a84dba3b588696a2b8a1ab0d2c3d41be53264115c71b0a942d8fdb"}, - {file = "matplotlib-3.7.1-cp39-cp39-win32.whl", hash = "sha256:4f99e1b234c30c1e9714610eb0c6d2f11809c9c78c984a613ae539ea2ad2eb4b"}, - {file = "matplotlib-3.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:3ba2af245e36990facf67fde840a760128ddd71210b2ab6406e640188d69d136"}, - {file = "matplotlib-3.7.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3032884084f541163f295db8a6536e0abb0db464008fadca6c98aaf84ccf4717"}, - {file = "matplotlib-3.7.1-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a2cb34336110e0ed8bb4f650e817eed61fa064acbefeb3591f1b33e3a84fd96"}, - {file = "matplotlib-3.7.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b867e2f952ed592237a1828f027d332d8ee219ad722345b79a001f49df0936eb"}, - {file = "matplotlib-3.7.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:57bfb8c8ea253be947ccb2bc2d1bb3862c2bccc662ad1b4626e1f5e004557042"}, - {file = "matplotlib-3.7.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:438196cdf5dc8d39b50a45cb6e3f6274edbcf2254f85fa9b895bf85851c3a613"}, - {file = "matplotlib-3.7.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:21e9cff1a58d42e74d01153360de92b326708fb205250150018a52c70f43c290"}, - {file = "matplotlib-3.7.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75d4725d70b7c03e082bbb8a34639ede17f333d7247f56caceb3801cb6ff703d"}, - {file = "matplotlib-3.7.1-pp39-pypy39_pp73-win_amd64.whl", hash = 
"sha256:97cc368a7268141afb5690760921765ed34867ffb9655dd325ed207af85c7529"}, - {file = "matplotlib-3.7.1.tar.gz", hash = "sha256:7b73305f25eab4541bd7ee0b96d87e53ae9c9f1823be5659b806cd85786fe882"}, -] - -[package.dependencies] -contourpy = ">=1.0.1" -cycler = ">=0.10" -fonttools = ">=4.22.0" -importlib-resources = {version = ">=3.2.0", markers = "python_version < \"3.10\""} -kiwisolver = ">=1.0.1" -numpy = ">=1.20" -packaging = ">=20.0" -pillow = ">=6.2.0" -pyparsing = ">=2.3.1" -python-dateutil = ">=2.7" - -[[package]] -name = "mccabe" -version = "0.7.0" -description = "McCabe checker, plugin for flake8" -optional = false -python-versions = ">=3.6" -files = [ - {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, - {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, -] - -[[package]] -name = "ml-dtypes" -version = "0.0.4" -description = "" -optional = true -python-versions = ">=3.7" -files = [ - {file = "ml_dtypes-0.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a03c5acc55a878fac190d428ef01438f930cbef3fb8625c8c8fd2e3adc277607"}, - {file = "ml_dtypes-0.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e600aa70a9f8ee85c9488eb14852124c878ec824c3c7996d2d82010655eabfe"}, - {file = "ml_dtypes-0.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a74c1fb29d2e586f643fb1a70b1dffe9fc35bc3ad8c76ec0797b2bf9f7ac128b"}, - {file = "ml_dtypes-0.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:b3f49901eb42cac259156edc17d4c1922ac47ddd1fe3c05169f445135a07319c"}, - {file = "ml_dtypes-0.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:52aaa9318e2a4ec65a6bc4842df3442a9cfa00a9b8365a08e0370b0dfefc3a5a"}, - {file = "ml_dtypes-0.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db9912d50466d386a4016b16f889722183f6d6c03d9e478fdf62f41e50de0059"}, - {file = "ml_dtypes-0.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ece1269b5311489e26b3f3181d498b8829042f380cd160d7fe02f2393f69a71"}, - {file = "ml_dtypes-0.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:68d2e6c83c762aa6d476ea715ce6b2ac67f519c242cfe93d7a49cb76a83f6650"}, - {file = "ml_dtypes-0.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:85085f9dac85b1eee5f7d2044c47bb3df72abc7785d38d176744fde5782b76ce"}, - {file = "ml_dtypes-0.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a75ef23de72daf5efcc99799dfaa387386b79502a123909b0d3098ef84ffa6fa"}, - {file = "ml_dtypes-0.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b651fa1f91ce83cf037db202cd2601ac9b649016ec8593459c0295e613bf47"}, - {file = "ml_dtypes-0.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:b28c6b7831fa2cbb3169ed3053f10fb11d0415e2f250b893eb874e3af747a1f3"}, - {file = "ml_dtypes-0.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:23ff15cd9ba61cc42287097c30ae6841facd6dc14cc252f977d6430b8cd6eccc"}, - {file = "ml_dtypes-0.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5b148131da64f85053b79380cf34471eb869f7c027e2198a0c86d5e6fc9531f"}, - {file = "ml_dtypes-0.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc64866c1848999fab6f4a2938e769aed95b964085ebdcd7cd45e350192e457"}, - {file = "ml_dtypes-0.0.4-cp39-cp39-win_amd64.whl", hash = 
"sha256:e64869be11c830736c40513c47918c421a8385243846f1e8fd838793d866aa87"}, - {file = "ml_dtypes-0.0.4.tar.gz", hash = "sha256:45623c738d477d7a0f3f8e4c94998dc49025202c520e62e27f0ef688db2f696f"}, -] - -[package.dependencies] -numpy = [ - {version = ">1.20", markers = "python_version <= \"3.9\""}, - {version = ">=1.21.2", markers = "python_version > \"3.9\""}, -] - -[package.extras] -dev = ["absl-py", "pyink", "pylint (>=2.6.0)", "pytest", "pytest-xdist"] - -[[package]] -name = "mpmath" -version = "1.3.0" -description = "Python library for arbitrary-precision floating-point arithmetic" -optional = true -python-versions = "*" -files = [ - {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, - {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, -] - -[package.extras] -develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] -docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4)"] -tests = ["pytest (>=4.6)"] - -[[package]] -name = "mxnet" -version = "1.9.1" -description = "Apache MXNet is an ultra-scalable deep learning framework. This version uses openblas and MKLDNN." -optional = true -python-versions = "*" -files = [ - {file = "mxnet-1.9.1-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:73c045f65ad05fe9ca3c4202e10471703b57231f8ac8b05d973ec2ab362178fb"}, - {file = "mxnet-1.9.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5e51a0c05d99f8f1b3b5e7c02170be57af2e6edb3ad9af2cb9551ace3e22942c"}, - {file = "mxnet-1.9.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:65d5dac162c87a14d138d888b54494d515036d9047ae804ff51f770bd02197a6"}, -] - -[package.dependencies] -graphviz = ">=0.8.1,<0.9.0" -numpy = ">1.16.0,<2.0.0" -requests = ">=2.20.0,<3" - -[[package]] -name = "numpy" -version = "1.22.0" -description = "NumPy is the fundamental package for array computing with Python." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "numpy-1.22.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d22662b4b10112c545c91a0741f2436f8ca979ab3d69d03d19322aa970f9695"}, - {file = "numpy-1.22.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:11a1f3816ea82eed4178102c56281782690ab5993251fdfd75039aad4d20385f"}, - {file = "numpy-1.22.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5dc65644f75a4c2970f21394ad8bea1a844104f0fe01f278631be1c7eae27226"}, - {file = "numpy-1.22.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42c16cec1c8cf2728f1d539bd55aaa9d6bb48a7de2f41eb944697293ef65a559"}, - {file = "numpy-1.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a97e82c39d9856fe7d4f9b86d8a1e66eff99cf3a8b7ba48202f659703d27c46f"}, - {file = "numpy-1.22.0-cp310-cp310-win_amd64.whl", hash = "sha256:e41e8951749c4b5c9a2dc5fdbc1a4eec6ab2a140fdae9b460b0f557eed870f4d"}, - {file = "numpy-1.22.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:bece0a4a49e60e472a6d1f70ac6cdea00f9ab80ff01132f96bd970cdd8a9e5a9"}, - {file = "numpy-1.22.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:818b9be7900e8dc23e013a92779135623476f44a0de58b40c32a15368c01d471"}, - {file = "numpy-1.22.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:47ee7a839f5885bc0c63a74aabb91f6f40d7d7b639253768c4199b37aede7982"}, - {file = "numpy-1.22.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a024181d7aef0004d76fb3bce2a4c9f2e67a609a9e2a6ff2571d30e9976aa383"}, - {file = "numpy-1.22.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f71d57cc8645f14816ae249407d309be250ad8de93ef61d9709b45a0ddf4050c"}, - {file = "numpy-1.22.0-cp38-cp38-win32.whl", hash = "sha256:283d9de87c0133ef98f93dfc09fad3fb382f2a15580de75c02b5bb36a5a159a5"}, - {file = "numpy-1.22.0-cp38-cp38-win_amd64.whl", hash = "sha256:2762331de395739c91f1abb88041f94a080cb1143aeec791b3b223976228af3f"}, - {file = "numpy-1.22.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:76ba7c40e80f9dc815c5e896330700fd6e20814e69da9c1267d65a4d051080f1"}, - {file = "numpy-1.22.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0cfe07133fd00b27edee5e6385e333e9eeb010607e8a46e1cd673f05f8596595"}, - {file = "numpy-1.22.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6ed0d073a9c54ac40c41a9c2d53fcc3d4d4ed607670b9e7b0de1ba13b4cbfe6f"}, - {file = "numpy-1.22.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41388e32e40b41dd56eb37fcaa7488b2b47b0adf77c66154d6b89622c110dfe9"}, - {file = "numpy-1.22.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b55b953a1bdb465f4dc181758570d321db4ac23005f90ffd2b434cc6609a63dd"}, - {file = "numpy-1.22.0-cp39-cp39-win32.whl", hash = "sha256:5a311ee4d983c487a0ab546708edbdd759393a3dc9cd30305170149fedd23c88"}, - {file = "numpy-1.22.0-cp39-cp39-win_amd64.whl", hash = "sha256:a97a954a8c2f046d3817c2bce16e3c7e9a9c2afffaf0400f5c16df5172a67c9c"}, - {file = "numpy-1.22.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb02929b0d6bfab4c48a79bd805bd7419114606947ec8284476167415171f55b"}, - {file = "numpy-1.22.0.zip", hash = "sha256:a955e4128ac36797aaffd49ab44ec74a71c11d6938df83b1285492d277db5397"}, -] - -[[package]] -name = "nvidia-cublas-cu11" -version = "11.10.3.66" -description = "CUBLAS native runtime libraries" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl", hash = 
"sha256:d32e4d75f94ddfb93ea0a5dda08389bcc65d8916a25cb9f37ac89edaeed3bded"}, - {file = "nvidia_cublas_cu11-11.10.3.66-py3-none-win_amd64.whl", hash = "sha256:8ac17ba6ade3ed56ab898a036f9ae0756f1e81052a317bf98f8c6d18dc3ae49e"}, -] - -[package.dependencies] -setuptools = "*" -wheel = "*" - -[[package]] -name = "nvidia-cuda-nvrtc-cu11" -version = "11.7.99" -description = "NVRTC native runtime libraries" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl", hash = "sha256:9f1562822ea264b7e34ed5930567e89242d266448e936b85bc97a3370feabb03"}, - {file = "nvidia_cuda_nvrtc_cu11-11.7.99-py3-none-manylinux1_x86_64.whl", hash = "sha256:f7d9610d9b7c331fa0da2d1b2858a4a8315e6d49765091d28711c8946e7425e7"}, - {file = "nvidia_cuda_nvrtc_cu11-11.7.99-py3-none-win_amd64.whl", hash = "sha256:f2effeb1309bdd1b3854fc9b17eaf997808f8b25968ce0c7070945c4265d64a3"}, -] - -[package.dependencies] -setuptools = "*" -wheel = "*" - -[[package]] -name = "nvidia-cuda-runtime-cu11" -version = "11.7.99" -description = "CUDA Runtime native Libraries" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl", hash = "sha256:cc768314ae58d2641f07eac350f40f99dcb35719c4faff4bc458a7cd2b119e31"}, - {file = "nvidia_cuda_runtime_cu11-11.7.99-py3-none-win_amd64.whl", hash = "sha256:bc77fa59a7679310df9d5c70ab13c4e34c64ae2124dd1efd7e5474b71be125c7"}, -] - -[package.dependencies] -setuptools = "*" -wheel = "*" - -[[package]] -name = "nvidia-cudnn-cu11" -version = "8.5.0.96" -description = "cuDNN runtime libraries" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl", hash = "sha256:402f40adfc6f418f9dae9ab402e773cfed9beae52333f6d86ae3107a1b9527e7"}, - {file = "nvidia_cudnn_cu11-8.5.0.96-py3-none-manylinux1_x86_64.whl", hash = "sha256:71f8111eb830879ff2836db3cccf03bbd735df9b0d17cd93761732ac50a8a108"}, -] - -[package.dependencies] -setuptools = "*" -wheel = "*" - -[[package]] -name = "oauthlib" -version = "3.2.2" -description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" -optional = true -python-versions = ">=3.6" -files = [ - {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, - {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, -] - -[package.extras] -rsa = ["cryptography (>=3.0.0)"] -signals = ["blinker (>=1.4.0)"] -signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] - -[[package]] -name = "onnx" -version = "1.13.0" -description = "Open Neural Network Exchange" -optional = true -python-versions = "*" -files = [ - {file = "onnx-1.13.0-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:4d25aaf5b385937eb3d0846616ff8fdff65b4f5d2f55c82ffe0c6deb021f4714"}, - {file = "onnx-1.13.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ea9ce14eb7bc4ff3e236d44433ed6b9a5cdb2921d357d28f74268b43d04897c0"}, - {file = "onnx-1.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c77074bb9c12bef5c0054c972bba0280de6df0712dfccfdd224b0ee3f0b56cab"}, - {file = "onnx-1.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1f4f217942f97aad8bd3b707259338045ed5804b3a0dfa453d9f108f53a9ce2"}, - {file = "onnx-1.13.0-cp310-cp310-win32.whl", hash = 
"sha256:41953d6a9c1fc1c1023438c8aac8ed2ee29a1739fbfce68f8ece38f34d326d02"}, - {file = "onnx-1.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:9b2691ad78ddcac2244b4299e5f5132895e12f99dbd1cfb310d10cdb50cd2d90"}, - {file = "onnx-1.13.0-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:c1eeec79e3cc35b808df4616f9ddef769fb4e5912e0eaacbddfa3a60d93d36c0"}, - {file = "onnx-1.13.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8008d9dd4479445584a5e08cce5b8319777a145fdf397c1791a59efa347bb732"}, - {file = "onnx-1.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bad429298470efa5da5589f518036a125ca8864a766ad706278531c05c723f48"}, - {file = "onnx-1.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8faef7ee6e2012ad1ff13ffe248809218ac4ea7040bd62a7e0740978feffe926"}, - {file = "onnx-1.13.0-cp311-cp311-win32.whl", hash = "sha256:1d443d3b895dda42355ea6281eff638cc53a1a0b12244421d9fc09046444ba94"}, - {file = "onnx-1.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:88199c7eecba6c4f0b822147cf40184e7de94bcf915b3cbd0e2728966f7305c1"}, - {file = "onnx-1.13.0-cp37-cp37m-macosx_10_12_universal2.whl", hash = "sha256:99fb9e674e1244fed09afd2a3106485d803357beaea7c0ae2f5956bde8319c54"}, - {file = "onnx-1.13.0-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:6113b4491cf27c408272397405c2f0daf8751515a91ea86f03db05f953433be9"}, - {file = "onnx-1.13.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b9ff692dff87dc180045c055502ab0d91d042f1cd3058d94de4141cd4445286"}, - {file = "onnx-1.13.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:603587b438bbf685860ead13a83b771cd7a6a0565f745d825260c1ad6904674a"}, - {file = "onnx-1.13.0-cp37-cp37m-win32.whl", hash = "sha256:2b8429926423e83aba724ba8586ef1611d7bf7f4f9f2cc4312856da86fd9c5ba"}, - {file = "onnx-1.13.0-cp37-cp37m-win_amd64.whl", hash = "sha256:098176b93b19323639831561618da91aa472745cd518527a540152d7f9b8f7d3"}, - {file = "onnx-1.13.0-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:4bac7453fa5fa969352778f759896c43e3b8337ae532cd6dda36758b9dc656d7"}, - {file = "onnx-1.13.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:2e8ece71cb00c1e8f5fec93f306a071b8d2929fded6b1e6a71cab710de2e798d"}, - {file = "onnx-1.13.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd305274b199ad04ea6c185437f736981265f20d7ac2cbf16411d2d35e5e8e68"}, - {file = "onnx-1.13.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ad8208252f6d61dcd487c2c62f6fbc1eee24bf11addb56c500cf0feb8966f36"}, - {file = "onnx-1.13.0-cp38-cp38-win32.whl", hash = "sha256:cef04e12123fef9f1e44078311fe7e9d9da0713e273ea13147b77d57cfd5eea0"}, - {file = "onnx-1.13.0-cp38-cp38-win_amd64.whl", hash = "sha256:934a6497b2feb7a5ec31063afea5da1b06131fb2e9979f9bdeeec5c8cf0e03d2"}, - {file = "onnx-1.13.0-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:43e1ff72dcff4eabf3a75b119156c96603c7c3cbcf93ac7dba0c12687511aa9d"}, - {file = "onnx-1.13.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:e27021a056dfecfdc6307d8b50e69a4e32e2137b54bfe215c82580e8dceb7d84"}, - {file = "onnx-1.13.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5d5e7efbf7d6b81edf358bbfb008d1110ab020de0d52d8c84ffddbf07cedfb2"}, - {file = "onnx-1.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:257db1d357671d81f789f30d4e9358fd6427ae3ebf5fd90e4b0b418e6c8bb295"}, - {file = "onnx-1.13.0-cp39-cp39-win32.whl", hash = 
"sha256:eb32853d94a61728ba4dd0809740e782896d10178d5593f472d24f51c13c2e5d"}, - {file = "onnx-1.13.0-cp39-cp39-win_amd64.whl", hash = "sha256:925e9ecc4cca0da65126e5320577b81c0c2ac3c9e0418e77ff21da63d0f3bcdb"}, - {file = "onnx-1.13.0.tar.gz", hash = "sha256:410b39950367857f97b65093681fe2495a2e23d63777a8aceaf96c56a16d166e"}, -] - -[package.dependencies] -numpy = ">=1.16.6" -protobuf = ">=3.20.2,<4" -typing-extensions = ">=3.6.2.1" - -[package.extras] -lint = ["black (>=22.3)", "clang-format (==13.0.0)", "flake8 (>=5.0.2)", "isort[colors] (>=5.10)", "mypy (>=0.971)", "types-protobuf (==3.18.4)"] - -[[package]] -name = "onnxoptimizer" -version = "0.3.10" -description = "Open Neural Network Exchange" -optional = true -python-versions = "*" -files = [ - {file = "onnxoptimizer-0.3.10-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:2dd82596fc81d508d635e75e0eb0a4517fdecd16bde0808d0e2661e560a6cc5f"}, - {file = "onnxoptimizer-0.3.10-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:2b3044a9cd7d3b83e84ac443dfd5373911fb275e26c5843fb1eec75fb58775a1"}, - {file = "onnxoptimizer-0.3.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dab6d00b202ddf82655d816f43292e947166b6a465b59047712a1c1c0cd70ee2"}, - {file = "onnxoptimizer-0.3.10-cp310-cp310-win_amd64.whl", hash = "sha256:063609bf76ae5e787686b10f08f176f70cccabb08aa25b8ff99ea30fbaaafdb7"}, - {file = "onnxoptimizer-0.3.10-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:efc14a0be850efe2472991f6d089b59ad4942bcc7a3c53f1623b931d93213bde"}, - {file = "onnxoptimizer-0.3.10-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:9df03e49e758b3d57d3f9ce7e78588deb3f7e040a0753910665c386a98822bdb"}, - {file = "onnxoptimizer-0.3.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05d24a9b76641f16866a2f99c177b631fba17c8da55afcd8aa404fdd8cf769dc"}, - {file = "onnxoptimizer-0.3.10-cp311-cp311-win_amd64.whl", hash = "sha256:064cc155a01e11b038fda0e4e1ee514eb6487b5dd3e3128cd35d268ff8770ba2"}, - {file = "onnxoptimizer-0.3.10-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:59968fd584318dc996646a97cd862d4c206327b3ad10861268763ccd0c7e27bf"}, - {file = "onnxoptimizer-0.3.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab956e2dcb94030be9dc4c8b8fd41cb12283c4e6d9e19d40cee2653551ec1b73"}, - {file = "onnxoptimizer-0.3.10-cp37-cp37m-win_amd64.whl", hash = "sha256:7ac6f63346091cceb23b5a5507b4a6caad9023d340a952901fac68b2e4e20831"}, - {file = "onnxoptimizer-0.3.10-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:48b4cf84685b980931ef80db594f1c9c9f628b79fb0ffde31216942139ea0561"}, - {file = "onnxoptimizer-0.3.10-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:02456d52f00a699100ac799f45096e7ed0157915c7a27503412c187415df3a30"}, - {file = "onnxoptimizer-0.3.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:534a4d5c41cb6aab7c3b7dbb051664bf99e4ed28033a02d2ff610041b9104689"}, - {file = "onnxoptimizer-0.3.10-cp38-cp38-win_amd64.whl", hash = "sha256:50142ac2aba3e292b57f50446f10966602e009ef5ea7458b63e3fa1a5f186057"}, - {file = "onnxoptimizer-0.3.10-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:8982f4fadfd81401b6bffe0aaea2eb4757ca4e989ba9974105309d53fcaa3cb9"}, - {file = "onnxoptimizer-0.3.10-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:a532786bd7920552c5058824d6449720736cdd3547edb3504ec53c46a48093de"}, - {file = "onnxoptimizer-0.3.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:58bbebd245cedfb4b3d01babefb54ed70468d4de5ea01308567e1522ff7f14e3"}, - {file = "onnxoptimizer-0.3.10-cp39-cp39-win_amd64.whl", hash = "sha256:c637600ad403fc1ab3bf5b9d670385d817ef2e062871a82745db0d0afcf03cdf"}, - {file = "onnxoptimizer-0.3.10.tar.gz", hash = "sha256:d459030cd147f45e5216cf0f634aef1734b1fa68cd073f2d57e670675b82e012"}, -] - -[package.dependencies] -onnx = "*" - -[package.extras] -mypy = ["mypy (==0.600)"] - -[[package]] -name = "onnxruntime" -version = "1.14.1" -description = "ONNX Runtime is a runtime accelerator for Machine Learning models" -optional = true -python-versions = "*" -files = [ - {file = "onnxruntime-1.14.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:193ef1ac512e530c6e6e259c26e67212e2cd3f2bfaad6ff935ed3f4281053056"}, - {file = "onnxruntime-1.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d2853bbb36cb272d99f6c225e5040eb0ddb37a667fce20d186ecdf0a6fac8af8"}, - {file = "onnxruntime-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e1b173365c6894616b8207e23cbb891da9638c5373668d6653e4081ef5f04d0"}, - {file = "onnxruntime-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24bf0401c5f92be7230ac660ff07ba06f7c175e99e225d5d48ff09062a3b76e9"}, - {file = "onnxruntime-1.14.1-cp310-cp310-manylinux_2_27_aarch64.whl", hash = "sha256:0a2d09260bbdbe1df678e0a237a5f7b1a44fd11a2f52688d8b6a53a9d03a26db"}, - {file = "onnxruntime-1.14.1-cp310-cp310-manylinux_2_27_x86_64.whl", hash = "sha256:d99d35b9d5c3f46cad1673a39cc753fb57d60784369b59e6f8cd3dfb77df1885"}, - {file = "onnxruntime-1.14.1-cp310-cp310-win32.whl", hash = "sha256:f400356df1b27d9adc5513319e8a89753e48ef0d6c5084caf5db8e132f46e7e8"}, - {file = "onnxruntime-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:96a4059dbab162fe5cdb6750f8c70b2106ef2de5d49a7f72085171937d0e36d3"}, - {file = "onnxruntime-1.14.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:fa23df6a349218636290f9fe56d7baaceb1a50cf92255234d495198b47d92327"}, - {file = "onnxruntime-1.14.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc70e44d9e123d126648da24ffb39e56464272a1660a3eb91f4f5b74263be3ba"}, - {file = "onnxruntime-1.14.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:deff8138045a3affb6be064b598e3ec69a88e4d445359c50464ee5379b8eaf19"}, - {file = "onnxruntime-1.14.1-cp37-cp37m-manylinux_2_27_aarch64.whl", hash = "sha256:7c02acdc1107cbf698dcbf6dadc6f5b6aa179e7fa9a026251e99cf8613bd3129"}, - {file = "onnxruntime-1.14.1-cp37-cp37m-manylinux_2_27_x86_64.whl", hash = "sha256:6efa3b2f4b1eaa6c714c07861993bfd9bb33bd73cdbcaf5b4aadcf1ec13fcaf7"}, - {file = "onnxruntime-1.14.1-cp37-cp37m-win32.whl", hash = "sha256:72fc0acc82c54bf03eba065ad9025baa438c00c54a2ee0beb8ae4b6085cd3a0d"}, - {file = "onnxruntime-1.14.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4d6f08ea40d63ccf90f203f4a2a498f4e590737dcaf16867075cc8e0a86c5554"}, - {file = "onnxruntime-1.14.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:c2d9e8f1bc6037f14d8aaa480492792c262fc914936153e40b06b3667bb25549"}, - {file = "onnxruntime-1.14.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e7424d3befdd95b537c90787bbfaa053b2bb19eb60135abb898cb0e099d7d7ad"}, - {file = "onnxruntime-1.14.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9066d275e6e41d0597e234d2d88c074d4325e650c74a9527a52cadbcf42a0fe2"}, - {file = "onnxruntime-1.14.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8224d3c1f2cd0b899cea7b5a39f28b971debe0da30fcbc61382801d97d6f5740"}, - {file = "onnxruntime-1.14.1-cp38-cp38-manylinux_2_27_aarch64.whl", hash = "sha256:f4ac52ff4ac793683ebd1fbd1ee24197e3b4ca825ee68ff739296a820867debe"}, - {file = "onnxruntime-1.14.1-cp38-cp38-manylinux_2_27_x86_64.whl", hash = "sha256:b1dd8cdd3be36c32ddd8f5763841ed571c3e81da59439a622947bd97efee6e77"}, - {file = "onnxruntime-1.14.1-cp38-cp38-win32.whl", hash = "sha256:95d0f0cd95360c07f1c3ba20962b9bb813627df4bfc1b4b274e1d40044df5ad1"}, - {file = "onnxruntime-1.14.1-cp38-cp38-win_amd64.whl", hash = "sha256:de40a558e00fc00f92e298d5be99eb8075dba51368dabcb259670a00f4670e56"}, - {file = "onnxruntime-1.14.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:c65b587a42a89fceceaad367bd69d071ee5c9c7010b76e2adac5e9efd9356fb5"}, - {file = "onnxruntime-1.14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6e47ef6a2c6e6dd6ff48bc13f2331d124dff00e1d76627624bb3268c8058f19c"}, - {file = "onnxruntime-1.14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0afd0f671d068dd99b9d071d88e93a9a57a5ed59af440c0f4d65319ee791603f"}, - {file = "onnxruntime-1.14.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc65e9061349cdf98ce16b37722b557109f16076632fbfed9a3151895cfd3bb7"}, - {file = "onnxruntime-1.14.1-cp39-cp39-manylinux_2_27_aarch64.whl", hash = "sha256:2ff17c71187391a71e6ccc78ca89aed83bcaed1c085c95267ab1a70897868bdd"}, - {file = "onnxruntime-1.14.1-cp39-cp39-manylinux_2_27_x86_64.whl", hash = "sha256:9b795189916942ce848192200dde5b1f32799ee6c84fc600969a44d88e8a5404"}, - {file = "onnxruntime-1.14.1-cp39-cp39-win32.whl", hash = "sha256:17ca3100112af045118750d24643a01ed4e6d86071a8efaef75cc1d434ea64aa"}, - {file = "onnxruntime-1.14.1-cp39-cp39-win_amd64.whl", hash = "sha256:b5e8c489329ba0fa0639dfd7ec02d6b07cece1bab52ef83884b537247efbda74"}, -] - -[package.dependencies] -coloredlogs = "*" -flatbuffers = "*" -numpy = ">=1.21.6" -packaging = "*" -protobuf = "*" -sympy = "*" - -[[package]] -name = "opencv-python" -version = "4.7.0.72" -description = "Wrapper package for OpenCV python bindings." 
-optional = true -python-versions = ">=3.6" -files = [ - {file = "opencv-python-4.7.0.72.tar.gz", hash = "sha256:3424794a711f33284581f3c1e4b071cfc827d02b99d6fd9a35391f517c453306"}, - {file = "opencv_python-4.7.0.72-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:d4f8880440c433a0025d78804dda6901d1e8e541a561dda66892d90290aef881"}, - {file = "opencv_python-4.7.0.72-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:7a297e7651e22eb17c265ddbbc80e2ba2a8ff4f4a1696a67c45e5f5798245842"}, - {file = "opencv_python-4.7.0.72-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd08343654c6b88c5a8c25bf425f8025aed2e3189b4d7306b5861d32affaf737"}, - {file = "opencv_python-4.7.0.72-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebfc0a3a2f57716e709028b992e4de7fd8752105d7a768531c4f434043c6f9ff"}, - {file = "opencv_python-4.7.0.72-cp37-abi3-win32.whl", hash = "sha256:eda115797b114fc16ca6f182b91c5d984f0015c19bec3145e55d33d708e9bae1"}, - {file = "opencv_python-4.7.0.72-cp37-abi3-win_amd64.whl", hash = "sha256:812af57553ec1c6709060c63f6b7e9ad07ddc0f592f3ccc6d00c71e0fe0e6376"}, -] - -[package.dependencies] -numpy = [ - {version = ">=1.21.0", markers = "python_version <= \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\" and python_version >= \"3.8\""}, - {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\""}, - {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\""}, - {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, - {version = ">=1.17.3", markers = "(platform_system != \"Darwin\" and platform_system != \"Linux\") and python_version >= \"3.8\" and python_version < \"3.9\" or platform_system != \"Darwin\" and python_version >= \"3.8\" and python_version < \"3.9\" and platform_machine != \"aarch64\" or platform_machine != \"arm64\" and python_version >= \"3.8\" and python_version < \"3.9\" and platform_system != \"Linux\" or (platform_machine != \"arm64\" and platform_machine != \"aarch64\") and python_version >= \"3.8\" and python_version < \"3.9\""}, -] - -[[package]] -name = "opt-einsum" -version = "3.3.0" -description = "Optimizing numpys einsum function" -optional = true -python-versions = ">=3.5" -files = [ - {file = "opt_einsum-3.3.0-py3-none-any.whl", hash = "sha256:2455e59e3947d3c275477df7f5205b30635e266fe6dc300e3d9f9646bfcea147"}, - {file = "opt_einsum-3.3.0.tar.gz", hash = "sha256:59f6475f77bbc37dcf7cd748519c0ec60722e91e63ca114e68821c0c54a46549"}, -] - -[package.dependencies] -numpy = ">=1.7" - -[package.extras] -docs = ["numpydoc", "sphinx (==1.2.3)", "sphinx-rtd-theme", "sphinxcontrib-napoleon"] -tests = ["pytest", "pytest-cov", "pytest-pep8"] - -[[package]] -name = "packaging" -version = "23.0" -description = "Core utilities for Python packages" -optional = false -python-versions = ">=3.7" -files = [ - {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"}, - {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"}, -] - -[[package]] -name = "pathspec" -version = 
"0.11.1" -description = "Utility library for gitignore style pattern matching of file paths." -optional = false -python-versions = ">=3.7" -files = [ - {file = "pathspec-0.11.1-py3-none-any.whl", hash = "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293"}, - {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"}, -] - -[[package]] -name = "pillow" -version = "10.2.0" -description = "Python Imaging Library (Fork)" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pillow-10.2.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:7823bdd049099efa16e4246bdf15e5a13dbb18a51b68fa06d6c1d4d8b99a796e"}, - {file = "pillow-10.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:83b2021f2ade7d1ed556bc50a399127d7fb245e725aa0113ebd05cfe88aaf588"}, - {file = "pillow-10.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fad5ff2f13d69b7e74ce5b4ecd12cc0ec530fcee76356cac6742785ff71c452"}, - {file = "pillow-10.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da2b52b37dad6d9ec64e653637a096905b258d2fc2b984c41ae7d08b938a67e4"}, - {file = "pillow-10.2.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:47c0995fc4e7f79b5cfcab1fc437ff2890b770440f7696a3ba065ee0fd496563"}, - {file = "pillow-10.2.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:322bdf3c9b556e9ffb18f93462e5f749d3444ce081290352c6070d014c93feb2"}, - {file = "pillow-10.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:51f1a1bffc50e2e9492e87d8e09a17c5eea8409cda8d3f277eb6edc82813c17c"}, - {file = "pillow-10.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:69ffdd6120a4737710a9eee73e1d2e37db89b620f702754b8f6e62594471dee0"}, - {file = "pillow-10.2.0-cp310-cp310-win32.whl", hash = "sha256:c6dafac9e0f2b3c78df97e79af707cdc5ef8e88208d686a4847bab8266870023"}, - {file = "pillow-10.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:aebb6044806f2e16ecc07b2a2637ee1ef67a11840a66752751714a0d924adf72"}, - {file = "pillow-10.2.0-cp310-cp310-win_arm64.whl", hash = "sha256:7049e301399273a0136ff39b84c3678e314f2158f50f517bc50285fb5ec847ad"}, - {file = "pillow-10.2.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:35bb52c37f256f662abdfa49d2dfa6ce5d93281d323a9af377a120e89a9eafb5"}, - {file = "pillow-10.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c23f307202661071d94b5e384e1e1dc7dfb972a28a2310e4ee16103e66ddb67"}, - {file = "pillow-10.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:773efe0603db30c281521a7c0214cad7836c03b8ccff897beae9b47c0b657d61"}, - {file = "pillow-10.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11fa2e5984b949b0dd6d7a94d967743d87c577ff0b83392f17cb3990d0d2fd6e"}, - {file = "pillow-10.2.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:716d30ed977be8b37d3ef185fecb9e5a1d62d110dfbdcd1e2a122ab46fddb03f"}, - {file = "pillow-10.2.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a086c2af425c5f62a65e12fbf385f7c9fcb8f107d0849dba5839461a129cf311"}, - {file = "pillow-10.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c8de2789052ed501dd829e9cae8d3dcce7acb4777ea4a479c14521c942d395b1"}, - {file = "pillow-10.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:609448742444d9290fd687940ac0b57fb35e6fd92bdb65386e08e99af60bf757"}, - {file = "pillow-10.2.0-cp311-cp311-win32.whl", hash = "sha256:823ef7a27cf86df6597fa0671066c1b596f69eba53efa3d1e1cb8b30f3533068"}, - {file = 
"pillow-10.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:1da3b2703afd040cf65ec97efea81cfba59cdbed9c11d8efc5ab09df9509fc56"}, - {file = "pillow-10.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:edca80cbfb2b68d7b56930b84a0e45ae1694aeba0541f798e908a49d66b837f1"}, - {file = "pillow-10.2.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:1b5e1b74d1bd1b78bc3477528919414874748dd363e6272efd5abf7654e68bef"}, - {file = "pillow-10.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0eae2073305f451d8ecacb5474997c08569fb4eb4ac231ffa4ad7d342fdc25ac"}, - {file = "pillow-10.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7c2286c23cd350b80d2fc9d424fc797575fb16f854b831d16fd47ceec078f2c"}, - {file = "pillow-10.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e23412b5c41e58cec602f1135c57dfcf15482013ce6e5f093a86db69646a5aa"}, - {file = "pillow-10.2.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:52a50aa3fb3acb9cf7213573ef55d31d6eca37f5709c69e6858fe3bc04a5c2a2"}, - {file = "pillow-10.2.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:127cee571038f252a552760076407f9cff79761c3d436a12af6000cd182a9d04"}, - {file = "pillow-10.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8d12251f02d69d8310b046e82572ed486685c38f02176bd08baf216746eb947f"}, - {file = "pillow-10.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:54f1852cd531aa981bc0965b7d609f5f6cc8ce8c41b1139f6ed6b3c54ab82bfb"}, - {file = "pillow-10.2.0-cp312-cp312-win32.whl", hash = "sha256:257d8788df5ca62c980314053197f4d46eefedf4e6175bc9412f14412ec4ea2f"}, - {file = "pillow-10.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:154e939c5f0053a383de4fd3d3da48d9427a7e985f58af8e94d0b3c9fcfcf4f9"}, - {file = "pillow-10.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:f379abd2f1e3dddb2b61bc67977a6b5a0a3f7485538bcc6f39ec76163891ee48"}, - {file = "pillow-10.2.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:8373c6c251f7ef8bda6675dd6d2b3a0fcc31edf1201266b5cf608b62a37407f9"}, - {file = "pillow-10.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:870ea1ada0899fd0b79643990809323b389d4d1d46c192f97342eeb6ee0b8483"}, - {file = "pillow-10.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4b6b1e20608493548b1f32bce8cca185bf0480983890403d3b8753e44077129"}, - {file = "pillow-10.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3031709084b6e7852d00479fd1d310b07d0ba82765f973b543c8af5061cf990e"}, - {file = "pillow-10.2.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:3ff074fc97dd4e80543a3e91f69d58889baf2002b6be64347ea8cf5533188213"}, - {file = "pillow-10.2.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:cb4c38abeef13c61d6916f264d4845fab99d7b711be96c326b84df9e3e0ff62d"}, - {file = "pillow-10.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b1b3020d90c2d8e1dae29cf3ce54f8094f7938460fb5ce8bc5c01450b01fbaf6"}, - {file = "pillow-10.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:170aeb00224ab3dc54230c797f8404507240dd868cf52066f66a41b33169bdbe"}, - {file = "pillow-10.2.0-cp38-cp38-win32.whl", hash = "sha256:c4225f5220f46b2fde568c74fca27ae9771536c2e29d7c04f4fb62c83275ac4e"}, - {file = "pillow-10.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:0689b5a8c5288bc0504d9fcee48f61a6a586b9b98514d7d29b840143d6734f39"}, - {file = "pillow-10.2.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:b792a349405fbc0163190fde0dc7b3fef3c9268292586cf5645598b48e63dc67"}, - {file = 
"pillow-10.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c570f24be1e468e3f0ce7ef56a89a60f0e05b30a3669a459e419c6eac2c35364"}, - {file = "pillow-10.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8ecd059fdaf60c1963c58ceb8997b32e9dc1b911f5da5307aab614f1ce5c2fb"}, - {file = "pillow-10.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c365fd1703040de1ec284b176d6af5abe21b427cb3a5ff68e0759e1e313a5e7e"}, - {file = "pillow-10.2.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:70c61d4c475835a19b3a5aa42492409878bbca7438554a1f89d20d58a7c75c01"}, - {file = "pillow-10.2.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b6f491cdf80ae540738859d9766783e3b3c8e5bd37f5dfa0b76abdecc5081f13"}, - {file = "pillow-10.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d189550615b4948f45252d7f005e53c2040cea1af5b60d6f79491a6e147eef7"}, - {file = "pillow-10.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:49d9ba1ed0ef3e061088cd1e7538a0759aab559e2e0a80a36f9fd9d8c0c21591"}, - {file = "pillow-10.2.0-cp39-cp39-win32.whl", hash = "sha256:babf5acfede515f176833ed6028754cbcd0d206f7f614ea3447d67c33be12516"}, - {file = "pillow-10.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:0304004f8067386b477d20a518b50f3fa658a28d44e4116970abfcd94fac34a8"}, - {file = "pillow-10.2.0-cp39-cp39-win_arm64.whl", hash = "sha256:0fb3e7fc88a14eacd303e90481ad983fd5b69c761e9e6ef94c983f91025da869"}, - {file = "pillow-10.2.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:322209c642aabdd6207517e9739c704dc9f9db943015535783239022002f054a"}, - {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3eedd52442c0a5ff4f887fab0c1c0bb164d8635b32c894bc1faf4c618dd89df2"}, - {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb28c753fd5eb3dd859b4ee95de66cc62af91bcff5db5f2571d32a520baf1f04"}, - {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:33870dc4653c5017bf4c8873e5488d8f8d5f8935e2f1fb9a2208c47cdd66efd2"}, - {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3c31822339516fb3c82d03f30e22b1d038da87ef27b6a78c9549888f8ceda39a"}, - {file = "pillow-10.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a2b56ba36e05f973d450582fb015594aaa78834fefe8dfb8fcd79b93e64ba4c6"}, - {file = "pillow-10.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:d8e6aeb9201e655354b3ad049cb77d19813ad4ece0df1249d3c793de3774f8c7"}, - {file = "pillow-10.2.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:2247178effb34a77c11c0e8ac355c7a741ceca0a732b27bf11e747bbc950722f"}, - {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15587643b9e5eb26c48e49a7b33659790d28f190fc514a322d55da2fb5c2950e"}, - {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753cd8f2086b2b80180d9b3010dd4ed147efc167c90d3bf593fe2af21265e5a5"}, - {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:7c8f97e8e7a9009bcacbe3766a36175056c12f9a44e6e6f2d5caad06dcfbf03b"}, - {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d1b35bcd6c5543b9cb547dee3150c93008f8dd0f1fef78fc0cd2b141c5baf58a"}, - {file = "pillow-10.2.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe4c15f6c9285dc54ce6553a3ce908ed37c8f3825b5a51a15c91442bb955b868"}, - {file = "pillow-10.2.0.tar.gz", hash = 
"sha256:e87f0b2c78157e12d7686b27d63c070fd65d994e8ddae6f328e0dcf4a0cd007e"}, -] - -[package.extras] -docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"] -fpx = ["olefile"] -mic = ["olefile"] -tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] -typing = ["typing-extensions"] -xmp = ["defusedxml"] - -[[package]] -name = "platformdirs" -version = "3.2.0" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -optional = false -python-versions = ">=3.7" -files = [ - {file = "platformdirs-3.2.0-py3-none-any.whl", hash = "sha256:ebe11c0d7a805086e99506aa331612429a72ca7cd52a1f0d277dc4adc20cb10e"}, - {file = "platformdirs-3.2.0.tar.gz", hash = "sha256:d5b638ca397f25f979350ff789db335903d7ea010ab28903f57b27e1b16c2b08"}, -] - -[package.extras] -docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.22,!=1.23.4)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.2.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] - -[[package]] -name = "pluggy" -version = "1.0.0" -description = "plugin and hook calling mechanisms for python" -optional = false -python-versions = ">=3.6" -files = [ - {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, - {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, -] - -[package.extras] -dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] - -[[package]] -name = "protobuf" -version = "3.20.3" -description = "Protocol Buffers" -optional = true -python-versions = ">=3.7" -files = [ - {file = "protobuf-3.20.3-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:f4bd856d702e5b0d96a00ec6b307b0f51c1982c2bf9c0052cf9019e9a544ba99"}, - {file = "protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9aae4406ea63d825636cc11ffb34ad3379335803216ee3a856787bcf5ccc751e"}, - {file = "protobuf-3.20.3-cp310-cp310-win32.whl", hash = "sha256:28545383d61f55b57cf4df63eebd9827754fd2dc25f80c5253f9184235db242c"}, - {file = "protobuf-3.20.3-cp310-cp310-win_amd64.whl", hash = "sha256:67a3598f0a2dcbc58d02dd1928544e7d88f764b47d4a286202913f0b2801c2e7"}, - {file = "protobuf-3.20.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:899dc660cd599d7352d6f10d83c95df430a38b410c1b66b407a6b29265d66469"}, - {file = "protobuf-3.20.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e64857f395505ebf3d2569935506ae0dfc4a15cb80dc25261176c784662cdcc4"}, - {file = "protobuf-3.20.3-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:d9e4432ff660d67d775c66ac42a67cf2453c27cb4d738fc22cb53b5d84c135d4"}, - {file = "protobuf-3.20.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:74480f79a023f90dc6e18febbf7b8bac7508420f2006fabd512013c0c238f454"}, - {file = "protobuf-3.20.3-cp37-cp37m-win32.whl", hash = "sha256:b6cc7ba72a8850621bfec987cb72623e703b7fe2b9127a161ce61e61558ad905"}, - {file = "protobuf-3.20.3-cp37-cp37m-win_amd64.whl", hash = "sha256:8c0c984a1b8fef4086329ff8dd19ac77576b384079247c770f29cc8ce3afa06c"}, - {file = "protobuf-3.20.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:de78575669dddf6099a8a0f46a27e82a1783c557ccc38ee620ed8cc96d3be7d7"}, - {file = 
"protobuf-3.20.3-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:f4c42102bc82a51108e449cbb32b19b180022941c727bac0cfd50170341f16ee"}, - {file = "protobuf-3.20.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:44246bab5dd4b7fbd3c0c80b6f16686808fab0e4aca819ade6e8d294a29c7050"}, - {file = "protobuf-3.20.3-cp38-cp38-win32.whl", hash = "sha256:c02ce36ec760252242a33967d51c289fd0e1c0e6e5cc9397e2279177716add86"}, - {file = "protobuf-3.20.3-cp38-cp38-win_amd64.whl", hash = "sha256:447d43819997825d4e71bf5769d869b968ce96848b6479397e29fc24c4a5dfe9"}, - {file = "protobuf-3.20.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:398a9e0c3eaceb34ec1aee71894ca3299605fa8e761544934378bbc6c97de23b"}, - {file = "protobuf-3.20.3-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:bf01b5720be110540be4286e791db73f84a2b721072a3711efff6c324cdf074b"}, - {file = "protobuf-3.20.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:daa564862dd0d39c00f8086f88700fdbe8bc717e993a21e90711acfed02f2402"}, - {file = "protobuf-3.20.3-cp39-cp39-win32.whl", hash = "sha256:819559cafa1a373b7096a482b504ae8a857c89593cf3a25af743ac9ecbd23480"}, - {file = "protobuf-3.20.3-cp39-cp39-win_amd64.whl", hash = "sha256:03038ac1cfbc41aa21f6afcbcd357281d7521b4157926f30ebecc8d4ea59dcb7"}, - {file = "protobuf-3.20.3-py2.py3-none-any.whl", hash = "sha256:a7ca6d488aa8ff7f329d4c545b2dbad8ac31464f1d8b1c87ad1346717731e4db"}, - {file = "protobuf-3.20.3.tar.gz", hash = "sha256:2e3427429c9cffebf259491be0af70189607f365c2f41c7c3764af6f337105f2"}, -] - -[[package]] -name = "psutil" -version = "5.9.4" -description = "Cross-platform lib for process and system monitoring in Python." -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "psutil-5.9.4-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:c1ca331af862803a42677c120aff8a814a804e09832f166f226bfd22b56feee8"}, - {file = "psutil-5.9.4-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:68908971daf802203f3d37e78d3f8831b6d1014864d7a85937941bb35f09aefe"}, - {file = "psutil-5.9.4-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:3ff89f9b835100a825b14c2808a106b6fdcc4b15483141482a12c725e7f78549"}, - {file = "psutil-5.9.4-cp27-cp27m-win32.whl", hash = "sha256:852dd5d9f8a47169fe62fd4a971aa07859476c2ba22c2254d4a1baa4e10b95ad"}, - {file = "psutil-5.9.4-cp27-cp27m-win_amd64.whl", hash = "sha256:9120cd39dca5c5e1c54b59a41d205023d436799b1c8c4d3ff71af18535728e94"}, - {file = "psutil-5.9.4-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:6b92c532979bafc2df23ddc785ed116fced1f492ad90a6830cf24f4d1ea27d24"}, - {file = "psutil-5.9.4-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:efeae04f9516907be44904cc7ce08defb6b665128992a56957abc9b61dca94b7"}, - {file = "psutil-5.9.4-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:54d5b184728298f2ca8567bf83c422b706200bcbbfafdc06718264f9393cfeb7"}, - {file = "psutil-5.9.4-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16653106f3b59386ffe10e0bad3bb6299e169d5327d3f187614b1cb8f24cf2e1"}, - {file = "psutil-5.9.4-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54c0d3d8e0078b7666984e11b12b88af2db11d11249a8ac8920dd5ef68a66e08"}, - {file = "psutil-5.9.4-cp36-abi3-win32.whl", hash = "sha256:149555f59a69b33f056ba1c4eb22bb7bf24332ce631c44a319cec09f876aaeff"}, - {file = "psutil-5.9.4-cp36-abi3-win_amd64.whl", hash = 
"sha256:fd8522436a6ada7b4aad6638662966de0d61d241cb821239b2ae7013d41a43d4"}, - {file = "psutil-5.9.4-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:6001c809253a29599bc0dfd5179d9f8a5779f9dffea1da0f13c53ee568115e1e"}, - {file = "psutil-5.9.4.tar.gz", hash = "sha256:3d7f9739eb435d4b1338944abe23f49584bde5395f27487d2ee25ad9a8774a62"}, -] - -[package.extras] -test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] - -[[package]] -name = "pyasn1" -version = "0.4.8" -description = "ASN.1 types and codecs" -optional = true -python-versions = "*" -files = [ - {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"}, - {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"}, -] - -[[package]] -name = "pyasn1-modules" -version = "0.2.8" -description = "A collection of ASN.1-based protocols modules." -optional = true -python-versions = "*" -files = [ - {file = "pyasn1-modules-0.2.8.tar.gz", hash = "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e"}, - {file = "pyasn1_modules-0.2.8-py2.py3-none-any.whl", hash = "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74"}, -] - -[package.dependencies] -pyasn1 = ">=0.4.6,<0.5.0" - -[[package]] -name = "pycodestyle" -version = "2.10.0" -description = "Python style guide checker" -optional = false -python-versions = ">=3.6" -files = [ - {file = "pycodestyle-2.10.0-py2.py3-none-any.whl", hash = "sha256:8a4eaf0d0495c7395bdab3589ac2db602797d76207242c17d470186815706610"}, - {file = "pycodestyle-2.10.0.tar.gz", hash = "sha256:347187bdb476329d98f695c213d7295a846d1152ff4fe9bacb8a9590b8ee7053"}, -] - -[[package]] -name = "pycparser" -version = "2.21" -description = "C parser in Python" -optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, - {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, -] - -[[package]] -name = "pyflakes" -version = "3.0.1" -description = "passive checker of Python programs" -optional = false -python-versions = ">=3.6" -files = [ - {file = "pyflakes-3.0.1-py2.py3-none-any.whl", hash = "sha256:ec55bf7fe21fff7f1ad2f7da62363d749e2a470500eab1b555334b67aa1ef8cf"}, - {file = "pyflakes-3.0.1.tar.gz", hash = "sha256:ec8b276a6b60bd80defed25add7e439881c19e64850afd9b346283d4165fd0fd"}, -] - -[[package]] -name = "pyformat" -version = "0.7" -description = "Formats Python code to follow a consistent style." -optional = false -python-versions = "*" -files = [ - {file = "pyformat-0.7.tar.gz", hash = "sha256:eb7b0e93f768c6f92e2cb06307deaa3a5141c7c61cd472b1a7918e30d09df20f"}, -] - -[package.dependencies] -autoflake = ">=0.6.6" -autopep8 = ">=1.2.2" -docformatter = ">=0.7" -unify = ">=0.2" - -[[package]] -name = "pygments" -version = "2.14.0" -description = "Pygments is a syntax highlighting package written in Python." 
-optional = false -python-versions = ">=3.6" -files = [ - {file = "Pygments-2.14.0-py3-none-any.whl", hash = "sha256:fa7bd7bd2771287c0de303af8bfdfc731f51bd2c6a47ab69d117138893b82717"}, - {file = "Pygments-2.14.0.tar.gz", hash = "sha256:b3ed06a9e8ac9a9aae5a6f5dbe78a8a58655d17b43b93c078f094ddc476ae297"}, -] - -[package.extras] -plugins = ["importlib-metadata"] - -[[package]] -name = "pylint" -version = "2.17.1" -description = "python code static checker" -optional = false -python-versions = ">=3.7.2" -files = [ - {file = "pylint-2.17.1-py3-none-any.whl", hash = "sha256:8660a54e3f696243d644fca98f79013a959c03f979992c1ab59c24d3f4ec2700"}, - {file = "pylint-2.17.1.tar.gz", hash = "sha256:d4d009b0116e16845533bc2163493d6681846ac725eab8ca8014afb520178ddd"}, -] - -[package.dependencies] -astroid = ">=2.15.0,<=2.17.0-dev0" -colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} -dill = {version = ">=0.2", markers = "python_version < \"3.11\""} -isort = ">=4.2.5,<6" -mccabe = ">=0.6,<0.8" -platformdirs = ">=2.2.0" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -tomlkit = ">=0.10.1" -typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} - -[package.extras] -spelling = ["pyenchant (>=3.2,<4.0)"] -testutils = ["gitpython (>3)"] - -[[package]] -name = "pyparsing" -version = "3.0.9" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -optional = false -python-versions = ">=3.6.8" -files = [ - {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, - {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, -] - -[package.extras] -diagrams = ["jinja2", "railroad-diagrams"] - -[[package]] -name = "pyreadline3" -version = "3.4.1" -description = "A python implementation of GNU readline." 
-optional = true -python-versions = "*" -files = [ - {file = "pyreadline3-3.4.1-py3-none-any.whl", hash = "sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb"}, - {file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"}, -] - -[[package]] -name = "pytest" -version = "7.2.1" -description = "pytest: simple powerful testing with Python" -optional = false -python-versions = ">=3.7" -files = [ - {file = "pytest-7.2.1-py3-none-any.whl", hash = "sha256:c7c6ca206e93355074ae32f7403e8ea12163b1163c976fee7d4d84027c162be5"}, - {file = "pytest-7.2.1.tar.gz", hash = "sha256:d45e0952f3727241918b8fd0f376f5ff6b301cc0777c6f9a556935c92d8a7d42"}, -] - -[package.dependencies] -attrs = ">=19.2.0" -colorama = {version = "*", markers = "sys_platform == \"win32\""} -exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} -iniconfig = "*" -packaging = "*" -pluggy = ">=0.12,<2.0" -tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} - -[package.extras] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] - -[[package]] -name = "pytest-xdist" -version = "3.1.0" -description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" -optional = false -python-versions = ">=3.7" -files = [ - {file = "pytest-xdist-3.1.0.tar.gz", hash = "sha256:40fdb8f3544921c5dfcd486ac080ce22870e71d82ced6d2e78fa97c2addd480c"}, - {file = "pytest_xdist-3.1.0-py3-none-any.whl", hash = "sha256:70a76f191d8a1d2d6be69fc440cdf85f3e4c03c08b520fd5dc5d338d6cf07d89"}, -] - -[package.dependencies] -execnet = ">=1.1" -pytest = ">=6.2.0" - -[package.extras] -psutil = ["psutil (>=3.0)"] -setproctitle = ["setproctitle"] -testing = ["filelock"] - -[[package]] -name = "python-dateutil" -version = "2.8.2" -description = "Extensions to the standard Python datetime module" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -files = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, -] - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "pytz" -version = "2023.3" -description = "World timezone definitions, modern and historical" -optional = false -python-versions = "*" -files = [ - {file = "pytz-2023.3-py2.py3-none-any.whl", hash = "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"}, - {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, -] - -[[package]] -name = "pyusb" -version = "1.2.1" -description = "Python USB access module" -optional = false -python-versions = ">=3.6.0" -files = [ - {file = "pyusb-1.2.1-py3-none-any.whl", hash = "sha256:2b4c7cb86dbadf044dfb9d3a4ff69fd217013dbe78a792177a3feb172449ea36"}, - {file = "pyusb-1.2.1.tar.gz", hash = "sha256:a4cc7404a203144754164b8b40994e2849fde1cfff06b08492f12fff9d9de7b9"}, -] - -[[package]] -name = "recommonmark" -version = "0.6.0" -description = "A docutils-compatibility bridge to CommonMark, enabling you to write CommonMark inside of Docutils & Sphinx projects." 
-optional = false -python-versions = "*" -files = [ - {file = "recommonmark-0.6.0-py2.py3-none-any.whl", hash = "sha256:2ec4207a574289355d5b6ae4ae4abb29043346ca12cdd5f07d374dc5987d2852"}, - {file = "recommonmark-0.6.0.tar.gz", hash = "sha256:29cd4faeb6c5268c633634f2d69aef9431e0f4d347f90659fd0aab20e541efeb"}, -] - -[package.dependencies] -commonmark = ">=0.8.1" -docutils = ">=0.11" -sphinx = ">=1.3.1" - -[[package]] -name = "regex" -version = "2023.3.23" -description = "Alternative regular expression module, to replace re." -optional = false -python-versions = ">=3.8" -files = [ - {file = "regex-2023.3.23-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:845a5e2d84389c4ddada1a9b95c055320070f18bb76512608374aca00d22eca8"}, - {file = "regex-2023.3.23-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:87d9951f5a538dd1d016bdc0dcae59241d15fa94860964833a54d18197fcd134"}, - {file = "regex-2023.3.23-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37ae17d3be44c0b3f782c28ae9edd8b47c1f1776d4cabe87edc0b98e1f12b021"}, - {file = "regex-2023.3.23-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0b8eb1e3bca6b48dc721818a60ae83b8264d4089a4a41d62be6d05316ec38e15"}, - {file = "regex-2023.3.23-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df45fac182ebc3c494460c644e853515cc24f5ad9da05f8ffb91da891bfee879"}, - {file = "regex-2023.3.23-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7006105b10b59971d3b248ad75acc3651c7e4cf54d81694df5a5130a3c3f7ea"}, - {file = "regex-2023.3.23-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93f3f1aa608380fe294aa4cb82e2afda07a7598e828d0341e124b8fd9327c715"}, - {file = "regex-2023.3.23-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:787954f541ab95d8195d97b0b8cf1dc304424adb1e07365967e656b92b38a699"}, - {file = "regex-2023.3.23-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:20abe0bdf03630fe92ccafc45a599bca8b3501f48d1de4f7d121153350a2f77d"}, - {file = "regex-2023.3.23-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:11d00c31aeab9a6e0503bc77e73ed9f4527b3984279d997eb145d7c7be6268fd"}, - {file = "regex-2023.3.23-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:d5bbe0e1511b844794a3be43d6c145001626ba9a6c1db8f84bdc724e91131d9d"}, - {file = "regex-2023.3.23-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ea3c0cb56eadbf4ab2277e7a095676370b3e46dbfc74d5c383bd87b0d6317910"}, - {file = "regex-2023.3.23-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d895b4c863059a4934d3e874b90998df774644a41b349ebb330f85f11b4ef2c0"}, - {file = "regex-2023.3.23-cp310-cp310-win32.whl", hash = "sha256:9d764514d19b4edcc75fd8cb1423448ef393e8b6cbd94f38cab983ab1b75855d"}, - {file = "regex-2023.3.23-cp310-cp310-win_amd64.whl", hash = "sha256:11d1f2b7a0696dc0310de0efb51b1f4d813ad4401fe368e83c0c62f344429f98"}, - {file = "regex-2023.3.23-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8a9c63cde0eaa345795c0fdeb19dc62d22e378c50b0bc67bf4667cd5b482d98b"}, - {file = "regex-2023.3.23-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dd7200b4c27b68cf9c9646da01647141c6db09f48cc5b51bc588deaf8e98a797"}, - {file = "regex-2023.3.23-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22720024b90a6ba673a725dcc62e10fb1111b889305d7c6b887ac7466b74bedb"}, - {file = "regex-2023.3.23-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:6b190a339090e6af25f4a5fd9e77591f6d911cc7b96ecbb2114890b061be0ac1"}, - {file = "regex-2023.3.23-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e76b6fc0d8e9efa39100369a9b3379ce35e20f6c75365653cf58d282ad290f6f"}, - {file = "regex-2023.3.23-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7868b8f218bf69a2a15402fde08b08712213a1f4b85a156d90473a6fb6b12b09"}, - {file = "regex-2023.3.23-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2472428efc4127374f494e570e36b30bb5e6b37d9a754f7667f7073e43b0abdd"}, - {file = "regex-2023.3.23-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c37df2a060cb476d94c047b18572ee2b37c31f831df126c0da3cd9227b39253d"}, - {file = "regex-2023.3.23-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4479f9e2abc03362df4045b1332d4a2b7885b245a30d4f4b051c4083b97d95d8"}, - {file = "regex-2023.3.23-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e2396e0678167f2d0c197da942b0b3fb48fee2f0b5915a0feb84d11b6686afe6"}, - {file = "regex-2023.3.23-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:75f288c60232a5339e0ff2fa05779a5e9c74e9fc085c81e931d4a264501e745b"}, - {file = "regex-2023.3.23-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c869260aa62cee21c5eb171a466c0572b5e809213612ef8d495268cd2e34f20d"}, - {file = "regex-2023.3.23-cp311-cp311-win32.whl", hash = "sha256:25f0532fd0c53e96bad84664171969de9673b4131f2297f1db850d3918d58858"}, - {file = "regex-2023.3.23-cp311-cp311-win_amd64.whl", hash = "sha256:5ccfafd98473e007cebf7da10c1411035b7844f0f204015efd050601906dbb53"}, - {file = "regex-2023.3.23-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6572ff287176c0fb96568adb292674b421fa762153ed074d94b1d939ed92c253"}, - {file = "regex-2023.3.23-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a610e0adfcb0fc84ea25f6ea685e39e74cbcd9245a72a9a7aab85ff755a5ed27"}, - {file = "regex-2023.3.23-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086afe222d58b88b62847bdbd92079b4699350b4acab892f88a935db5707c790"}, - {file = "regex-2023.3.23-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79e29fd62fa2f597a6754b247356bda14b866131a22444d67f907d6d341e10f3"}, - {file = "regex-2023.3.23-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c07ce8e9eee878a48ebeb32ee661b49504b85e164b05bebf25420705709fdd31"}, - {file = "regex-2023.3.23-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86b036f401895e854de9fefe061518e78d506d8a919cc250dc3416bca03f6f9a"}, - {file = "regex-2023.3.23-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78ac8dd8e18800bb1f97aad0d73f68916592dddf233b99d2b5cabc562088503a"}, - {file = "regex-2023.3.23-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:539dd010dc35af935b32f248099e38447bbffc10b59c2b542bceead2bed5c325"}, - {file = "regex-2023.3.23-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9bf4a5626f2a0ea006bf81e8963f498a57a47d58907eaa58f4b3e13be68759d8"}, - {file = "regex-2023.3.23-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:cf86b4328c204c3f315074a61bc1c06f8a75a8e102359f18ce99fbcbbf1951f0"}, - {file = "regex-2023.3.23-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:2848bf76673c83314068241c8d5b7fa9ad9bed866c979875a0e84039349e8fa7"}, - {file = "regex-2023.3.23-cp38-cp38-musllinux_1_1_s390x.whl", hash = 
"sha256:c125a02d22c555e68f7433bac8449992fa1cead525399f14e47c2d98f2f0e467"}, - {file = "regex-2023.3.23-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cd1671e9d5ac05ce6aa86874dd8dfa048824d1dbe73060851b310c6c1a201a96"}, - {file = "regex-2023.3.23-cp38-cp38-win32.whl", hash = "sha256:fffe57312a358be6ec6baeb43d253c36e5790e436b7bf5b7a38df360363e88e9"}, - {file = "regex-2023.3.23-cp38-cp38-win_amd64.whl", hash = "sha256:dbb3f87e15d3dd76996d604af8678316ad2d7d20faa394e92d9394dfd621fd0c"}, - {file = "regex-2023.3.23-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c88e8c226473b5549fe9616980ea7ca09289246cfbdf469241edf4741a620004"}, - {file = "regex-2023.3.23-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6560776ec19c83f3645bbc5db64a7a5816c9d8fb7ed7201c5bcd269323d88072"}, - {file = "regex-2023.3.23-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b1fc2632c01f42e06173d8dd9bb2e74ab9b0afa1d698058c867288d2c7a31f3"}, - {file = "regex-2023.3.23-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fdf7ad455f1916b8ea5cdbc482d379f6daf93f3867b4232d14699867a5a13af7"}, - {file = "regex-2023.3.23-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5fc33b27b1d800fc5b78d7f7d0f287e35079ecabe68e83d46930cf45690e1c8c"}, - {file = "regex-2023.3.23-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c49552dc938e3588f63f8a78c86f3c9c75301e813bca0bef13bdb4b87ccf364"}, - {file = "regex-2023.3.23-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e152461e9a0aedec7d37fc66ec0fa635eca984777d3d3c3e36f53bf3d3ceb16e"}, - {file = "regex-2023.3.23-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:db034255e72d2995cf581b14bb3fc9c00bdbe6822b49fcd4eef79e1d5f232618"}, - {file = "regex-2023.3.23-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:55ae114da21b7a790b90255ea52d2aa3a0d121a646deb2d3c6a3194e722fc762"}, - {file = "regex-2023.3.23-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ef3f528fe1cc3d139508fe1b22523745aa77b9d6cb5b0bf277f48788ee0b993f"}, - {file = "regex-2023.3.23-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:a81c9ec59ca2303acd1ccd7b9ac409f1e478e40e96f8f79b943be476c5fdb8bb"}, - {file = "regex-2023.3.23-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cde09c4fdd070772aa2596d97e942eb775a478b32459e042e1be71b739d08b77"}, - {file = "regex-2023.3.23-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3cd9f5dd7b821f141d3a6ca0d5d9359b9221e4f051ca3139320adea9f1679691"}, - {file = "regex-2023.3.23-cp39-cp39-win32.whl", hash = "sha256:7304863f3a652dab5e68e6fb1725d05ebab36ec0390676d1736e0571ebb713ef"}, - {file = "regex-2023.3.23-cp39-cp39-win_amd64.whl", hash = "sha256:54c3fa855a3f7438149de3211738dd9b5f0c733f48b54ae05aa7fce83d48d858"}, - {file = "regex-2023.3.23.tar.gz", hash = "sha256:dc80df325b43ffea5cdea2e3eaa97a44f3dd298262b1c7fe9dbb2a9522b956a7"}, -] - -[[package]] -name = "requests" -version = "2.28.2" -description = "Python HTTP for Humans." 
-optional = false -python-versions = ">=3.7, <4" -files = [ - {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, - {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, -] - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = ">=2,<4" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<1.27" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "requests-oauthlib" -version = "1.3.1" -description = "OAuthlib authentication support for Requests." -optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"}, - {file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"}, -] - -[package.dependencies] -oauthlib = ">=3.0.0" -requests = ">=2.0.0" - -[package.extras] -rsa = ["oauthlib[signedtoken] (>=3.0.0)"] - -[[package]] -name = "rsa" -version = "4.9" -description = "Pure-Python RSA implementation" -optional = true -python-versions = ">=3.6,<4" -files = [ - {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, - {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, -] - -[package.dependencies] -pyasn1 = ">=0.1.3" - -[[package]] -name = "scipy" -version = "1.7.3" -description = "SciPy: Scientific Library for Python" -optional = false -python-versions = ">=3.7,<3.11" -files = [ - {file = "scipy-1.7.3-1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:c9e04d7e9b03a8a6ac2045f7c5ef741be86727d8f49c45db45f244bdd2bcff17"}, - {file = "scipy-1.7.3-1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:b0e0aeb061a1d7dcd2ed59ea57ee56c9b23dd60100825f98238c06ee5cc4467e"}, - {file = "scipy-1.7.3-1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:b78a35c5c74d336f42f44106174b9851c783184a85a3fe3e68857259b37b9ffb"}, - {file = "scipy-1.7.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:173308efba2270dcd61cd45a30dfded6ec0085b4b6eb33b5eb11ab443005e088"}, - {file = "scipy-1.7.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:21b66200cf44b1c3e86495e3a436fc7a26608f92b8d43d344457c54f1c024cbc"}, - {file = "scipy-1.7.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ceebc3c4f6a109777c0053dfa0282fddb8893eddfb0d598574acfb734a926168"}, - {file = "scipy-1.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7eaea089345a35130bc9a39b89ec1ff69c208efa97b3f8b25ea5d4c41d88094"}, - {file = "scipy-1.7.3-cp310-cp310-win_amd64.whl", hash = "sha256:304dfaa7146cffdb75fbf6bb7c190fd7688795389ad060b970269c8576d038e9"}, - {file = "scipy-1.7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:033ce76ed4e9f62923e1f8124f7e2b0800db533828c853b402c7eec6e9465d80"}, - {file = "scipy-1.7.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4d242d13206ca4302d83d8a6388c9dfce49fc48fdd3c20efad89ba12f785bf9e"}, - {file = "scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8499d9dd1459dc0d0fe68db0832c3d5fc1361ae8e13d05e6849b358dc3f2c279"}, - {file = "scipy-1.7.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ca36e7d9430f7481fc7d11e015ae16fbd5575615a8e9060538104778be84addf"}, - {file = "scipy-1.7.3-cp37-cp37m-win32.whl", hash = "sha256:e2c036492e673aad1b7b0d0ccdc0cb30a968353d2c4bf92ac8e73509e1bf212c"}, - {file = "scipy-1.7.3-cp37-cp37m-win_amd64.whl", hash = "sha256:866ada14a95b083dd727a845a764cf95dd13ba3dc69a16b99038001b05439709"}, - {file = "scipy-1.7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:65bd52bf55f9a1071398557394203d881384d27b9c2cad7df9a027170aeaef93"}, - {file = "scipy-1.7.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:f99d206db1f1ae735a8192ab93bd6028f3a42f6fa08467d37a14eb96c9dd34a3"}, - {file = "scipy-1.7.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5f2cfc359379c56b3a41b17ebd024109b2049f878badc1e454f31418c3a18436"}, - {file = "scipy-1.7.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb7ae2c4dbdb3c9247e07acc532f91077ae6dbc40ad5bd5dca0bb5a176ee9bda"}, - {file = "scipy-1.7.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c2d250074cfa76715d58830579c64dff7354484b284c2b8b87e5a38321672c"}, - {file = "scipy-1.7.3-cp38-cp38-win32.whl", hash = "sha256:87069cf875f0262a6e3187ab0f419f5b4280d3dcf4811ef9613c605f6e4dca95"}, - {file = "scipy-1.7.3-cp38-cp38-win_amd64.whl", hash = "sha256:7edd9a311299a61e9919ea4192dd477395b50c014cdc1a1ac572d7c27e2207fa"}, - {file = "scipy-1.7.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eef93a446114ac0193a7b714ce67659db80caf940f3232bad63f4c7a81bc18df"}, - {file = "scipy-1.7.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb326658f9b73c07081300daba90a8746543b5ea177184daed26528273157294"}, - {file = "scipy-1.7.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:93378f3d14fff07572392ce6a6a2ceb3a1f237733bd6dcb9eb6a2b29b0d19085"}, - {file = "scipy-1.7.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edad1cf5b2ce1912c4d8ddad20e11d333165552aba262c882e28c78bbc09dbf6"}, - {file = "scipy-1.7.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d1cc2c19afe3b5a546ede7e6a44ce1ff52e443d12b231823268019f608b9b12"}, - {file = "scipy-1.7.3-cp39-cp39-win32.whl", hash = "sha256:2c56b820d304dffcadbbb6cbfbc2e2c79ee46ea291db17e288e73cd3c64fefa9"}, - {file = "scipy-1.7.3-cp39-cp39-win_amd64.whl", hash = "sha256:3f78181a153fa21c018d346f595edd648344751d7f03ab94b398be2ad083ed3e"}, - {file = "scipy-1.7.3.tar.gz", hash = "sha256:ab5875facfdef77e0a47d5fd39ea178b58e60e454a4c85aa1e52fcb80db7babf"}, -] - -[package.dependencies] -numpy = ">=1.16.5,<1.23.0" - -[[package]] -name = "setuptools" -version = "67.6.1" -description = "Easily download, build, install, upgrade, and uninstall Python packages" -optional = true -python-versions = ">=3.7" -files = [ - {file = "setuptools-67.6.1-py3-none-any.whl", hash = "sha256:e728ca814a823bf7bf60162daf9db95b93d532948c4c0bea762ce62f60189078"}, - {file = "setuptools-67.6.1.tar.gz", hash = "sha256:257de92a9d50a60b8e22abfcbb771571fde0dbf3ec234463212027a4eeecbe9a"}, -] - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run 
(>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] -testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] - -[[package]] -name = "snowballstemmer" -version = "2.2.0" -description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." -optional = false -python-versions = "*" -files = [ - {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, - {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, -] - -[[package]] -name = "sphinx" -version = "6.1.3" -description = "Python documentation generator" -optional = false -python-versions = ">=3.8" -files = [ - {file = "Sphinx-6.1.3.tar.gz", hash = "sha256:0dac3b698538ffef41716cf97ba26c1c7788dba73ce6f150c1ff5b4720786dd2"}, - {file = "sphinx-6.1.3-py3-none-any.whl", hash = "sha256:807d1cb3d6be87eb78a381c3e70ebd8d346b9a25f3753e9947e866b2786865fc"}, -] - -[package.dependencies] -alabaster = ">=0.7,<0.8" -babel = ">=2.9" -colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} -docutils = ">=0.18,<0.20" -imagesize = ">=1.3" -importlib-metadata = {version = ">=4.8", markers = "python_version < \"3.10\""} -Jinja2 = ">=3.0" -packaging = ">=21.0" -Pygments = ">=2.13" -requests = ">=2.25.0" -snowballstemmer = ">=2.0" -sphinxcontrib-applehelp = "*" -sphinxcontrib-devhelp = "*" -sphinxcontrib-htmlhelp = ">=2.0.0" -sphinxcontrib-jsmath = "*" -sphinxcontrib-qthelp = "*" -sphinxcontrib-serializinghtml = ">=1.1.5" - -[package.extras] -docs = ["sphinxcontrib-websupport"] -lint = ["docutils-stubs", "flake8 (>=3.5.0)", "flake8-simplify", "isort", "mypy (>=0.990)", "ruff", "sphinx-lint", "types-requests"] -test = ["cython", "html5lib", "pytest (>=4.6)"] - -[[package]] -name = "sphinxcontrib-applehelp" -version = "1.0.4" -description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" -optional = false -python-versions = ">=3.8" -files = [ - {file = "sphinxcontrib-applehelp-1.0.4.tar.gz", hash = "sha256:828f867945bbe39817c210a1abfd1bc4895c8b73fcaade56d45357a348a07d7e"}, - {file = "sphinxcontrib_applehelp-1.0.4-py3-none-any.whl", hash = "sha256:29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-devhelp" -version = "1.0.2" -description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp document." 
-optional = false -python-versions = ">=3.5" -files = [ - {file = "sphinxcontrib-devhelp-1.0.2.tar.gz", hash = "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4"}, - {file = "sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-htmlhelp" -version = "2.0.1" -description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" -optional = false -python-versions = ">=3.8" -files = [ - {file = "sphinxcontrib-htmlhelp-2.0.1.tar.gz", hash = "sha256:0cbdd302815330058422b98a113195c9249825d681e18f11e8b1f78a2f11efff"}, - {file = "sphinxcontrib_htmlhelp-2.0.1-py3-none-any.whl", hash = "sha256:c38cb46dccf316c79de6e5515e1770414b797162b23cd3d06e67020e1d2a6903"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["html5lib", "pytest"] - -[[package]] -name = "sphinxcontrib-jsmath" -version = "1.0.1" -description = "A sphinx extension which renders display math in HTML via JavaScript" -optional = false -python-versions = ">=3.5" -files = [ - {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"}, - {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"}, -] - -[package.extras] -test = ["flake8", "mypy", "pytest"] - -[[package]] -name = "sphinxcontrib-qthelp" -version = "1.0.3" -description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp document." -optional = false -python-versions = ">=3.5" -files = [ - {file = "sphinxcontrib-qthelp-1.0.3.tar.gz", hash = "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72"}, - {file = "sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-serializinghtml" -version = "1.1.5" -description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)." -optional = false -python-versions = ">=3.5" -files = [ - {file = "sphinxcontrib-serializinghtml-1.1.5.tar.gz", hash = "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952"}, - {file = "sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl", hash = "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sqlparse" -version = "0.4.3" -description = "A non-validating SQL parser." 
-optional = false -python-versions = ">=3.5" -files = [ - {file = "sqlparse-0.4.3-py3-none-any.whl", hash = "sha256:0323c0ec29cd52bceabc1b4d9d579e311f3e4961b98d174201d5622a23b85e34"}, - {file = "sqlparse-0.4.3.tar.gz", hash = "sha256:69ca804846bb114d2ec380e4360a8a340db83f0ccf3afceeb1404df028f57268"}, -] - -[[package]] -name = "sympy" -version = "1.11.1" -description = "Computer algebra system (CAS) in Python" -optional = true -python-versions = ">=3.8" -files = [ - {file = "sympy-1.11.1-py3-none-any.whl", hash = "sha256:938f984ee2b1e8eae8a07b884c8b7a1146010040fccddc6539c54f401c8f6fcf"}, - {file = "sympy-1.11.1.tar.gz", hash = "sha256:e32380dce63cb7c0108ed525570092fd45168bdae2faa17e528221ef72e88658"}, -] - -[package.dependencies] -mpmath = ">=0.19" - -[[package]] -name = "tensorboard" -version = "2.12.0" -description = "TensorBoard lets you watch Tensors Flow" -optional = true -python-versions = ">=3.8" -files = [ - {file = "tensorboard-2.12.0-py3-none-any.whl", hash = "sha256:3cbdc32448d7a28dc1bf0b1754760c08b8e0e2e37c451027ebd5ff4896613012"}, -] - -[package.dependencies] -absl-py = ">=0.4" -google-auth = ">=1.6.3,<3" -google-auth-oauthlib = ">=0.4.1,<0.5" -grpcio = ">=1.48.2" -markdown = ">=2.6.8" -numpy = ">=1.12.0" -protobuf = ">=3.19.6" -requests = ">=2.21.0,<3" -setuptools = ">=41.0.0" -tensorboard-data-server = ">=0.7.0,<0.8.0" -tensorboard-plugin-wit = ">=1.6.0" -werkzeug = ">=1.0.1" -wheel = ">=0.26" - -[[package]] -name = "tensorboard-data-server" -version = "0.7.0" -description = "Fast data loading for TensorBoard" -optional = true -python-versions = ">=3.7" -files = [ - {file = "tensorboard_data_server-0.7.0-py3-none-any.whl", hash = "sha256:753d4214799b31da7b6d93837959abebbc6afa86e69eacf1e9a317a48daa31eb"}, - {file = "tensorboard_data_server-0.7.0-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:eb7fa518737944dbf4f0cf83c2e40a7ac346bf91be2e6a0215de98be74e85454"}, - {file = "tensorboard_data_server-0.7.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64aa1be7c23e80b1a42c13b686eb0875bb70f5e755f4d2b8de5c1d880cf2267f"}, -] - -[[package]] -name = "tensorboard-plugin-wit" -version = "1.8.1" -description = "What-If Tool TensorBoard plugin." -optional = true -python-versions = "*" -files = [ - {file = "tensorboard_plugin_wit-1.8.1-py3-none-any.whl", hash = "sha256:ff26bdd583d155aa951ee3b152b3d0cffae8005dc697f72b44a8e8c2a77a8cbe"}, -] - -[[package]] -name = "tensorflow" -version = "2.12.0" -description = "TensorFlow is an open source machine learning framework for everyone." 
-optional = true -python-versions = ">=3.8" -files = [ - {file = "tensorflow-2.12.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:be4ac0dfcc7a16f6df2bc19bd322e312235ab3f7b0c7297f96c92c44bb14d2a1"}, - {file = "tensorflow-2.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5193ddb3bb5120cb445279beb08ed9e74a85a4eeb2485550d6fb707a89d9a88"}, - {file = "tensorflow-2.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:357d9d2851188a8d27ee195345b4d175cad970150d1344ba9d9fcc4bf2b68336"}, - {file = "tensorflow-2.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:c8001210df7202ef6267150865b0b79f834c3ca69ee3132277de8eeb994dffde"}, - {file = "tensorflow-2.12.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:91dccda42c03569d8c787190482a11ecae3b9b173aaa9166f0ab20cecc9c31f4"}, - {file = "tensorflow-2.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31f81eb8adaeb558963f5d8b47dbfcc398d898f0857bf3de6b6484350236b7b5"}, - {file = "tensorflow-2.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ec4a2934ea19e92f27a9668ece43025ed5efe14b5d19be53b07692bc8a4189d"}, - {file = "tensorflow-2.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e7641e2a6e32f31ff233495478a9cc86b7c038140eab714a61eeddbbbb327c3"}, - {file = "tensorflow-2.12.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:a7194e744c5a7f3e759ecb949527b4a07718a6d1110e6e82fd4ce0c5586a7d4a"}, - {file = "tensorflow-2.12.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4afc2dd57435f29ebe249eb5f595d89b0e73be94922eeb7110aa6280a332837c"}, - {file = "tensorflow-2.12.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23850332f1f9f778d697c9dba63ca52be72cb73363e75ad358f07ddafef63c01"}, - {file = "tensorflow-2.12.0-cp38-cp38-win_amd64.whl", hash = "sha256:e29fcf6cfd069aefb4b44f357cccbb4415a5a3d7b5b516eaf4450062fe40021e"}, - {file = "tensorflow-2.12.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:42fc2635e9420faee781a16bd393126f29cd39aa2b9d02901f24d8497bd6f958"}, - {file = "tensorflow-2.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76414355e420edb9154b4e72113eef5813ccb71701fda959afbbc1eebe3099bd"}, - {file = "tensorflow-2.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:020d6a54cb26020bdc71a7bae8ee35be05096f63e773dc517f6e87c49de62c50"}, - {file = "tensorflow-2.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:9f70a8f9ab46e5ed436850aa60d1cd40645f5c669e14bcad48915dc1f597dda2"}, -] - -[package.dependencies] -absl-py = ">=1.0.0" -astunparse = ">=1.6.0" -flatbuffers = ">=2.0" -gast = ">=0.2.1,<=0.4.0" -google-pasta = ">=0.1.1" -grpcio = ">=1.24.3,<2.0" -h5py = ">=2.9.0" -jax = ">=0.3.15" -keras = ">=2.12.0,<2.13" -libclang = ">=13.0.0" -numpy = ">=1.22,<1.24" -opt-einsum = ">=2.3.2" -packaging = "*" -protobuf = ">=3.20.3,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" -setuptools = "*" -six = ">=1.12.0" -tensorboard = ">=2.12,<2.13" -tensorflow-estimator = ">=2.12.0,<2.13" -tensorflow-io-gcs-filesystem = {version = ">=0.23.1", markers = "platform_machine != \"arm64\" or platform_system != \"Darwin\""} -termcolor = ">=1.1.0" -typing-extensions = ">=3.6.6" -wrapt = ">=1.11.0,<1.15" - -[[package]] -name = "tensorflow-estimator" -version = "2.12.0" -description = "TensorFlow Estimator." 
-optional = true -python-versions = ">=3.7" -files = [ - {file = "tensorflow_estimator-2.12.0-py2.py3-none-any.whl", hash = "sha256:59b191bead4883822de3d63ac02ace11a83bfe6c10d64d0c4dfde75a50e60ca1"}, -] - -[[package]] -name = "tensorflow-io-gcs-filesystem" -version = "0.32.0" -description = "TensorFlow IO" -optional = true -python-versions = ">=3.7, <3.12" -files = [ - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:74a7e25e83d4117a7ebb09a3f247553a5497393ab48c3ee0cf0d17b405026817"}, - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:045d51bba586390d0545fcd8a18727d62b175eb142f6f4c6d719d39de40774cd"}, - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db682e9a510c27dd35710ba5a2c62c371e25b727741b2fe3a920355fa501e947"}, - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:7f15fd22e592661b10de317be2f42a0f84be7bfc5e6a565fcfcb04b60d625b78"}, - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp311-cp311-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:336d9b3fe6b55aea149c4f6aa1fd6ffaf27d4e5c37e55a182340b47caba38846"}, - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:842f5f09cd756bdb3b4d0b5571b3a6f72fd534d42da938b9acf0ef462995eada"}, - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:1ce80e1555d6ee88dda67feddf366cc8b30252b5837a7a17303df7b06a71fc2e"}, - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05e65d3cb6c93a7929b384d86c6369c63cbbab8a770440a3d95e094878403f9f"}, - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:21de7dcc06eb1e7de3c022b0072d90ba35ef886578149663437aa7a6fb5bf6b3"}, - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:79fdd02103b8ae9f8b89af41f744c013fa1caaea709de19833917795e3063857"}, - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5635df0bbe40f971dc1b946e3372744b0bdfda45c38ffcd28ef53a32bb8da4da"}, - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:122be149e5f6a030f5c2901be0cc3cb07619232f7b03889e2cdf3da1c0d4f92f"}, - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8214cdf85bea694160f9035ff395221c1e25e119784ccb4c104919b1f5dec84e"}, - {file = "tensorflow_io_gcs_filesystem-0.32.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28202492d904a6e280cf27560791e87ac1c7566000db82065d63a70c27008af2"}, -] - -[package.extras] -tensorflow = ["tensorflow (>=2.12.0,<2.13.0)"] -tensorflow-aarch64 = ["tensorflow-aarch64 (>=2.12.0,<2.13.0)"] -tensorflow-cpu = ["tensorflow-cpu (>=2.12.0,<2.13.0)"] -tensorflow-gpu = ["tensorflow-gpu (>=2.12.0,<2.13.0)"] -tensorflow-rocm = ["tensorflow-rocm (>=2.12.0,<2.13.0)"] - -[[package]] -name = "termcolor" -version = "2.2.0" -description = "ANSI color formatting for output in terminal" -optional = true -python-versions = ">=3.7" -files = [ - {file = "termcolor-2.2.0-py3-none-any.whl", hash = "sha256:91ddd848e7251200eac969846cbae2dacd7d71c2871e92733289e7e3666f48e7"}, - {file = 
"termcolor-2.2.0.tar.gz", hash = "sha256:dfc8ac3f350788f23b2947b3e6cfa5a53b630b612e6cd8965a015a776020b99a"}, -] - -[package.extras] -tests = ["pytest", "pytest-cov"] - -[[package]] -name = "tflite" -version = "2.10.0" -description = "Parsing TensorFlow Lite Models (*.tflite) Easily" -optional = true -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,<4,>=2.7" -files = [ - {file = "tflite-2.10.0-py2.py3-none-any.whl", hash = "sha256:89cb9f57df0f5345f8fad1381e0fae6180ded687113eb552cfbb60a05edc002c"}, - {file = "tflite-2.10.0.tar.gz", hash = "sha256:6818a5d7776958b803944ba0a1f4c4395559606d9e795d67ac467a8a3904757d"}, -] - -[package.dependencies] -flatbuffers = "*" -numpy = "*" - -[[package]] -name = "toml" -version = "0.10.2" -description = "Python Library for Tom's Obvious, Minimal Language" -optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, - {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, -] - -[[package]] -name = "tomli" -version = "2.0.1" -description = "A lil' TOML parser" -optional = false -python-versions = ">=3.7" -files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, -] - -[[package]] -name = "tomlkit" -version = "0.11.7" -description = "Style preserving TOML library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "tomlkit-0.11.7-py3-none-any.whl", hash = "sha256:5325463a7da2ef0c6bbfefb62a3dc883aebe679984709aee32a317907d0a8d3c"}, - {file = "tomlkit-0.11.7.tar.gz", hash = "sha256:f392ef70ad87a672f02519f99967d28a4d3047133e2d1df936511465fbb3791d"}, -] - -[[package]] -name = "torch" -version = "1.13.1" -description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -optional = true -python-versions = ">=3.7.0" -files = [ - {file = "torch-1.13.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:fd12043868a34a8da7d490bf6db66991108b00ffbeecb034228bfcbbd4197143"}, - {file = "torch-1.13.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:d9fe785d375f2e26a5d5eba5de91f89e6a3be5d11efb497e76705fdf93fa3c2e"}, - {file = "torch-1.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:98124598cdff4c287dbf50f53fb455f0c1e3a88022b39648102957f3445e9b76"}, - {file = "torch-1.13.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:393a6273c832e047581063fb74335ff50b4c566217019cc6ace318cd79eb0566"}, - {file = "torch-1.13.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:0122806b111b949d21fa1a5f9764d1fd2fcc4a47cb7f8ff914204fd4fc752ed5"}, - {file = "torch-1.13.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:22128502fd8f5b25ac1cd849ecb64a418382ae81dd4ce2b5cebaa09ab15b0d9b"}, - {file = "torch-1.13.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:76024be052b659ac1304ab8475ab03ea0a12124c3e7626282c9c86798ac7bc11"}, - {file = "torch-1.13.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:ea8dda84d796094eb8709df0fcd6b56dc20b58fdd6bc4e8d7109930dafc8e419"}, - {file = "torch-1.13.1-cp37-cp37m-win_amd64.whl", hash = "sha256:2ee7b81e9c457252bddd7d3da66fb1f619a5d12c24d7074de91c4ddafb832c93"}, - {file = "torch-1.13.1-cp37-none-macosx_10_9_x86_64.whl", hash = "sha256:0d9b8061048cfb78e675b9d2ea8503bfe30db43d583599ae8626b1263a0c1380"}, - {file 
= "torch-1.13.1-cp37-none-macosx_11_0_arm64.whl", hash = "sha256:f402ca80b66e9fbd661ed4287d7553f7f3899d9ab54bf5c67faada1555abde28"}, - {file = "torch-1.13.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:727dbf00e2cf858052364c0e2a496684b9cb5aa01dc8a8bc8bbb7c54502bdcdd"}, - {file = "torch-1.13.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:df8434b0695e9ceb8cc70650afc1310d8ba949e6db2a0525ddd9c3b2b181e5fe"}, - {file = "torch-1.13.1-cp38-cp38-win_amd64.whl", hash = "sha256:5e1e722a41f52a3f26f0c4fcec227e02c6c42f7c094f32e49d4beef7d1e213ea"}, - {file = "torch-1.13.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:33e67eea526e0bbb9151263e65417a9ef2d8fa53cbe628e87310060c9dcfa312"}, - {file = "torch-1.13.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:eeeb204d30fd40af6a2d80879b46a7efbe3cf43cdbeb8838dd4f3d126cc90b2b"}, - {file = "torch-1.13.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:50ff5e76d70074f6653d191fe4f6a42fdbe0cf942fbe2a3af0b75eaa414ac038"}, - {file = "torch-1.13.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:2c3581a3fd81eb1f0f22997cddffea569fea53bafa372b2c0471db373b26aafc"}, - {file = "torch-1.13.1-cp39-cp39-win_amd64.whl", hash = "sha256:0aa46f0ac95050c604bcf9ef71da9f1172e5037fdf2ebe051962d47b123848e7"}, - {file = "torch-1.13.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:6930791efa8757cb6974af73d4996b6b50c592882a324b8fb0589c6a9ba2ddaf"}, - {file = "torch-1.13.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:e0df902a7c7dd6c795698532ee5970ce898672625635d885eade9976e5a04949"}, -] - -[package.dependencies] -nvidia-cublas-cu11 = {version = "11.10.3.66", markers = "platform_system == \"Linux\""} -nvidia-cuda-nvrtc-cu11 = {version = "11.7.99", markers = "platform_system == \"Linux\""} -nvidia-cuda-runtime-cu11 = {version = "11.7.99", markers = "platform_system == \"Linux\""} -nvidia-cudnn-cu11 = {version = "8.5.0.96", markers = "platform_system == \"Linux\""} -typing-extensions = "*" - -[package.extras] -opt-einsum = ["opt-einsum (>=3.3)"] - -[[package]] -name = "torchvision" -version = "0.12.0" -description = "image and video datasets and models for torch deep learning" -optional = true -python-versions = ">=3.7" -files = [ - {file = "torchvision-0.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:693656e6790b6ab21e4a6e87e81c2982bad9e455b5eb24e14bb672382ec6130f"}, - {file = "torchvision-0.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a0be4501ca0ba1b195644c9243f49a1c49a26e52a7f37924c4239d0bf5ecbd8d"}, - {file = "torchvision-0.12.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:ebfb47adf65bf3926b990b2c4767e291f135e259e03232e0e1a30ecdb05eb087"}, - {file = "torchvision-0.12.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:9771231639afb5973cdaea1d449b451e2982e1ef5410ca67bbdc2b465565573a"}, - {file = "torchvision-0.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:894dacdc64b6e35e3f330722db51c76f4de016c7bf7bd79cf02ed2f4c106e625"}, - {file = "torchvision-0.12.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:36dfdf6451fe3072ab15118982853b848896c0fd3b26cb8135e1e7981dbb0916"}, - {file = "torchvision-0.12.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:aac76d52c5ce4229cb0eaebb762f3391fa736565eb35a4184fa0f7be30b705cd"}, - {file = "torchvision-0.12.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:926666f0b893dce6619759c19b0dd3884af7a9d7022b10395653659d28e43c48"}, - {file = "torchvision-0.12.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c225f55c1bfce027a03f4ca46ddb9559c83f8087c2880bed3261a76c49bb7996"}, - {file = 
"torchvision-0.12.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d1ccb53836ba886320dcda12d00ee8b5f8f38b6c36d7906f141d25778cf74104"}, - {file = "torchvision-0.12.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9f42420f7f0b29cd3d61776df3157827257a0cf16b2c02776dc16c96abb1256d"}, - {file = "torchvision-0.12.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:9017248c7e526c8cdcaaab8cf41d904a520a409d707398189a06d0757901d235"}, - {file = "torchvision-0.12.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:0744902f2265d4c3e83c44a06b567df312e4a9faf8c92620016c7bed7056b5a7"}, - {file = "torchvision-0.12.0-cp38-cp38-win_amd64.whl", hash = "sha256:a91db01496932350bf9c0ee8607ac8ef31c3ebfdaedefe5c5cda0515317f8b8e"}, - {file = "torchvision-0.12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:24d03fcaa28004c64a24124ac4a894c50f5948c8eb290e398d6c76fff2bc678f"}, - {file = "torchvision-0.12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69d82f47b67bad6ddcbb87833ba5950a6c271ba97baae4c0955610071bf034f5"}, - {file = "torchvision-0.12.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:49ed7886b93b80c9733462edd06a07f8d4c6ea4d5bd2894e7268f7a3774f4f7d"}, - {file = "torchvision-0.12.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b93a767f44e3933cb3b01a6fe9727db54590f57b7dac09d5aaf15966c6c151dd"}, - {file = "torchvision-0.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:edab05f7ba9f648c00435b384ffdbd7bde79a3b8ea893813fb50f6ccf28b1e76"}, -] - -[package.dependencies] -numpy = "*" -pillow = ">=5.3.0,<8.3.dev0 || >=8.4.dev0" -requests = "*" -torch = "*" -typing-extensions = "*" - -[package.extras] -scipy = ["scipy"] - -[[package]] -name = "tornado" -version = "6.3.3" -description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
-optional = false -python-versions = ">= 3.8" -files = [ - {file = "tornado-6.3.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:502fba735c84450974fec147340016ad928d29f1e91f49be168c0a4c18181e1d"}, - {file = "tornado-6.3.3-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:805d507b1f588320c26f7f097108eb4023bbaa984d63176d1652e184ba24270a"}, - {file = "tornado-6.3.3-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bd19ca6c16882e4d37368e0152f99c099bad93e0950ce55e71daed74045908f"}, - {file = "tornado-6.3.3-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ac51f42808cca9b3613f51ffe2a965c8525cb1b00b7b2d56828b8045354f76a"}, - {file = "tornado-6.3.3-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71a8db65160a3c55d61839b7302a9a400074c9c753040455494e2af74e2501f2"}, - {file = "tornado-6.3.3-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:ceb917a50cd35882b57600709dd5421a418c29ddc852da8bcdab1f0db33406b0"}, - {file = "tornado-6.3.3-cp38-abi3-musllinux_1_1_i686.whl", hash = "sha256:7d01abc57ea0dbb51ddfed477dfe22719d376119844e33c661d873bf9c0e4a16"}, - {file = "tornado-6.3.3-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:9dc4444c0defcd3929d5c1eb5706cbe1b116e762ff3e0deca8b715d14bf6ec17"}, - {file = "tornado-6.3.3-cp38-abi3-win32.whl", hash = "sha256:65ceca9500383fbdf33a98c0087cb975b2ef3bfb874cb35b8de8740cf7f41bd3"}, - {file = "tornado-6.3.3-cp38-abi3-win_amd64.whl", hash = "sha256:22d3c2fa10b5793da13c807e6fc38ff49a4f6e1e3868b0a6f4164768bb8e20f5"}, - {file = "tornado-6.3.3.tar.gz", hash = "sha256:e7d8db41c0181c80d76c982aacc442c0783a2c54d6400fe028954201a2e032fe"}, -] - -[[package]] -name = "typed-ast" -version = "1.5.4" -description = "a fork of Python 2 and 3 ast modules with type comment support" -optional = false -python-versions = ">=3.6" -files = [ - {file = "typed_ast-1.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:669dd0c4167f6f2cd9f57041e03c3c2ebf9063d0757dc89f79ba1daa2bfca9d4"}, - {file = "typed_ast-1.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:211260621ab1cd7324e0798d6be953d00b74e0428382991adfddb352252f1d62"}, - {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:267e3f78697a6c00c689c03db4876dd1efdfea2f251a5ad6555e82a26847b4ac"}, - {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c542eeda69212fa10a7ada75e668876fdec5f856cd3d06829e6aa64ad17c8dfe"}, - {file = "typed_ast-1.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:a9916d2bb8865f973824fb47436fa45e1ebf2efd920f2b9f99342cb7fab93f72"}, - {file = "typed_ast-1.5.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:79b1e0869db7c830ba6a981d58711c88b6677506e648496b1f64ac7d15633aec"}, - {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a94d55d142c9265f4ea46fab70977a1944ecae359ae867397757d836ea5a3f47"}, - {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:183afdf0ec5b1b211724dfef3d2cad2d767cbefac291f24d69b00546c1837fb6"}, - {file = "typed_ast-1.5.4-cp36-cp36m-win_amd64.whl", hash = "sha256:639c5f0b21776605dd6c9dbe592d5228f021404dafd377e2b7ac046b0349b1a1"}, - {file = "typed_ast-1.5.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cf4afcfac006ece570e32d6fa90ab74a17245b83dfd6655a6f68568098345ff6"}, - 
{file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed855bbe3eb3715fca349c80174cfcfd699c2f9de574d40527b8429acae23a66"}, - {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6778e1b2f81dfc7bc58e4b259363b83d2e509a65198e85d5700dfae4c6c8ff1c"}, - {file = "typed_ast-1.5.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0261195c2062caf107831e92a76764c81227dae162c4f75192c0d489faf751a2"}, - {file = "typed_ast-1.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2efae9db7a8c05ad5547d522e7dbe62c83d838d3906a3716d1478b6c1d61388d"}, - {file = "typed_ast-1.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7d5d014b7daa8b0bf2eaef684295acae12b036d79f54178b92a2b6a56f92278f"}, - {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:370788a63915e82fd6f212865a596a0fefcbb7d408bbbb13dea723d971ed8bdc"}, - {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4e964b4ff86550a7a7d56345c7864b18f403f5bd7380edf44a3c1fb4ee7ac6c6"}, - {file = "typed_ast-1.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:683407d92dc953c8a7347119596f0b0e6c55eb98ebebd9b23437501b28dcbb8e"}, - {file = "typed_ast-1.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4879da6c9b73443f97e731b617184a596ac1235fe91f98d279a7af36c796da35"}, - {file = "typed_ast-1.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e123d878ba170397916557d31c8f589951e353cc95fb7f24f6bb69adc1a8a97"}, - {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebd9d7f80ccf7a82ac5f88c521115cc55d84e35bf8b446fcd7836eb6b98929a3"}, - {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98f80dee3c03455e92796b58b98ff6ca0b2a6f652120c263efdba4d6c5e58f72"}, - {file = "typed_ast-1.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:0fdbcf2fef0ca421a3f5912555804296f0b0960f0418c440f5d6d3abb549f3e1"}, - {file = "typed_ast-1.5.4.tar.gz", hash = "sha256:39e21ceb7388e4bb37f4c679d72707ed46c2fbf2a5609b8b8ebc4b067d977df2"}, -] - -[[package]] -name = "typing-extensions" -version = "4.5.0" -description = "Backported and Experimental Type Hints for Python 3.7+" -optional = false -python-versions = ">=3.7" -files = [ - {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, - {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, -] - -[[package]] -name = "tzdata" -version = "2023.3" -description = "Provider of IANA time zone data" -optional = false -python-versions = ">=2" -files = [ - {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, - {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, -] - -[[package]] -name = "unify" -version = "0.5" -description = "Modifies strings to all use the same (single/double) quote where possible." 
-optional = false -python-versions = "*" -files = [ - {file = "unify-0.5.tar.gz", hash = "sha256:8ddce812b2457212b7598fe574c9e6eb3ad69710f445391338270c7f8a71723c"}, -] - -[package.dependencies] -untokenize = "*" - -[[package]] -name = "untokenize" -version = "0.1.1" -description = "Transforms tokens into original source code (while preserving whitespace)." -optional = false -python-versions = "*" -files = [ - {file = "untokenize-0.1.1.tar.gz", hash = "sha256:3865dbbbb8efb4bb5eaa72f1be7f3e0be00ea8b7f125c69cbd1f5fda926f37a2"}, -] - -[[package]] -name = "urllib3" -version = "1.26.18" -description = "HTTP library with thread-safe connection pooling, file post, and more." -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" -files = [ - {file = "urllib3-1.26.18-py2.py3-none-any.whl", hash = "sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07"}, - {file = "urllib3-1.26.18.tar.gz", hash = "sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0"}, -] - -[package.extras] -brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] -socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] - -[[package]] -name = "werkzeug" -version = "3.0.1" -description = "The comprehensive WSGI web application library." -optional = true -python-versions = ">=3.8" -files = [ - {file = "werkzeug-3.0.1-py3-none-any.whl", hash = "sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10"}, - {file = "werkzeug-3.0.1.tar.gz", hash = "sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc"}, -] - -[package.dependencies] -MarkupSafe = ">=2.1.1" - -[package.extras] -watchdog = ["watchdog (>=2.3)"] - -[[package]] -name = "wheel" -version = "0.40.0" -description = "A built-package format for Python" -optional = false -python-versions = ">=3.7" -files = [ - {file = "wheel-0.40.0-py3-none-any.whl", hash = "sha256:d236b20e7cb522daf2390fa84c55eea81c5c30190f90f29ae2ca1ad8355bf247"}, - {file = "wheel-0.40.0.tar.gz", hash = "sha256:cd1196f3faee2b31968d626e1731c94f99cbdb67cf5a46e4f5656cbee7738873"}, -] - -[package.extras] -test = ["pytest (>=6.0.0)"] - -[[package]] -name = "wrapt" -version = "1.14.1" -description = "Module for decorators, wrappers and monkey patching." 
-optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" -files = [ - {file = "wrapt-1.14.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:1b376b3f4896e7930f1f772ac4b064ac12598d1c38d04907e696cc4d794b43d3"}, - {file = "wrapt-1.14.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:903500616422a40a98a5a3c4ff4ed9d0066f3b4c951fa286018ecdf0750194ef"}, - {file = "wrapt-1.14.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5a9a0d155deafd9448baff28c08e150d9b24ff010e899311ddd63c45c2445e28"}, - {file = "wrapt-1.14.1-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ddaea91abf8b0d13443f6dac52e89051a5063c7d014710dcb4d4abb2ff811a59"}, - {file = "wrapt-1.14.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:36f582d0c6bc99d5f39cd3ac2a9062e57f3cf606ade29a0a0d6b323462f4dd87"}, - {file = "wrapt-1.14.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:7ef58fb89674095bfc57c4069e95d7a31cfdc0939e2a579882ac7d55aadfd2a1"}, - {file = "wrapt-1.14.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:e2f83e18fe2f4c9e7db597e988f72712c0c3676d337d8b101f6758107c42425b"}, - {file = "wrapt-1.14.1-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:ee2b1b1769f6707a8a445162ea16dddf74285c3964f605877a20e38545c3c462"}, - {file = "wrapt-1.14.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:833b58d5d0b7e5b9832869f039203389ac7cbf01765639c7309fd50ef619e0b1"}, - {file = "wrapt-1.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:80bb5c256f1415f747011dc3604b59bc1f91c6e7150bd7db03b19170ee06b320"}, - {file = "wrapt-1.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:07f7a7d0f388028b2df1d916e94bbb40624c59b48ecc6cbc232546706fac74c2"}, - {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02b41b633c6261feff8ddd8d11c711df6842aba629fdd3da10249a53211a72c4"}, - {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2fe803deacd09a233e4762a1adcea5db5d31e6be577a43352936179d14d90069"}, - {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:257fd78c513e0fb5cdbe058c27a0624c9884e735bbd131935fd49e9fe719d310"}, - {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4fcc4649dc762cddacd193e6b55bc02edca674067f5f98166d7713b193932b7f"}, - {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:11871514607b15cfeb87c547a49bca19fde402f32e2b1c24a632506c0a756656"}, - {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"}, - {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"}, - {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"}, - {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"}, - {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"}, - {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"}, - {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"}, - {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"}, - {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"}, - {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"}, - {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:a85d2b46be66a71bedde836d9e41859879cc54a2a04fad1191eb50c2066f6e9d"}, - {file = "wrapt-1.14.1-cp35-cp35m-win32.whl", hash = "sha256:dbcda74c67263139358f4d188ae5faae95c30929281bc6866d00573783c422b7"}, - {file = "wrapt-1.14.1-cp35-cp35m-win_amd64.whl", hash = "sha256:b21bb4c09ffabfa0e85e3a6b623e19b80e7acd709b9f91452b8297ace2a8ab00"}, - {file = "wrapt-1.14.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:9e0fd32e0148dd5dea6af5fee42beb949098564cc23211a88d799e434255a1f4"}, - {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9736af4641846491aedb3c3f56b9bc5568d92b0692303b5a305301a95dfd38b1"}, - {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b02d65b9ccf0ef6c34cba6cf5bf2aab1bb2f49c6090bafeecc9cd81ad4ea1c1"}, - {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21ac0156c4b089b330b7666db40feee30a5d52634cc4560e1905d6529a3897ff"}, - {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:9f3e6f9e05148ff90002b884fbc2a86bd303ae847e472f44ecc06c2cd2fcdb2d"}, - {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:6e743de5e9c3d1b7185870f480587b75b1cb604832e380d64f9504a0535912d1"}, - {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d79d7d5dc8a32b7093e81e97dad755127ff77bcc899e845f41bf71747af0c569"}, - {file = "wrapt-1.14.1-cp36-cp36m-win32.whl", hash = "sha256:81b19725065dcb43df02b37e03278c011a09e49757287dca60c5aecdd5a0b8ed"}, - {file = "wrapt-1.14.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b014c23646a467558be7da3d6b9fa409b2c567d2110599b7cf9a0c5992b3b471"}, - {file = "wrapt-1.14.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:88bd7b6bd70a5b6803c1abf6bca012f7ed963e58c68d76ee20b9d751c74a3248"}, - {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5901a312f4d14c59918c221323068fad0540e34324925c8475263841dbdfe68"}, - {file 
= "wrapt-1.14.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d77c85fedff92cf788face9bfa3ebaa364448ebb1d765302e9af11bf449ca36d"}, - {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d649d616e5c6a678b26d15ece345354f7c2286acd6db868e65fcc5ff7c24a77"}, - {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7d2872609603cb35ca513d7404a94d6d608fc13211563571117046c9d2bcc3d7"}, - {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:ee6acae74a2b91865910eef5e7de37dc6895ad96fa23603d1d27ea69df545015"}, - {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:2b39d38039a1fdad98c87279b48bc5dce2c0ca0d73483b12cb72aa9609278e8a"}, - {file = "wrapt-1.14.1-cp37-cp37m-win32.whl", hash = "sha256:60db23fa423575eeb65ea430cee741acb7c26a1365d103f7b0f6ec412b893853"}, - {file = "wrapt-1.14.1-cp37-cp37m-win_amd64.whl", hash = "sha256:709fe01086a55cf79d20f741f39325018f4df051ef39fe921b1ebe780a66184c"}, - {file = "wrapt-1.14.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8c0ce1e99116d5ab21355d8ebe53d9460366704ea38ae4d9f6933188f327b456"}, - {file = "wrapt-1.14.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e3fb1677c720409d5f671e39bac6c9e0e422584e5f518bfd50aa4cbbea02433f"}, - {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:642c2e7a804fcf18c222e1060df25fc210b9c58db7c91416fb055897fc27e8cc"}, - {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b7c050ae976e286906dd3f26009e117eb000fb2cf3533398c5ad9ccc86867b1"}, - {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef3f72c9666bba2bab70d2a8b79f2c6d2c1a42a7f7e2b0ec83bb2f9e383950af"}, - {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:01c205616a89d09827986bc4e859bcabd64f5a0662a7fe95e0d359424e0e071b"}, - {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5a0f54ce2c092aaf439813735584b9537cad479575a09892b8352fea5e988dc0"}, - {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2cf71233a0ed05ccdabe209c606fe0bac7379fdcf687f39b944420d2a09fdb57"}, - {file = "wrapt-1.14.1-cp38-cp38-win32.whl", hash = "sha256:aa31fdcc33fef9eb2552cbcbfee7773d5a6792c137b359e82879c101e98584c5"}, - {file = "wrapt-1.14.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1967f46ea8f2db647c786e78d8cc7e4313dbd1b0aca360592d8027b8508e24d"}, - {file = "wrapt-1.14.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3232822c7d98d23895ccc443bbdf57c7412c5a65996c30442ebe6ed3df335383"}, - {file = "wrapt-1.14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:988635d122aaf2bdcef9e795435662bcd65b02f4f4c1ae37fbee7401c440b3a7"}, - {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cca3c2cdadb362116235fdbd411735de4328c61425b0aa9f872fd76d02c4e86"}, - {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d52a25136894c63de15a35bc0bdc5adb4b0e173b9c0d07a2be9d3ca64a332735"}, - {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40e7bc81c9e2b2734ea4bc1aceb8a8f0ceaac7c5299bc5d69e37c44d9081d43b"}, - {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_aarch64.whl", 
hash = "sha256:b9b7a708dd92306328117d8c4b62e2194d00c365f18eff11a9b53c6f923b01e3"}, - {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6a9a25751acb379b466ff6be78a315e2b439d4c94c1e99cb7266d40a537995d3"}, - {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:34aa51c45f28ba7f12accd624225e2b1e5a3a45206aa191f6f9aac931d9d56fe"}, - {file = "wrapt-1.14.1-cp39-cp39-win32.whl", hash = "sha256:dee0ce50c6a2dd9056c20db781e9c1cfd33e77d2d569f5d1d9321c641bb903d5"}, - {file = "wrapt-1.14.1-cp39-cp39-win_amd64.whl", hash = "sha256:dee60e1de1898bde3b238f18340eec6148986da0455d8ba7848d50470a7a32fb"}, - {file = "wrapt-1.14.1.tar.gz", hash = "sha256:380a85cf89e0e69b7cfbe2ea9f765f004ff419f34194018a6827ac0e3edfed4d"}, -] - -[[package]] -name = "xgboost" -version = "1.4.2" -description = "XGBoost Python Package" -optional = true -python-versions = ">=3.6" -files = [ - {file = "xgboost-1.4.2-py3-none-macosx_10_14_x86_64.macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:e8f1a366a403784afd30a56eb99a429cefc45d906943cd362025ccf942208e13"}, - {file = "xgboost-1.4.2-py3-none-manylinux2010_x86_64.whl", hash = "sha256:ec3f60d53dcd23273a5c7a495ba0f8205656ce750eb2ce7798726a4b2ef4955a"}, - {file = "xgboost-1.4.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:15dd5987827030b3f68e741490a8b3a4ead7c6064bd911e36235b84e0a9d0765"}, - {file = "xgboost-1.4.2-py3-none-win_amd64.whl", hash = "sha256:7c8973204b2c2362012850605e81de5a180513fc08db36d0da9befb77c3d57c8"}, - {file = "xgboost-1.4.2.tar.gz", hash = "sha256:5a364c152095824445ac56a83fb7f7e75913b4bb128c2fcd99b85877c9f4f8fe"}, -] - -[package.dependencies] -numpy = "*" -scipy = "*" - -[package.extras] -dask = ["dask", "distributed", "pandas"] -datatable = ["datatable"] -pandas = ["pandas"] -plotting = ["graphviz", "matplotlib"] -scikit-learn = ["scikit-learn"] - -[[package]] -name = "zipp" -version = "3.15.0" -description = "Backport of pathlib-compatible object wrapper for zip files" -optional = false -python-versions = ">=3.7" -files = [ - {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, - {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, -] - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] - -[extras] -importer-caffe2 = ["torch"] -importer-coreml = ["coremltools"] -importer-darknet = ["opencv-python"] -importer-keras = ["tensorflow"] -importer-mxnet = ["mxnet"] -importer-onnx = ["future", "onnx", "onnxoptimizer", "onnxruntime", "torch", "torchvision"] -importer-pytorch = ["future", "torch", "torchvision"] -importer-tensorflow = ["tensorflow"] -importer-tflite = ["tensorflow", "tflite"] -xgboost = ["xgboost"] - -[metadata] -lock-version = "2.0" -python-versions = ">=3.8, <3.11" -content-hash = "6de2f7da86130f58375c7b75295f6230abb4b08f5bd8d44853f0c9ce2a9e2f54" diff --git a/apps/microtvm/pyproject.toml b/apps/microtvm/pyproject.toml deleted file mode 100644 index abc9902c2a33..000000000000 --- a/apps/microtvm/pyproject.toml +++ /dev/null @@ -1,143 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# This `pyproject.toml` file is used to allow MicroTVM -# to run within a Poetry-managed environment. - -[tool.black] -line-length = 100 -target-version = ['py37'] -include = '(\.pyi?$)' -exclude = ''' - -( - /( - \.github - | \.tvm - | \.tvm_test_data - | \.vscode - | \.venv - | 3rdparty - | build\/ - | cmake\/ - | conda\/ - | docker\/ - | docs\/ - | golang\/ - | include\/ - | jvm\/ - | licenses\/ - | nnvm\/ - | rust\/ - | src\/ - | vta\/ - | web\/ - )/ -) -''' -[tool.poetry] -name = "microtvm" -version = "0.1.0" -description = "" -authors = [] -packages = [ - { include = "tvm", from = "../../python" }, -] - -[tool.poetry.dependencies] -python = ">=3.8, <3.11" -attrs = "==22.2.0" -decorator = "==5.1.1" -numpy = "==1.22" -psutil = "==5.9.4" -scipy = "==1.7.3" -tornado = "==6.3.3" -typed-ast = "^1.5.4" - -# AutoTVM -xgboost = {version = "==1.4.2", optional = true} - -############# -# Importers # -############# - -# NOTE: Caffe frontend dependency is from torch package. - -# CoreML -coremltools = {version = "^3.3", optional = true} - -# Darknet -opencv-python = {version = "^4.2", optional = true} -cffi = {version = "^1.14", optional = true} - -# Keras -keras = {version = "==2.12.0", optional = true} - -# MXNet frontend -mxnet = {version = "==1.9.1", optional = true} - -# ONNX frontend -onnx = {version = "==1.13.0", optional = true} -onnxoptimizer = { version = "==0.3.10", optional = true } -onnxruntime = { version = "==1.14.1", optional = true } - -# Pytorch (also used by ONNX) -torch = { version = "==1.13.1", optional = true } -torchvision = { version = "==0.12.0", optional = true } - -future = { version = ">=0.18.3", optional = true } - -# Tensorflow frontend -tensorflow = {version = "^2.12.0", optional = true} - -# TFLite frontend -tflite = {version = "^2.10.0", optional = true} -wheel = "*" -cloudpickle = "^1.6.0" -pyusb = "^1.2.1" - - -[tool.poetry.extras] -xgboost = ["xgboost"] -importer-caffe2 = ["torch"] -importer-coreml = ["coremltools"] -importer-darknet = ["opencv-python"] -importer-keras = ["tensorflow"] -importer-onnx = ["future", "onnx", "onnxoptimizer", "onnxruntime", "torch", "torchvision"] -importer-pytorch = ["torch", "torchvision", "future"] -importer-tensorflow = ["tensorflow"] -importer-tflite = ["tflite", "tensorflow"] -importer-mxnet = ["mxnet"] - -[tool.poetry.dev-dependencies] -autodocsumm = "^0.1" -black = "^19.10b0" -matplotlib = "^3.2" -Image = "^1.5" -recommonmark = "^0.6" -pillow = "==10.2.0" -pyformat = "^0.7" -pylint = "^2.4" -pytest = "==7.2.1" -pytest-xdist = "==3.1.0" - -[build-system] -requires = ["poetry>=0.12"] -build-backend = "poetry.masonry.api" - -[tool.autopep8] -max_line_length = 100 diff --git a/apps/microtvm/reference-vm/.gitignore b/apps/microtvm/reference-vm/.gitignore deleted file mode 100644 index 187e6d9f34da..000000000000 --- 
a/apps/microtvm/reference-vm/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/release-test -/.vagrant diff --git a/apps/microtvm/reference-vm/README.md b/apps/microtvm/reference-vm/README.md deleted file mode 100644 index 6fe039a9fda9..000000000000 --- a/apps/microtvm/reference-vm/README.md +++ /dev/null @@ -1,117 +0,0 @@ - - - - - - - - - - - - - - - - - -# microTVM Reference Virtual Machines (RVM) - -This directory contains Vagrant specifications that create Reference Virtual -Machines (RVM) for use with microTVM. These machines help microTVM users -collaborate by providing a stable reference environment to build and test -microTVM. - -For more information on how to use them, see the -[microTVM Reference VM tutorial](../../../tutorials/micro/micro_reference_vm.py). - - -## microTVM Developer Information - -Each RTOS or platform (like Zephyr, Arduino, etc.) that integrates with microTVM -can check in installation scripts in the Reference VM in this directory to help -the community collaborate. You should use the tools provided here to ensure a -uniform release process across all platforms. Typically, releases need to be -created by TVM committers. - -Generally speaking, it's expected that any integrated platform with a regression -test checked in to the tvm repository should also define a reference VM. If you -want to integrate a new platform, please raise a discussion on -[the forum](https://discuss.tvm.ai). - - -## Reference VM Organization - -The Reference VM is organized in this directory as follows: - -``` -. -+-- base-box-tool.py - Reference VM build, test, and release tool. - +-- Vagrantfile - Vagrantfile that end-users will invoke. Should be based - | off a base box which contains dependencies other than the - | TVM python dependencies. - +-- base-box/ - Top-level directory which defines the base box. - +-- Vagrantfile.packer-template - 'packer' template Vagrantfile which - | will be used to build the base box. - +-- test-config.json - JSON file explaining how to perform - release tests to base-box-tool.py. -``` - - -## Creating Releases - -1. **Build** the base box for a given platform: -```bash -$ ./base-box-tool.py [--provider=PROVIDER] build -``` - -For example: -```bash -$ ./base-box-tool.py --provider virtualbox build -``` - -2. **Run** release tests for each platform: - - A. Connect any needed hardware to the VM host machine; - - B. Run tests: - ```bash - $ ./base-box-tool.py [--provider=PROVIDER] test --microtvm-board=MICROTVM_BOARD [--test-device-serial=SERIAL] PLATFORM - ``` - where MICROTVM_BOARD is one of the options listed in the - PLATFORM/base-box/test-config.json file. - - For example: - ```bash - $ ./base-box-tool.py --provider virtualbox test --microtvm-board=stm32f746g_disco zephyr - ``` - - This command does the following for the specified provider: - - * Copies all files inside this dir except `.vagrant` and `base-box` to - `release-test/`. This is done to avoid reusing any VM the developer may have - started; - - * Executes `$ vagrant up [--provider=PROVIDER]`; - - * Finds an attached USB device matching the VID and PID specified in - `test-config.json`, and if `--test-device-serial` was given, that serial - number (as reported to USB). Creates a rule to autoconnect this device to the - VM, and also attaches it to the VM; - - * SSHs to the VM, `cd` to the TVM root directory, and runs `test_cmd` from - `test-config.json`. Nonzero status means failure. - -3. If release tests _fail_, fix them and restart from step 1. - -4.
If release tests pass, **release** the box: -```bash -$ ./base-box-tool.py [--provider=PROVIDER] release --release-version=RELEASE_VER -``` - For that step be sure you've logged in to Vagrant Cloud using the `vagrant` - tool. - -## Versioning -We use semantic versioning, as recommended by [Vagrant](https://www.vagrantup.com/docs/boxes/versioning). Versions have the form `X.Y.Z`: when a new RVM contains only minor changes and remains compatible with older versions, we keep the major version `X` and increase the minor version `Y`; when a new RVM is not compatible with older ones, we increase the major version `X`. Updates to the Zephyr SDK or Arduino board SDKs are considered major changes and require incrementing the major version `X`. In this scheme, `Z` is barely used, but we keep it since Vagrant requires this format. - -**Note**: We will release all microTVM RVM boxes under [microtvm](https://app.vagrantup.com/tlcpack/boxes/microtvm) and use box versioning in the Vagrantfile. Previous versions like `microtvm-zephyr`, `microtvm-arduino`, `microtvm-zephyr-2.5`, etc. are deprecated and will be removed in the future. diff --git a/apps/microtvm/reference-vm/Vagrantfile b/apps/microtvm/reference-vm/Vagrantfile deleted file mode 100644 index 00465a8b8848..000000000000 --- a/apps/microtvm/reference-vm/Vagrantfile +++ /dev/null @@ -1,72 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -Vagrant.configure("2") do |config| - config.vm.box = "tlcpack/microtvm" - config.vm.box_version = "1.0.0" - - if ENV.has_key?("TVM_RVM_NUM_CORES") - num_cores = ENV["TVM_RVM_NUM_CORES"] - else - num_cores = 2 - end - - if ENV.has_key?("TVM_RVM_RAM_BYTES") - ram_bytes = ENV["TVM_RVM_RAM_BYTES"] - else - ram_bytes = 2048 - end - - tvm_home = "../../.." - dirs_to_mount = [Pathname.new(Pathname.new(tvm_home).expand_path())] - if ENV.has_key?("TVM_PROJECT_DIR") then - dirs_to_mount.append(ENV["TVM_PROJECT_DIR"]) - puts "NOTE: also configuring project dir: %s" % [dirs_to_mount[-1]] - end - - git_file = Pathname.new(tvm_home + "/.git") - if git_file.ftype() == "file" then - gitdir_match = Regexp.new('^gitdir: (?<gitdir>.*/.git).*\n$', Regexp::MULTILINE).match(git_file.read()) - if !gitdir_match.nil?
then - dirs_to_mount.append(Pathname.new(tvm_home).realpath.join(gitdir_match.named_captures["gitdir"])) - puts "NOTE: also configuring git-worktree gitdir: %s" % [dirs_to_mount[-1]] - end - end - - config.vm.provision "shell", - path: "provision_setup.sh", - env: {"TVM_HOME": dirs_to_mount[0], - "TVM_CI_NUM_CORES": num_cores - }, - privileged: false - - # Enable USB Controller on VirtualBox - vm_name = "microtvm-#{Time.now.tv_sec}" - config.vm.provider "virtualbox" do |vb, overrides| - vb.name = vm_name - vb.cpus = num_cores - vb.memory = ram_bytes - vb.customize ["modifyvm", :id, "--usb", "on"] - vb.customize ["modifyvm", :id, "--usbehci", "on"] - vb.customize ["modifyvm", :id, "--usbxhci", "on"] - vb.customize [ "guestproperty", "set", :id, "/VirtualBox/GuestAdd/VBoxService/--timesync-set-threshold", 10000] - dirs_to_mount.each do |d| - overrides.vm.synced_folder d.to_s, d.to_s - end - end - -end diff --git a/apps/microtvm/reference-vm/base-box-tool.py b/apps/microtvm/reference-vm/base-box-tool.py deleted file mode 100755 index b9d589bcadbd..000000000000 --- a/apps/microtvm/reference-vm/base-box-tool.py +++ /dev/null @@ -1,630 +0,0 @@ -#!/usr/bin/env python3 -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -import argparse -import copy -import json -import logging -import pathlib -import os -import re -import shlex -import shutil -import subprocess -import sys -import pathlib - -_LOG = logging.getLogger(__name__) - - -THIS_DIR = pathlib.Path(os.path.realpath(os.path.dirname(__file__))) - -# List of vagrant providers supported by this tool -ALL_PROVIDERS = ( - "parallels", - "virtualbox", - "vmware_desktop", -) - -# List of supported electronics platforms. Each must correspond -# to a sub-directory of this directory. -ALL_PLATFORMS = ( - "arduino", - "zephyr", -) - -# Extra scripts required to execute on provisioning -# in [platform]/base-box/base_box_provision.sh -EXTRA_SCRIPTS = [ - "apps/microtvm/reference-vm/base-box/base_box_setup_common.sh", - "docker/install/ubuntu_install_core.sh", - "docker/install/ubuntu_install_python.sh", - "docker/utils/apt-install-and-clear.sh", - "docker/install/ubuntu2204_install_llvm.sh", - # Zephyr - "docker/install/ubuntu_init_zephyr_project.sh", - "docker/install/ubuntu_install_zephyr_sdk.sh", - "docker/install/ubuntu_install_cmsis.sh", - "docker/install/ubuntu_install_nrfjprog.sh", -] - -PACKER_FILE_NAME = "packer.json" - - -# List of identifying strings for microTVM boards for testing. -with open(THIS_DIR / ".." / "zephyr" / "template_project" / "boards.json") as f: - zephyr_boards = json.load(f) - -with open(THIS_DIR / ".." 
/ "arduino" / "template_project" / "boards.json") as f: - arduino_boards = json.load(f) - -ALL_MICROTVM_BOARDS = { - "arduino": arduino_boards.keys(), - "zephyr": zephyr_boards.keys(), -} - - -def parse_virtualbox_devices(): - output = subprocess.check_output(["VBoxManage", "list", "usbhost"], encoding="utf-8") - devices = [] - current_dev = {} - for line in output.split("\n"): - if not line.strip(): - if current_dev: - if "VendorId" in current_dev and "ProductId" in current_dev: - devices.append(current_dev) - current_dev = {} - - continue - - key, value = line.split(":", 1) - value = value.lstrip(" ") - current_dev[key] = value - - if current_dev: - devices.append(current_dev) - return devices - - -VIRTUALBOX_USB_DEVICE_RE = ( - "USBAttachVendorId[0-9]+=0x([0-9a-z]{4})\n" + "USBAttachProductId[0-9]+=0x([0-9a-z]{4})" -) - - -def parse_virtualbox_attached_usb_devices(vm_uuid): - output = subprocess.check_output( - ["VBoxManage", "showvminfo", "--machinereadable", vm_uuid], encoding="utf-8" - ) - - r = re.compile(VIRTUALBOX_USB_DEVICE_RE) - attached_usb_devices = r.findall(output, re.MULTILINE) - - # List of couples (VendorId, ProductId) for all attached USB devices - return attached_usb_devices - - -VIRTUALBOX_VID_PID_RE = re.compile(r"0x([0-9A-Fa-f]{4}).*") - - -def attach_virtualbox(vm_uuid, vid_hex=None, pid_hex=None, serial=None): - usb_devices = parse_virtualbox_devices() - for dev in usb_devices: - m = VIRTUALBOX_VID_PID_RE.match(dev["VendorId"]) - if not m: - _LOG.warning("Malformed VendorId: %s", dev["VendorId"]) - continue - - dev_vid_hex = m.group(1).lower() - - m = VIRTUALBOX_VID_PID_RE.match(dev["ProductId"]) - if not m: - _LOG.warning("Malformed ProductId: %s", dev["ProductId"]) - continue - - dev_pid_hex = m.group(1).lower() - - if ( - vid_hex == dev_vid_hex - and pid_hex == dev_pid_hex - and (serial is None or serial == dev["SerialNumber"]) - ): - attached_devices = parse_virtualbox_attached_usb_devices(vm_uuid) - for vid, pid in parse_virtualbox_attached_usb_devices(vm_uuid): - if vid_hex == vid and pid_hex == pid: - print(f"USB dev {vid_hex}:{pid_hex} already attached. 
Skipping attach.") - return - - rule_args = [ - "VBoxManage", - "usbfilter", - "add", - "0", - "--action", - "hold", - "--name", - "test device", - "--target", - vm_uuid, - "--vendorid", - vid_hex, - "--productid", - pid_hex, - ] - if serial is not None: - rule_args.extend(["--serialnumber", serial]) - subprocess.check_call(rule_args) - subprocess.check_call(["VBoxManage", "controlvm", vm_uuid, "usbattach", dev["UUID"]]) - return - - raise Exception( - f"Device with vid={vid_hex}, pid={pid_hex}, serial={serial!r} not found:\n{usb_devices!r}" - ) - - -def attach_parallels(uuid, vid_hex=None, pid_hex=None, serial=None): - usb_devices = json.loads( - subprocess.check_output(["prlsrvctl", "usb", "list", "-j"], encoding="utf-8") - ) - for dev in usb_devices: - _, dev_vid_hex, dev_pid_hex, _, _, dev_serial = dev["System name"].split("|") - dev_vid_hex = dev_vid_hex.lower() - dev_pid_hex = dev_pid_hex.lower() - if ( - vid_hex == dev_vid_hex - and pid_hex == dev_pid_hex - and (serial is None or serial == dev_serial) - ): - subprocess.check_call(["prlsrvctl", "usb", "set", dev["Name"], uuid]) - if "Used-By-Vm-Name" in dev: - subprocess.check_call( - ["prlctl", "set", dev["Used-By-Vm-Name"], "--device-disconnect", dev["Name"]] - ) - subprocess.check_call(["prlctl", "set", uuid, "--device-connect", dev["Name"]]) - return - - raise Exception( - f"Device with vid={vid_hex}, pid={pid_hex}, serial={serial!r} not found:\n{usb_devices!r}" - ) - - -def attach_vmware(uuid, vid_hex=None, pid_hex=None, serial=None): - print("NOTE: vmware doesn't seem to support automatic attaching of devices :(") - print("The VMWare VM UUID is {uuid}") - print("Please attach the following usb device using the VMWare GUI:") - if vid_hex is not None: - print(f" - VID: {vid_hex}") - if pid_hex is not None: - print(f" - PID: {pid_hex}") - if serial is not None: - print(f" - Serial: {serial}") - if vid_hex is None and pid_hex is None and serial is None: - print(" - (no specifications given for USB device)") - print() - print("Press [Enter] when the USB device is attached") - input() - - -ATTACH_USB_DEVICE = { - "parallels": attach_parallels, - "virtualbox": attach_virtualbox, - "vmware_desktop": attach_vmware, -} - - -def generate_packer_config(file_path, providers): - builders = [] - provisioners = [] - for provider_name in providers: - builders.append( - { - "name": f"{provider_name}", - "type": "vagrant", - "box_name": f"microtvm-base-{provider_name}", - "output_dir": f"output-packer-{provider_name}", - "communicator": "ssh", - "source_path": "generic/ubuntu1804", - "provider": provider_name, - "template": "Vagrantfile.packer-template", - } - ) - - repo_root = subprocess.check_output( - ["git", "rev-parse", "--show-toplevel"], encoding="utf-8" - ).strip() - - scripts_to_copy = EXTRA_SCRIPTS - for script in scripts_to_copy: - script_path = os.path.join(repo_root, script) - filename = os.path.basename(script_path) - provisioners.append({"type": "file", "source": script_path, "destination": f"~/{filename}"}) - - provisioners.append( - { - "type": "shell", - "script": "base_box_setup.sh", - } - ) - provisioners.append( - { - "type": "shell", - "script": "base_box_provision.sh", - } - ) - - with open(file_path, "w") as f: - json.dump( - { - "builders": builders, - "provisioners": provisioners, - }, - f, - sort_keys=True, - indent=2, - ) - - -def build_command(args): - base_box_dir = THIS_DIR / "base-box" - - generate_packer_config( - os.path.join(base_box_dir, PACKER_FILE_NAME), - args.provider or ALL_PROVIDERS, - ) - env = 
copy.copy(os.environ) - packer_args = ["packer", "build", "-force"] - env["PACKER_LOG"] = "1" - env["PACKER_LOG_PATH"] = "packer.log" - if args.debug_packer: - packer_args += ["-debug"] - - packer_args += [PACKER_FILE_NAME] - - box_package_exists = False - if not args.force: - box_package_dirs = [(base_box_dir / f"output-packer-{p}") for p in args.provider] - for box_package_dir in box_package_dirs: - if box_package_dir.exists(): - print(f"A box package {box_package_dir} already exists. Refusing to overwrite it!") - box_package_exists = True - - if box_package_exists: - sys.exit("One or more box packages exist (see list above). To rebuild use '--force'") - - subprocess.check_call(packer_args, cwd=THIS_DIR / "base-box", env=env) - - -REQUIRED_TEST_CONFIG_KEYS = { - "vid_hex": str, - "pid_hex": str, -} - - -VM_BOX_RE = re.compile(r'(.*\.vm\.box) = "(.*)"') -VM_TVM_HOME_RE = re.compile(r'(.*tvm_home) = "(.*)"') - -# Paths, relative to the platform box directory, which will not be copied to release-test dir. -SKIP_COPY_PATHS = [".vagrant", "base-box", "scripts"] - - -def do_build_release_test_vm( - release_test_dir, user_box_dir: pathlib.Path, base_box_dir: pathlib.Path, provider_name -): - if os.path.exists(release_test_dir): - try: - subprocess.check_call(["vagrant", "destroy", "-f"], cwd=release_test_dir) - except subprocess.CalledProcessError: - _LOG.warning("vagrant destroy failed--removing dirtree anyhow", exc_info=True) - - shutil.rmtree(release_test_dir) - - for dirpath, _, filenames in os.walk(user_box_dir): - rel_path = os.path.relpath(dirpath, user_box_dir) - if any( - rel_path == scp or rel_path.startswith(f"{scp}{os.path.sep}") for scp in SKIP_COPY_PATHS - ): - continue - - dest_dir = os.path.join(release_test_dir, rel_path) - os.makedirs(dest_dir) - for filename in filenames: - shutil.copy2(os.path.join(dirpath, filename), os.path.join(dest_dir, filename)) - - release_test_vagrantfile = os.path.join(release_test_dir, "Vagrantfile") - with open(release_test_vagrantfile) as f: - lines = list(f) - - found_box_line = False - with open(release_test_vagrantfile, "w") as f: - for line in lines: - # Skip setting version - if "config.vm.box_version" in line: - continue - m = VM_BOX_RE.match(line) - tvm_home_m = VM_TVM_HOME_RE.match(line) - - if tvm_home_m: - # Adjust tvm home for testing step - f.write(f'{tvm_home_m.group(1)} = "../../../.."\n') - continue - if not m: - f.write(line) - continue - - box_package = os.path.join( - base_box_dir, f"output-packer-{provider_name}", "package.box" - ) - box_relpath = os.path.relpath(box_package, release_test_dir) - f.write(f'{m.group(1)} = "{box_relpath}"\n') - found_box_line = True - - if not found_box_line: - _LOG.error( - "testing provider %s: couldn't find config.box.vm = line in Vagrantfile; unable to test", - provider_name, - ) - return False - - # Delete the old box registered with Vagrant, which may lead to a falsely-passing release test. 
- remove_args = ["vagrant", "box", "remove", box_relpath] - return_code = subprocess.call(remove_args, cwd=release_test_dir) - assert return_code in (0, 1), f'{" ".join(remove_args)} returned exit code {return_code}' - subprocess.check_call(["vagrant", "up", f"--provider={provider_name}"], cwd=release_test_dir) - return True - - -def do_run_release_test(release_test_dir, provider_name, test_config, test_device_serial): - with open( - os.path.join(release_test_dir, ".vagrant", "machines", "default", provider_name, "id") - ) as f: - machine_uuid = f.read() - - # Check if target is not QEMU - if test_config["vid_hex"] and test_config["pid_hex"]: - ATTACH_USB_DEVICE[provider_name]( - machine_uuid, - vid_hex=test_config["vid_hex"], - pid_hex=test_config["pid_hex"], - serial=test_device_serial, - ) - tvm_home = os.path.realpath(THIS_DIR / ".." / ".." / "..") - - def _quote_cmd(cmd): - return " ".join(shlex.quote(a) for a in cmd) - - test_cmd = ( - _quote_cmd(["cd", tvm_home]) - + " && " - + _quote_cmd( - [ - f"apps/microtvm/reference-vm/base-box/base_box_test.sh", - test_config["microtvm_board"], - ] - ) - ) - subprocess.check_call(["vagrant", "ssh", "-c", f"bash -ec '{test_cmd}'"], cwd=release_test_dir) - - -def test_command(args): - user_box_dir = THIS_DIR - base_box_dir = user_box_dir / "base-box" - boards_file = THIS_DIR / ".." / args.platform / "template_project" / "boards.json" - with open(boards_file) as f: - test_config = json.load(f) - - # select microTVM test config - microtvm_test_config = test_config[args.microtvm_board] - - for key, expected_type in REQUIRED_TEST_CONFIG_KEYS.items(): - assert key in microtvm_test_config and isinstance( - microtvm_test_config[key], expected_type - ), f"Expected key {key} of type {expected_type} in {boards_file}: {test_config!r}" - - microtvm_test_config["vid_hex"] = microtvm_test_config["vid_hex"].lower() - microtvm_test_config["pid_hex"] = microtvm_test_config["pid_hex"].lower() - microtvm_test_config["microtvm_board"] = args.microtvm_board - - providers = args.provider - - release_test_dir = THIS_DIR / f"release-test" - - if args.skip_build or args.skip_destroy: - assert ( - len(providers) == 1 - ), "--skip-build and/or --skip-destroy was given, but >1 provider specified" - - test_failed = False - for provider_name in providers: - try: - if not args.skip_build: - do_build_release_test_vm( - release_test_dir, user_box_dir, base_box_dir, provider_name - ) - do_run_release_test( - release_test_dir, - provider_name, - microtvm_test_config, - args.test_device_serial, - ) - - except subprocess.CalledProcessError: - test_failed = True - sys.exit( - f"\n\nERROR: Provider '{provider_name}' failed the release test. " - "You can re-run it to reproduce the issue without building everything " - "again by passing the --skip-build and specifying only the provider that failed. " - "The VM is still running in case you want to connect it via SSH to " - "investigate further the issue, thus it's necessary to destroy it manually " - "to release the resources back to the host, like a USB device attached to the VM." - ) - - finally: - # if we reached out here do_run_release_test() succeeded, hence we can - # destroy the VM and release the resources back to the host if user haven't - # requested to not destroy it. 
- if not (args.skip_destroy or test_failed): - subprocess.check_call(["vagrant", "destroy", "-f"], cwd=release_test_dir) - shutil.rmtree(release_test_dir) - - print(f'\n\nThe release tests passed on all specified providers: {", ".join(providers)}.') - - -def release_command(args): - if args.release_full_name: - vm_name = args.release_full_name - else: - vm_name = "tlcpack/microtvm" - - if not args.skip_creating_release_version: - subprocess.check_call( - [ - "vagrant", - "cloud", - "version", - "create", - vm_name, - args.release_version, - ] - ) - if not args.release_version: - sys.exit(f"--release-version must be specified") - - for provider_name in args.provider: - subprocess.check_call( - [ - "vagrant", - "cloud", - "publish", - "-f", - vm_name, - args.release_version, - provider_name, - str(THIS_DIR / "base-box" / f"output-packer-{provider_name}/package.box"), - ] - ) - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Automates building, testing, and releasing a base box" - ) - subparsers = parser.add_subparsers(help="Action to perform.") - subparsers.required = True - subparsers.dest = "action" - parser.add_argument( - "--provider", - choices=ALL_PROVIDERS, - action="append", - required=True, - help="Name of the provider or providers to act on", - ) - - # "test" has special options for different platforms, and "build", "release" might - # in the future, so we'll add the platform argument to each one individually. - platform_help_str = "Platform to use (e.g. Arduino, Zephyr)" - - # Options for build subcommand - parser_build = subparsers.add_parser("build", help="Build a base box.") - parser_build.set_defaults(func=build_command) - parser_build.add_argument( - "--debug-packer", - action="store_true", - help=("Run packer in debug mode, and write log to the base-box directory."), - ) - parser_build.add_argument( - "--force", - action="store_true", - help=("Force rebuilding a base box from scratch if one already exists."), - ) - - # Options for test subcommand - parser_test = subparsers.add_parser("test", help="Test a base box before release.") - parser_test.set_defaults(func=test_command) - parser_test.add_argument( - "--skip-build", - action="store_true", - help=( - "If given, assume a box has already been built in the release-test subdirectory, " - "so use that box to execute the release test script. If the tests fail the VM used " - "for testing will be left running for further investigation and will need to be " - "destroyed manually. If all tests pass on all specified providers no VM is left running, " - "unless --skip-destroy is given too." - ), - ) - parser_test.add_argument( - "--skip-destroy", - action="store_true", - help=( - "Skip destroying the test VM even if all tests pass. Can only be used if a single " - "provider is specified. Default is to destroy the VM if all tests pass (and always " - "skip destroying it if a test fails)." - ), - ) - parser_test.add_argument( - "--test-device-serial", - help=( - "If given, attach the test device with this USB serial number. Corresponds to the " - "iSerial field from `lsusb -v` output." 
- ), - ) - parser_test_platform_subparsers = parser_test.add_subparsers(help=platform_help_str) - for platform in ALL_PLATFORMS: - platform_specific_parser = parser_test_platform_subparsers.add_parser(platform) - platform_specific_parser.set_defaults(platform=platform) - platform_specific_parser.add_argument( - "--microtvm-board", - choices=ALL_MICROTVM_BOARDS[platform], - required=True, - help="MicroTVM board used for testing.", - ) - - # Options for release subcommand - parser_release = subparsers.add_parser("release", help="Release base box to cloud.") - parser_release.set_defaults(func=release_command) - parser_release.add_argument( - "--release-version", - required=True, - help="Version to release, in the form 'x.y.z'. Must be specified with release.", - ) - parser_release.add_argument( - "--skip-creating-release-version", - action="store_true", - help="Skip creating the version and just upload for this provider.", - ) - parser_release.add_argument( - "--release-full-name", - required=False, - type=str, - default=None, - help=( - "If set, it will use this as the full release name and version for the box. " - "If this set, it will ignore `--release-version`." - ), - ) - - args = parser.parse_args() - return args - - -def main(): - args = parse_args() - args.func(args) - - -if __name__ == "__main__": - main() diff --git a/apps/microtvm/reference-vm/base-box/.gitignore b/apps/microtvm/reference-vm/base-box/.gitignore deleted file mode 100644 index e4406c4f61e2..000000000000 --- a/apps/microtvm/reference-vm/base-box/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -*.box -.vagrant -/output-packer-* -/packer.json diff --git a/apps/microtvm/reference-vm/base-box/Vagrantfile.packer-template b/apps/microtvm/reference-vm/base-box/Vagrantfile.packer-template deleted file mode 100644 index b43596bb83c1..000000000000 --- a/apps/microtvm/reference-vm/base-box/Vagrantfile.packer-template +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -Vagrant.configure("2") do |config| - # From hashicorp default template: - # https://github.com/hashicorp/packer/blob/master/builder/vagrant/step_create_vagrantfile.go#L23-L37 - - config.vm.define "source" do |source| - source.vm.box = "{{.SourceBox}}" - config.ssh.insert_key = {{.InsertKey}} - end - - config.vm.define "output" do |output| - output.vm.box = "{{.BoxName}}" - output.vm.box_url = "file://package.box" - config.ssh.insert_key = {{.InsertKey}} - end - - {{ if ne .SyncedFolder "" -}} - config.vm.synced_folder "{{.SyncedFolder}}", "/vagrant" - {{- else -}} - config.vm.synced_folder ".", "/vagrant", disabled: true - {{- end}} - - - {{ if eq .BoxName "microtvm-base-vmware_desktop" -}} - config.vm.provision "shell", inline: "touch ~/skip_zeroing_disk", privileged: false - {{- end}} - - # NOTE: base_box_setup.sh resides in the parent directory (../) because this template is expanded into a - # sub-directory of base-box (output-packer-*). - config.vm.provision "shell", path: "../base_box_setup.sh", privileged: false -end diff --git a/apps/microtvm/reference-vm/base-box/base_box_provision.sh b/apps/microtvm/reference-vm/base-box/base_box_provision.sh deleted file mode 100755 index d8b987973735..000000000000 --- a/apps/microtvm/reference-vm/base-box/base_box_provision.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/bash -e -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# Using this script we can reuse docker/install scripts to configure the reference -# virtual machine similar to CI Cortex-M setup. 
-# - -set -x - -source ~/.profile - -# Init Zephyr -cd ~ -~/ubuntu_init_zephyr_project.sh ~/zephyr - -# Install CMSIS -cd ~ -~/ubuntu_install_cmsis.sh ~/cmsis - -# Cleanup -rm -f ubuntu_init_zephyr_project.sh ubuntu_install_cmsis.sh - -# Init Arduino -source ~/.profile -cd ~ - -sudo apt-get install -y ca-certificates - -# Install Arduino-CLI (specific version) -# To keep in sync with the version -# defined in apps/microtvm/arduino/template_project/microtvm_api_server.py -ARDUINO_CLI_VERSION="0.21.1" - -export PATH="/home/vagrant/bin:$PATH" -wget -O - https://raw.githubusercontent.com/arduino/arduino-cli/master/install.sh | sh -s ${ARDUINO_CLI_VERSION} - -# Arduino (the CLI and GUI) require the dialout permission for uploading -sudo usermod -a -G dialout $USER - -# ubuntu_init_arduino.sh only installs a few officially -# supported architectures, so we don't use it here - -# 3rd party board URLs -ADAFRUIT_BOARDS_URL="https://raw.githubusercontent.com/adafruit/arduino-board-index/7840c768/package_adafruit_index.json" -ESP32_BOARDS_URL="https://github.com/espressif/arduino-esp32/releases/download/2.0.3/package_esp32_dev_index.json" -RP2040_BOARDS_URL="https://github.com/earlephilhower/arduino-pico/releases/download/2.0.3/package_rp2040_index.json" -SPRESENSE_BOARDS_URL="https://github.com/sonydevworld/spresense-arduino-compatible/releases/download/v2.5.0/package_spresense_index.json" -arduino-cli core update-index --additional-urls $ADAFRUIT_BOARDS_URL,$ESP32_BOARDS_URL,$RP2040_BOARDS_URL,$SPRESENSE_BOARDS_URL - -# Install supported cores from those URLS -arduino-cli version -arduino-cli core install arduino:mbed_nano@3.0.1 -arduino-cli core install arduino:sam@1.6.12 -arduino-cli core install arduino:mbed_portenta@3.1.1 -arduino-cli core install adafruit:samd@1.7.10 --additional-urls $ADAFRUIT_BOARDS_URL -arduino-cli core install esp32:esp32@2.0.2 --additional-urls $ESP32_BOARDS_URL -arduino-cli core install rp2040:rp2040@2.0.3 --additional-urls $RP2040_BOARDS_URL -arduino-cli core install SPRESENSE:spresense@2.5.0 --additional-urls $SPRESENSE_BOARDS_URL - -# The Arduino Code API has a major bug that breaks TVM. It has been worked around in -# most board SDKs (including arduino:sam), but it still exists for the Portenta H7. -# There is a PR to fix it (https://github.com/arduino/ArduinoCore-API/pull/163), but -# it may not be merged for a while (and a new release will have to be deployed too). -# The below sed command avoids the bug, and will be removed when no longer needed. -PORTENTA_H7_BUGFIX_PATH=~/.arduino15/packages/arduino/hardware/mbed_portenta/3.1.1/cores/arduino/api/Common.h -sed -i '3 i #include ' $PORTENTA_H7_BUGFIX_PATH diff --git a/apps/microtvm/reference-vm/base-box/base_box_setup.sh b/apps/microtvm/reference-vm/base-box/base_box_setup.sh deleted file mode 100755 index 33487150f935..000000000000 --- a/apps/microtvm/reference-vm/base-box/base_box_setup.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash -e -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e -set -x - -skip_zeroing_disk=0 -if [ -e "$HOME/skip_zeroing_disk" ]; then - echo "NOTE: will not zero disk at the end due to VMWare Fusion bug" - echo "See: https://communities.vmware.com/t5/VMware-Fusion-Discussions/VMWare-Fusion-Pro-11-15-6-16696540-causes-macOS-crash-during/m-p/2284011#M139190" - skip_zeroing_disk=1 -fi - -# Install common configs -~/base_box_setup_common.sh -rm -f ~/base_box_setup_common.sh - -# Poetry -sed -i "/^# If not running interactively,/ i source \$HOME/.poetry/env" ~/.bashrc -sed -i "/^# If not running interactively,/ i export ZEPHYR_BASE=$HOME/zephyr/zephyr" ~/.bashrc -sed -i "/^# If not running interactively,/ i\\ " ~/.bashrc - -# nrfjprog -sudo ~/ubuntu_install_nrfjprog.sh -rm -f ~/ubuntu_install_nrfjprog.sh - -# Zephyr -pip3 install --user -U west -echo 'export PATH=$HOME/.local/bin:"$PATH"' >> ~/.profile -source ~/.profile -echo PATH=$PATH - -cd ~ -echo "Downloading zephyr SDK..." -~/ubuntu_install_zephyr_sdk.sh ~/zephyr-sdk -rm -f ubuntu_install_zephyr_sdk.sh - -# GDB for Zephyr SDK depends on python3.8 -sudo add-apt-repository ppa:deadsnakes/ppa -sudo apt install -y python3.8-dev - -sudo find ~/zephyr-sdk -name '*.rules' -exec cp {} /etc/udev/rules.d \; -sudo udevadm control --reload - -# Clean box for packaging as a base box -sudo apt-get clean -if [ $skip_zeroing_disk -eq 0 ]; then - echo "Zeroing disk..." - EMPTY_FILE="$HOME/EMPTY" - dd if=/dev/zero "of=${EMPTY_FILE}" bs=1M || /bin/true - if [ ! -e "${EMPTY_FILE}" ]; then - echo "failed to zero empty sectors on disk" - exit 2 - fi - rm -f "${EMPTY_FILE}" -else - echo "NOTE: skipping zeroing disk due to command-line argument." -fi diff --git a/apps/microtvm/reference-vm/base-box/base_box_setup_common.sh b/apps/microtvm/reference-vm/base-box/base_box_setup_common.sh deleted file mode 100755 index 56f907306ccd..000000000000 --- a/apps/microtvm/reference-vm/base-box/base_box_setup_common.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/bash -e -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -x - -# Fix network DNS issue -sudo sed -i 's/DNSSEC=yes/DNSSEC=no/' /etc/systemd/resolved.conf -sudo systemctl restart systemd-resolved - -sudo cp ~/apt-install-and-clear.sh /usr/local/bin/apt-install-and-clear -rm -f ~/apt-install-and-clear.sh - -sudo apt update -sudo apt-install-and-clear -y build-essential -sudo apt-get --purge remove modemmanager # required to access serial ports. - -# Core -sudo ~/ubuntu_install_core.sh -rm -f ~/ubuntu_install_core.sh - -sudo apt-install-and-clear -y --no-install-recommends git \ - gperf ccache dfu-util device-tree-compiler xz-utils file \ - gcc gcc-multilib g++-multilib libsdl2-dev - -# Cmake -wget --no-verbose https://apt.kitware.com/keys/kitware-archive-latest.asc -sudo apt-key add kitware-archive-latest.asc -sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' -sudo apt update -sudo apt-install-and-clear -y --no-install-recommends \ - cmake=3.22.2-0kitware1ubuntu18.04.1 cmake-data=3.22.2-0kitware1ubuntu18.04.1 \ - -# Python -sudo ~/ubuntu_install_python.sh 3.8 -rm -f ~/ubuntu_install_python.sh - -# Poetry deps -sudo apt-install-and-clear -y python3-venv - -# TVM deps -sudo ~/ubuntu2204_install_llvm.sh -rm -rf ~/ubuntu2204_install_llvm.sh - -# ONNX deps -sudo apt-install-and-clear -y protobuf-compiler libprotoc-dev - -# Poetry -curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python3 - -# Host name -OLD_HOSTNAME=$(hostname) -sudo hostnamectl set-hostname microtvm -sudo sed -i.bak "s/${OLD_HOSTNAME}/microtvm.localdomain/g" /etc/hosts diff --git a/apps/microtvm/reference-vm/base-box/base_box_test.sh b/apps/microtvm/reference-vm/base-box/base_box_test.sh deleted file mode 100755 index 09779bb048e0..000000000000 --- a/apps/microtvm/reference-vm/base-box/base_box_test.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -e -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -set -x - -if [ "$#" -lt 1 ]; then - echo "Usage: base_box_test.sh " - exit -1 -fi - -platform=$1 -board=$2 - -if [ "${platform}" == "zephyr" ]; then - pytest tests/micro/zephyr --board=${board} -fi - -if [ "${platform}" == "arduino" ]; then - pytest tests/micro/arduino/test_arduino_workflow.py --board=${board} - if [ $board == "nano33ble" ]; then - # https://github.com/apache/tvm/issues/8730 - echo "NOTE: skipped test_arduino_rpc_server.py on $board -- known failure" - else - pytest tests/micro/arduino/test_arduino_rpc_server.py --board=${board} - fi -fi diff --git a/apps/microtvm/reference-vm/provision_setup.sh b/apps/microtvm/reference-vm/provision_setup.sh deleted file mode 100755 index f6237a82cd8b..000000000000 --- a/apps/microtvm/reference-vm/provision_setup.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -e -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -# TVM -# NOTE: TVM is presumed to be mounted already by Vagrantfile. -cd "${TVM_HOME}" - -apps/microtvm/reference-vm/rebuild_tvm.sh - -# Build poetry -cd apps/microtvm/reference-vm - -poetry env use 3.7 - -# importers -poetry install -E importer-onnx -poetry install -E importer-tflite -poetry install -E importer-mxnet - -poetry install -poetry run pip3 install -r ${ZEPHYR_BASE}/scripts/requirements.txt - -echo "export TVM_LIBRARY_PATH=\"$TVM_HOME\"/build-microtvm" >>~/.profile -echo "VENV_PATH=\$((cd \"$TVM_HOME\"/apps/microtvm/reference-vm && poetry env list --full-path) | sed -E 's/^(.*)[[:space:]]\(Activated\)\$/\1/g')" >>~/.profile -echo "source \$VENV_PATH/bin/activate" >>~/.profile -echo "export PATH=\"\${PATH}:\${HOME}/zephyr-sdk/sysroots/x86_64-pokysdk-linux/usr/bin\"" >>~/.profile -echo "export CMSIS_PATH=\"\${HOME}/cmsis\"" >>~/.profile diff --git a/apps/microtvm/reference-vm/rebuild_tvm.sh b/apps/microtvm/reference-vm/rebuild_tvm.sh deleted file mode 100755 index 1a690d3225cf..000000000000 --- a/apps/microtvm/reference-vm/rebuild_tvm.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -e -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -set -e - -if [ "$1" == "--help" ]; then - echo "Usage ./apps/microtvm/reference-vm/rebuild_tvm.sh" - exit -1 -fi - -# Get number of cores for build -if [ -n "${TVM_CI_NUM_CORES}" ]; then - num_cores=${TVM_CI_NUM_CORES} -else - # default setup for Vagrantfile - num_cores=2 -fi - -cd "$(dirname $0)" -cd "$(git rev-parse --show-toplevel)" -BUILD_DIR="build-microtvm" - -if [ ! -e "${BUILD_DIR}" ]; then - mkdir "${BUILD_DIR}" -fi - -./tests/scripts/task_config_build_cortexm.sh "${BUILD_DIR}" -cd "${BUILD_DIR}" -cmake .. -rm -rf standalone_crt host_standalone_crt # remove stale generated files -make -j${num_cores} diff --git a/apps/microtvm/reference-vm/scripts/reference_vm_build.sh b/apps/microtvm/reference-vm/scripts/reference_vm_build.sh deleted file mode 100755 index bfbd8aaa26d4..000000000000 --- a/apps/microtvm/reference-vm/scripts/reference_vm_build.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -e -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -if [ "$1" == "--help" -o "$1" == "-h" ]; then - echo "Usage: apps/microtvm/reference-vm/scripts/reference_vm_build.sh" - exit -1 -fi - -cd "$(dirname "$0")" -source "./utils.sh" || exit 2 -cd ${RVM_BASE_PATH} - -${BASE_BOX_TOOL} --provider=virtualbox build diff --git a/apps/microtvm/reference-vm/scripts/reference_vm_release.sh b/apps/microtvm/reference-vm/scripts/reference_vm_release.sh deleted file mode 100755 index beb271bd9e75..000000000000 --- a/apps/microtvm/reference-vm/scripts/reference_vm_release.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -e -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -if [ "$#" -lt 2 -o "$1" == "--help" -o "$1" == "-h" ]; then - echo "Usage: apps/microtvm/reference-vm/scripts/reference_vm_release.sh " - exit -1 -fi - -RELEASE_NAME=$1 -shift - -RELEASE_VERSION=$1 -shift - -cd "$(dirname "$0")" -source "./utils.sh" || exit 2 -cd ${RVM_BASE_PATH} - -${BASE_BOX_TOOL} --provider=virtualbox release \ - --release-full-name=${RELEASE_NAME} \ - --release-version=${RELEASE_VERSION} \ - --skip-creating-release-version diff --git a/apps/microtvm/reference-vm/scripts/reference_vm_test.sh b/apps/microtvm/reference-vm/scripts/reference_vm_test.sh deleted file mode 100755 index 25031cc0ccd8..000000000000 --- a/apps/microtvm/reference-vm/scripts/reference_vm_test.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# Usage: apps/microtvm/reference-vm/scripts/reference_vm_test.sh -# - -if [ "$#" -lt 2 -o "$1" == "--help" -o "$1" == "-h" ]; then - echo "Usage: apps/microtvm/reference-vm/scripts/reference_vm_test.sh " - exit -1 -fi - -PLATFORM=$1 -shift - -BOARD=$1 -shift - -cd "$(dirname "$0")" -source "./utils.sh" || exit 2 -cd ${RVM_BASE_PATH} - -${BASE_BOX_TOOL} --provider=virtualbox test ${PLATFORM} --microtvm-board=${BOARD} diff --git a/apps/microtvm/reference-vm/scripts/utils.sh b/apps/microtvm/reference-vm/scripts/utils.sh deleted file mode 100755 index 27b929da2316..000000000000 --- a/apps/microtvm/reference-vm/scripts/utils.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -SCRIPTS_DIR=$(dirname "${BASH_SOURCE[0]}") - -function get_repo_root() { - cd "${SCRIPTS_DIR}" && git rev-parse --show-toplevel -} - -BASE_BOX_TOOL="./base-box-tool.py" -RVM_BASE_PATH="$(get_repo_root)"/apps/microtvm/reference-vm diff --git a/apps/microtvm/zephyr/README.md b/apps/microtvm/zephyr/README.md deleted file mode 100644 index 1003b65f824b..000000000000 --- a/apps/microtvm/zephyr/README.md +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - - - - - - - - -This directory contains code to interface microTVM with the [Zephyr RTOS](https://zephyrproject.org/). diff --git a/apps/microtvm/zephyr/template_project/CMakeLists.txt.template b/apps/microtvm/zephyr/template_project/CMakeLists.txt.template deleted file mode 100644 index 416ff5fd5ed6..000000000000 --- a/apps/microtvm/zephyr/template_project/CMakeLists.txt.template +++ /dev/null @@ -1,85 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# SPDX-License-Identifier: Apache-2.0 - -cmake_minimum_required(VERSION 3.13.1) - -set(ENV{QEMU_BIN_PATH} "${CMAKE_SOURCE_DIR}/qemu-hack") - -set(QEMU_PIPE CACHE PATH "Path to QEMU pipe") - -option(ENERGY_MODE "Enable energy mode for MLPerfTiny tests." 0) - - - -find_package(Zephyr HINTS $ENV{ZEPHYR_BASE}) -project(microtvm_autogenerated_project) - -if(DEFINED CMSIS_PATH) - file(GLOB_RECURSE cmsis_lib_srcs - ${CMSIS_PATH}/CMSIS-NN/Source/ActivationFunctions/*.c - ${CMSIS_PATH}/CMSIS-NN/Source/BasicMathFunctions/*.c - ${CMSIS_PATH}/CMSIS-NN/Source/ConcatenationFunctions/*.c - ${CMSIS_PATH}/CMSIS-NN/Source/ConvolutionFunctions/*.c - ${CMSIS_PATH}/CMSIS-NN/Source/FullyConnectedFunctions/*.c - ${CMSIS_PATH}/CMSIS-NN/Source/NNSupportFunctions/*.c - ${CMSIS_PATH}/CMSIS-NN/Source/PoolingFunctions/*.c - ${CMSIS_PATH}/CMSIS-NN/Source/ReshapeFunctions/*.c - ${CMSIS_PATH}/CMSIS-NN/Source/SoftmaxFunctions/*.c - ) - - set(cmsis_includes - ${CMSIS_PATH}/CMSIS/Core/Include - ${CMSIS_PATH}/CMSIS-NN/Include - ${CMSIS_PATH}/CMSIS/DSP/Include - ${CMSIS_PATH}/CMSIS/DSP/Include/dsp - ) -else() - set(cmsis_lib_srcs "") - set(cmsis_includes "") -endif() - -# Add CRT libraries -set(CRT_LIBS ) -set(CRT_CONFIG_PATH ${CMAKE_SOURCE_DIR}/crt_config) -add_subdirectory(crt) -foreach(crt_lib_name ${CRT_LIBS}) - target_link_libraries(${crt_lib_name} PUBLIC zephyr_interface) - target_link_libraries(app PRIVATE ${crt_lib_name}) -endforeach(crt_lib_name ${CRT_LIBS}) - -# define a library for the model sources. 
-zephyr_library_named(tvm_model) -file(GLOB_RECURSE tvm_model_srcs model/codegen/host/src/*.c model/codegen/host/lib/*.o) -target_sources(tvm_model PRIVATE ${tvm_model_srcs}) -target_include_directories(tvm_model PRIVATE ${CMAKE_SOURCE_DIR}/include crt_config crt/include model/codegen/host/include ${cmsis_includes}) -target_compile_options(tvm_model PRIVATE -Wno-unused-variable) # TVM-generated code tends to include lots of these. -target_link_libraries(app PRIVATE tvm_model) - -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/src/api) - zephyr_library_named(tinymlperf_api) - file(GLOB_RECURSE tiny_internal_srcs src/api/*.cpp) - target_sources(tinymlperf_api PRIVATE ${tiny_internal_srcs}) - target_compile_options(tinymlperf_api PRIVATE -Wno-unused-variable) # TVM-generated code tends to include lots of these. - target_link_libraries(app PRIVATE tinymlperf_api) - target_compile_definitions(tinymlperf_api PUBLIC -DEE_CFG_ENERGY_MODE=${ENERGY_MODE}) -endif() - -file(GLOB_RECURSE app_srcs src/**.c src/**.cc) -target_sources(app PRIVATE ${app_srcs} ${cmsis_lib_srcs}) -target_include_directories(app PRIVATE crt_config include ${CMAKE_SOURCE_DIR}/include crt/include model/codegen/host/include ${cmsis_includes}) diff --git a/apps/microtvm/zephyr/template_project/README.md b/apps/microtvm/zephyr/template_project/README.md deleted file mode 100644 index eab3f3d241a1..000000000000 --- a/apps/microtvm/zephyr/template_project/README.md +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - - -This directory contains a Zephyr-based "demo" runtime environment that -pulls together the microTVM runtime dependencies into a single application -that can communicate with a Python-based host program via the UART, using -TVM's RPC protocol. diff --git a/apps/microtvm/zephyr/template_project/app-overlay/nucleo_l4r5zi.overlay b/apps/microtvm/zephyr/template_project/app-overlay/nucleo_l4r5zi.overlay deleted file mode 100644 index 532efe50d397..000000000000 --- a/apps/microtvm/zephyr/template_project/app-overlay/nucleo_l4r5zi.overlay +++ /dev/null @@ -1,45 +0,0 @@ - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -&rcc { - clock-frequency = ; -}; - -/* - Set PLL accordingly to freq. reported by 'clock-frequency' property, where: - - VCO freq = PLL clock input freq (HSI: 16 MHz) * N / M and - Core freq = VCO freq / R. - - Hence: - - VCO freq = 16 * 30 / 2 = 240 MHz and - Core freq = 240 MHz / 2 = 120 MHz - - Prop. 'div-p' and 'div-q' will be inherited from the overlaid 'pll' node. 
-*/ - -&pll { - div-m = <2>; - mul-n = <30>; - div-r = <2>; - clocks = <&clk_hsi>; - status = "okay"; -}; diff --git a/apps/microtvm/zephyr/template_project/boards.json b/apps/microtvm/zephyr/template_project/boards.json deleted file mode 100644 index 28cbee54d602..000000000000 --- a/apps/microtvm/zephyr/template_project/boards.json +++ /dev/null @@ -1,102 +0,0 @@ -{ - "b_u585i_iot02a": { - "board": "b_u585i_iot02a", - "model": "stm32u5xx", - "is_qemu": false, - "fpu": true, - "vid_hex": "0483", - "pid_hex": "374e" - }, - "mimxrt1050_evk": { - "board": "mimxrt1050_evk", - "model": "imxrt10xx", - "is_qemu": false, - "fpu": true, - "vid_hex": "1366", - "pid_hex": "0105" - }, - "mps2_an521": { - "board": "mps2_an521", - "model": "mps2_an521", - "is_qemu": true, - "fpu": false, - "vid_hex": "", - "pid_hex": "" - }, - "mps3_an547": { - "board": "mps3_an547", - "model": "mps3_an547", - "is_qemu": true, - "fpu": false, - "note": "FPU is supported by mps3_an547, but full support for FPU+MVE is only available from QEMU v6.2.0 (not present in any zephyr-sdk yet), hence FPU is left disabled.", - "vid_hex": "", - "pid_hex": "" - }, - "nrf5340dk_nrf5340_cpuapp": { - "board": "nrf5340dk_nrf5340_cpuapp", - "model": "nrf5340dk", - "is_qemu": false, - "fpu": true, - "vid_hex": "1366", - "pid_hex": "1055", - "recommended_heap_size_bytes": 368640 - }, - "nucleo_f746zg": { - "board": "nucleo_f746zg", - "model": "stm32f746xx", - "is_qemu": false, - "fpu": true, - "vid_hex": "0483", - "pid_hex": "374b" - }, - "nucleo_l4r5zi": { - "board": "nucleo_l4r5zi", - "model": "stm32l4r5zi", - "is_qemu": false, - "fpu": true, - "vid_hex": "0483", - "pid_hex": "374b", - "recommended_heap_size_bytes": 524288 - }, - "qemu_cortex_r5": { - "board": "qemu_cortex_r5", - "model": "zynq_mp_r5", - "is_qemu": true, - "fpu": true, - "vid_hex": "", - "pid_hex": "" - }, - "qemu_riscv32": { - "board": "qemu_riscv32", - "model": "host", - "is_qemu": true, - "fpu": false, - "vid_hex": "", - "pid_hex": "" - }, - "qemu_riscv64": { - "board": "qemu_riscv64", - "model": "host", - "is_qemu": true, - "fpu": true, - "vid_hex": "", - "pid_hex": "" - }, - "qemu_x86": { - "board": "qemu_x86", - "model": "host", - "is_qemu": true, - "fpu": true, - "vid_hex": "", - "pid_hex": "", - "recommended_heap_size_bytes": 524288 - }, - "stm32f746g_disco": { - "board": "stm32f746g_disco", - "model": "stm32f746xx", - "is_qemu": false, - "fpu": true, - "vid_hex": "0483", - "pid_hex": "374b" - } -} diff --git a/apps/microtvm/zephyr/template_project/fvp-hack/FVP_Corstone_SSE-300_Ethos-U55 b/apps/microtvm/zephyr/template_project/fvp-hack/FVP_Corstone_SSE-300_Ethos-U55 deleted file mode 100755 index 6325fec9b3b1..000000000000 --- a/apps/microtvm/zephyr/template_project/fvp-hack/FVP_Corstone_SSE-300_Ethos-U55 +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -e -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -set -x - -ARGS=( "$(basename $0)" ) - -if [ "${FVP_BIN_PATH}" != "" ]; then - ARGS=( ${FVP_BIN_PATH}/${ARGS[0]} ) -fi - -ARGS=( "${ARGS[@]}" - --iris-server - --print-port-number - -C cpu0.semihosting-enable=1 - -C mps3_board.telnetterminal0.mode=raw - -C mps3_board.telnetterminal1.mode=raw - -C mps3_board.telnetterminal2.mode=raw - -C mps3_board.telnetterminal0.start_telnet=0 - -C mps3_board.telnetterminal1.start_telnet=0 - -C mps3_board.telnetterminal2.start_telnet=0 - ) - -while [ "$#" -gt 0 ]; do - ARGS=( "${ARGS[@]}" "$1" ) - shift -done - -"${ARGS[@]}" diff --git a/apps/microtvm/zephyr/template_project/launch_microtvm_api_server.sh b/apps/microtvm/zephyr/template_project/launch_microtvm_api_server.sh deleted file mode 100755 index 1531c453dc27..000000000000 --- a/apps/microtvm/zephyr/template_project/launch_microtvm_api_server.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -function show_usage() { - cat < --read-fd --write-fd -EOF -} - -if [ "$#" -lt 5 -o "$1" == "--help" ]; then - show_usage - exit -1 -fi - -PYTHON_CMD=$(sed 's/#!//; q' $(which west)) - -# Run server -$PYTHON_CMD $1 $2 $3 $4 $5 diff --git a/apps/microtvm/zephyr/template_project/microtvm_api_server.py b/apps/microtvm/zephyr/template_project/microtvm_api_server.py deleted file mode 100644 index 8a792ff41e8d..000000000000 --- a/apps/microtvm/zephyr/template_project/microtvm_api_server.py +++ /dev/null @@ -1,1273 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import atexit -import collections -import collections.abc -import enum -import fcntl -import json -import logging -import os -import os.path -import pathlib -import queue -import re -import shlex -import shutil -import struct -import subprocess -import sys -import tarfile -import tempfile -import threading -from typing import Union -import usb -import psutil -import stat - -import serial -import serial.tools.list_ports -import yaml - -import server - - -_LOG = logging.getLogger(__name__) - - -API_SERVER_DIR = pathlib.Path(os.path.dirname(__file__) or os.path.getcwd()) - - -BUILD_DIR = API_SERVER_DIR / "build" - - -MODEL_LIBRARY_FORMAT_RELPATH = "model.tar" - - -IS_TEMPLATE = not (API_SERVER_DIR / MODEL_LIBRARY_FORMAT_RELPATH).exists() - - -BOARDS = API_SERVER_DIR / "boards.json" - -CMAKELIST_FILENAME = "CMakeLists.txt" - -# Used to check Zephyr version installed on the host. -# We only check two levels of the version. -ZEPHYR_VERSION = 3.2 - -WEST_CMD = default = sys.executable + " -m west" if sys.executable else None - -ZEPHYR_BASE = os.getenv("ZEPHYR_BASE") - -# Data structure to hold the information microtvm_api_server.py needs -# to communicate with each of these boards. -try: - with open(BOARDS) as boards: - BOARD_PROPERTIES = json.load(boards) -except FileNotFoundError: - raise FileNotFoundError(f"Board file {{{BOARDS}}} does not exist.") - - -def check_call(cmd_args, *args, **kwargs): - cwd_str = "" if "cwd" not in kwargs else f" (in cwd: {kwargs['cwd']})" - _LOG.info("run%s: %s", cwd_str, " ".join(shlex.quote(a) for a in cmd_args)) - return subprocess.check_call(cmd_args, *args, **kwargs) - - -CACHE_ENTRY_RE = re.compile(r"(?P[^:]+):(?P[^=]+)=(?P.*)") - - -CMAKE_BOOL_MAP = dict( - [(k, True) for k in ("1", "ON", "YES", "TRUE", "Y")] - + [(k, False) for k in ("0", "OFF", "NO", "FALSE", "N", "IGNORE", "NOTFOUND", "")] -) - -CMSIS_PATH_ERROR = ( - "cmsis_path is not defined! Please pass it as an option or set the `CMSIS_PATH` env variable." -) - - -class CMakeCache(collections.abc.Mapping): - def __init__(self, path): - self._path = path - self._dict = None - - def __iter__(self): - return iter(self._dict) - - def __getitem__(self, key): - if self._dict is None: - self._dict = self._read_cmake_cache() - - return self._dict[key] - - def __len__(self): - return len(self._dict) - - def _read_cmake_cache(self): - """Read a CMakeCache.txt-like file and return a dictionary of values.""" - entries = collections.OrderedDict() - with open(self._path, encoding="utf-8") as f: - for line in f: - m = CACHE_ENTRY_RE.match(line.rstrip("\n")) - if not m: - continue - - if m.group("type") == "BOOL": - value = CMAKE_BOOL_MAP[m.group("value").upper()] - else: - value = m.group("value") - - entries[m.group("name")] = value - - return entries - - -CMAKE_CACHE = CMakeCache(BUILD_DIR / "CMakeCache.txt") - - -class BoardError(Exception): - """Raised when an attached board cannot be opened (i.e. 
missing /dev nodes, etc).""" - - -class BoardAutodetectFailed(Exception): - """Raised when no attached hardware is found matching the board= given to ZephyrCompiler.""" - - -def _get_flash_runner(): - flash_runner = CMAKE_CACHE.get("ZEPHYR_BOARD_FLASH_RUNNER") - if flash_runner is not None: - return flash_runner - - with open(CMAKE_CACHE["ZEPHYR_RUNNERS_YAML"]) as f: - doc = yaml.load(f, Loader=yaml.FullLoader) - return doc["flash-runner"] - - -def _find_board_from_cmake_file(cmake_file: Union[str, pathlib.Path]) -> str: - """Find Zephyr board from generated CMakeLists.txt""" - zephyr_board = None - with open(cmake_file) as cmake_f: - for line in cmake_f: - if line.startswith("set(BOARD"): - zephyr_board = line.strip("\n").strip("\r").strip(")").split(" ")[1] - break - - if not zephyr_board: - raise RuntimeError(f"No Zephyr board set in the {cmake_file}.") - return zephyr_board - - -def _find_platform_from_cmake_file(cmake_file: Union[str, pathlib.Path]) -> str: - emu_platform = None - with open(cmake_file) as cmake_f: - for line in cmake_f: - set_platform = re.match("set\(EMU_PLATFORM (.*)\)", line) - if set_platform: - emu_platform = set_platform.group(1) - break - return emu_platform - - -def _get_device_args(serial_number: str = None): - flash_runner = _get_flash_runner() - - if flash_runner == "nrfjprog": - return _get_nrf_device_args(serial_number) - - if flash_runner == "openocd": - return _get_openocd_device_args(serial_number) - - raise BoardError( - f"Don't know how to find serial terminal for board {_find_board_from_cmake_file(API_SERVER_DIR / CMAKELIST_FILENAME)} with flash " - f"runner {flash_runner}" - ) - - -def _get_board_mem_size_bytes(zephyr_base: str, board: str): - board_file_path = pathlib.Path(zephyr_base) / "boards" / "arm" / board / (board + ".yaml") - try: - with open(board_file_path) as f: - board_data = yaml.load(f, Loader=yaml.FullLoader) - return int(board_data["ram"]) * 1024 - except: - _LOG.warning("Board memory information is not available.") - return None - - -DEFAULT_WORKSPACE_SIZE_BYTES = 216 * 1024 - - -def _get_recommended_heap_size_bytes(board: str): - prop = BOARD_PROPERTIES[board] - if "recommended_heap_size_bytes" in prop: - return prop["recommended_heap_size_bytes"] - return DEFAULT_WORKSPACE_SIZE_BYTES - - -def generic_find_serial_port(serial_number: str = None): - """Find a USB serial port based on its serial number or its VID:PID. - - This method finds a USB serial port device path based on the port's serial number (if given) or - based on the board's idVendor and idProduct ids. - - Parameters - ---------- - serial_number : str - The serial number associated to the USB serial port which the board is attached to. This is - the same number as shown by 'lsusb -v' in the iSerial field. - - Returns - ------- - Path to the USB serial port device, for example /dev/ttyACM1. 
- """ - if serial_number: - regex = serial_number - else: - prop = BOARD_PROPERTIES[_find_board_from_cmake_file(API_SERVER_DIR / CMAKELIST_FILENAME)] - device_id = ":".join([prop["vid_hex"], prop["pid_hex"]]) - regex = device_id - - serial_ports = list(serial.tools.list_ports.grep(regex)) - - if len(serial_ports) == 0: - raise Exception(f"No serial port found for board {prop['board']}!") - - if len(serial_ports) != 1: - ports_lst = "" - for port in serial_ports: - ports_lst += f"Serial port: {port.device}, serial number: {port.serial_number}\n" - - raise Exception("Expected 1 serial port, found multiple ports:\n {ports_lst}") - - return serial_ports[0].device - - -def _get_openocd_device_args(serial_number: str = None): - return ["--serial", generic_find_serial_port(serial_number)] - - -def _get_nrf_device_args(serial_number: str = None) -> list: - # iSerial has string type which could mistmatch with - # the output of `nrfjprog --ids`. Example: 001050007848 vs 1050007848 - serial_number = serial_number.lstrip("0") - - nrfjprog_args = ["nrfjprog", "--ids"] - nrfjprog_ids = subprocess.check_output(nrfjprog_args, encoding="utf-8") - if not nrfjprog_ids.strip("\n"): - raise BoardAutodetectFailed(f'No attached boards recognized by {" ".join(nrfjprog_args)}') - - boards = nrfjprog_ids.split("\n")[:-1] - if len(boards) > 1: - if serial_number is None: - raise BoardError( - "Multiple boards connected; specify one with nrfjprog_snr=: " f'{", ".join(boards)}' - ) - - if serial_number not in boards: - raise BoardError(f"serial number ({serial_number}) not found in {boards}") - - return ["--snr", serial_number] - - if not boards: - return [] - - return ["--snr", boards[0]] - - -PROJECT_TYPES = [] -if IS_TEMPLATE: - for d in (API_SERVER_DIR / "src").iterdir(): - if d.is_dir(): - PROJECT_TYPES.append(d.name) - -PROJECT_OPTIONS = server.default_project_options( - project_type={"choices": tuple(PROJECT_TYPES)}, - board={"choices": list(BOARD_PROPERTIES)}, - verbose={"optional": ["generate_project"]}, -) + [ - server.ProjectOption( - "gdbserver_port", - optional=["open_transport"], - type="int", - default=None, - help=("If given, port number to use when running the local gdbserver."), - ), - server.ProjectOption( - "serial_number", - optional=["open_transport", "flash"], - type="str", - default=None, - help=("Board serial number."), - ), - server.ProjectOption( - "west_cmd", - required=( - ["generate_project", "build", "flash", "open_transport"] if not WEST_CMD else None - ), - optional=(["generate_project", "build", "flash", "open_transport"] if WEST_CMD else None), - type="str", - default=WEST_CMD, - help=( - "Path to the west tool. If given, supersedes both the zephyr_base " - "option and ZEPHYR_BASE environment variable." 
- ), - ), - server.ProjectOption( - "zephyr_base", - required=(["generate_project", "open_transport"] if not ZEPHYR_BASE else None), - optional=(["generate_project", "open_transport"] if ZEPHYR_BASE else ["build"]), - type="str", - default=ZEPHYR_BASE, - help="Path to the zephyr base directory.", - ), - server.ProjectOption( - "config_main_stack_size", - optional=["generate_project"], - type="int", - default=None, - help="Sets CONFIG_MAIN_STACK_SIZE for Zephyr board.", - ), - server.ProjectOption( - "arm_fvp_path", - optional=["generate_project", "open_transport"], - type="str", - default=None, - help="Path to the FVP binary to invoke.", - ), - server.ProjectOption( - "use_fvp", - optional=["generate_project"], - type="bool", - default=False, - help="Run on the FVP emulator instead of hardware.", - ), - server.ProjectOption( - "workspace_size_bytes", - optional=["generate_project"], - type="int", - default=None, - help="Sets the value for TVM_WORKSPACE_SIZE_BYTES passed to K_HEAP_DEFINE() to service TVM memory allocation requests.", - ), -] - - -class Handler(server.ProjectAPIHandler): - def __init__(self): - super(Handler, self).__init__() - self._proc = None - - def server_info_query(self, tvm_version): - return server.ServerInfo( - platform_name="zephyr", - is_template=IS_TEMPLATE, - model_library_format_path="" - if IS_TEMPLATE - else (API_SERVER_DIR / MODEL_LIBRARY_FORMAT_RELPATH), - project_options=PROJECT_OPTIONS, - ) - - # These files and directories will be recursively copied into generated projects from the CRT. - CRT_COPY_ITEMS = ("include", "CMakeLists.txt", "src") - - # Maps extra line added to prj.conf to a tuple or list of zephyr_board for which it is needed. - EXTRA_PRJ_CONF_DIRECTIVES = { - "CONFIG_TIMER_RANDOM_GENERATOR=y": ( - "qemu_x86", - "qemu_riscv32", - "qemu_cortex_r5", - "qemu_riscv64", - ), - "CONFIG_ENTROPY_GENERATOR=y": ( - "mps2_an521", - "nrf5340dk_nrf5340_cpuapp", - "nucleo_f746zg", - "nucleo_l4r5zi", - "stm32f746g_disco", - ), - } - - def _create_prj_conf( - self, - project_dir: pathlib.Path, - board: str, - project_type: str, - config_main_stack_size: int, - config_led: bool, - use_fvp: bool, - ): - with open(project_dir / "prj.conf", "w") as f: - f.write( - "# For UART used from main().\n" - "CONFIG_RING_BUFFER=y\n" - "CONFIG_UART_CONSOLE=n\n" - "CONFIG_UART_INTERRUPT_DRIVEN=y\n" - "\n" - ) - if ( - config_led - and not self._is_qemu(board, use_fvp) - and not self._is_fvp(board, use_fvp) - ): - f.write("# For debugging.\n" "CONFIG_LED=y\n" "\n") - - f.write("# For TVMPlatformAbort().\n" "CONFIG_REBOOT=y\n" "\n") - - if project_type == "host_driven": - f.write( - "CONFIG_TIMING_FUNCTIONS=y\n" - "# For RPC server C++ bindings.\n" - "CONFIG_CPLUSPLUS=y\n" - "CONFIG_LIB_CPLUSPLUS=y\n" - "\n" - ) - - f.write("# For math routines\n" "CONFIG_NEWLIB_LIBC=y\n" "\n") - - if self._has_fpu(board): - f.write("# For models with floating point.\n" "CONFIG_FPU=y\n" "\n") - - # Set main stack size, if needed. 
- if config_main_stack_size is not None: - f.write(f"CONFIG_MAIN_STACK_SIZE={config_main_stack_size}\n") - - f.write("# For random number generation.\n" "CONFIG_TEST_RANDOM_GENERATOR=y\n") - - f.write("\n# Extra prj.conf directives\n") - for line, board_list in self.EXTRA_PRJ_CONF_DIRECTIVES.items(): - if board in board_list: - f.write(f"{line}\n") - - # TODO(mehrdadh): due to https://github.com/apache/tvm/issues/12721 - if board not in ["qemu_riscv64"]: - f.write("# For setting -O2 in compiler.\n" "CONFIG_SPEED_OPTIMIZATIONS=y\n") - - f.write("\n") - - API_SERVER_CRT_LIBS_TOKEN = "" - CMAKE_ARGS_TOKEN = "" - QEMU_PIPE_TOKEN = "" - - CRT_LIBS_BY_PROJECT_TYPE = { - "host_driven": "microtvm_rpc_server microtvm_rpc_common aot_executor_module aot_executor common", - "aot_standalone_demo": "memory microtvm_rpc_common common", - "mlperftiny": "memory common", - } - - def _get_platform_version(self, zephyr_base: str) -> float: - with open(pathlib.Path(zephyr_base) / "VERSION", "r") as f: - lines = f.readlines() - for line in lines: - line = line.replace(" ", "").replace("\n", "").replace("\r", "") - if "VERSION_MAJOR" in line: - version_major = line.split("=")[1] - if "VERSION_MINOR" in line: - version_minor = line.split("=")[1] - - return float(f"{version_major}.{version_minor}") - - def _cmsis_required(self, project_path: Union[str, pathlib.Path]) -> bool: - """Check if CMSIS dependency is required.""" - project_path = pathlib.Path(project_path) - for path in (project_path / "codegen" / "host" / "src").iterdir(): - if path.is_file(): - with open(path, "r") as lib_f: - lib_content = lib_f.read() - if any( - header in lib_content - for header in [ - "", - "arm_nn_types.h", - "arm_nnfunctions.h", - ] - ): - return True - return False - - def _generate_cmake_args( - self, - mlf_extracted_path: pathlib.Path, - board: str, - use_fvp: bool, - west_cmd: str, - zephyr_base: str, - verbose: bool, - cmsis_path: pathlib.Path, - ) -> str: - cmake_args = "\n# cmake args\n" - if verbose: - cmake_args += "set(CMAKE_VERBOSE_MAKEFILE TRUE)\n" - - if zephyr_base: - cmake_args += f"set(ZEPHYR_BASE {zephyr_base})\n" - - if west_cmd: - cmake_args += f"set(WEST {west_cmd})\n" - - if self._is_qemu(board, use_fvp): - # Some boards support more than one emulator, so ensure QEMU is set. - cmake_args += f"set(EMU_PLATFORM qemu)\n" - - if self._is_fvp(board, use_fvp): - cmake_args += "set(EMU_PLATFORM armfvp)\n" - cmake_args += "set(ARMFVP_FLAGS -I)\n" - - cmake_args += f"set(BOARD {board})\n" - - if self._cmsis_required(mlf_extracted_path): - assert cmsis_path, CMSIS_PATH_ERROR - cmake_args += f"set(CMSIS_PATH {str(cmsis_path)})\n" - - return cmake_args - - def _copy_src_and_header_files(self, src_dir: pathlib.Path, dst_dir: pathlib.Path): - """Copy content of src_dir from template project to dst_dir in separate - source and header sub-directories. 
- """ - for file in os.listdir(src_dir): - file = src_dir / file - if file.is_file(): - if file.suffix in [".cc", ".c"]: - shutil.copy2(file, dst_dir / "src") - elif file.suffix in [".h"]: - shutil.copy2(file, dst_dir / "include" / "tvm") - - def generate_project(self, model_library_format_path, standalone_crt_dir, project_dir, options): - zephyr_board = options["board"] - project_type = options["project_type"] - zephyr_base = options["zephyr_base"] - west_cmd = options["west_cmd"] - - warning_as_error = options.get("warning_as_error") - use_fvp = options.get("use_fvp") - verbose = options.get("verbose") - - recommended_heap_size = _get_recommended_heap_size_bytes(zephyr_board) - workspace_size_bytes = options.get("workspace_size_bytes") or recommended_heap_size - board_mem_size = _get_board_mem_size_bytes(zephyr_base, zephyr_board) - - compile_definitions = options.get("compile_definitions") - config_main_stack_size = options.get("config_main_stack_size") - - extra_files_tar = options.get("extra_files_tar") - cmsis_path = options.get("cmsis_path") - - # Check Zephyr version - version = self._get_platform_version(zephyr_base) - if version != ZEPHYR_VERSION: - message = f"Zephyr version found is not supported: found {version}, expected {ZEPHYR_VERSION}." - if warning_as_error is not None and warning_as_error: - raise server.ServerError(message=message) - _LOG.warning(message) - - project_dir = pathlib.Path(project_dir) - # Make project directory. - project_dir.mkdir() - - # Copy ourselves and other python scripts to the generated project. TVM may perform further build steps on the generated project - # by launching the copy. - current_dir = pathlib.Path(__file__).parent.absolute() - for file in os.listdir(current_dir): - if file.endswith(".py"): - shutil.copy2(current_dir / file, project_dir / file) - - # Copy launch script - shutil.copy2( - current_dir / "launch_microtvm_api_server.sh", - project_dir / "launch_microtvm_api_server.sh", - ) - - # Copy boards.json file to generated project. - shutil.copy2(BOARDS, project_dir / BOARDS.name) - - # Copy overlay files - board_overlay_path = API_SERVER_DIR / "app-overlay" / f"{zephyr_board}.overlay" - if board_overlay_path.exists(): - shutil.copy2(board_overlay_path, project_dir / f"{zephyr_board}.overlay") - - # Place Model Library Format tarball in the special location, which this script uses to decide - # whether it's being invoked in a template or generated project. - project_model_library_format_tar_path = project_dir / MODEL_LIBRARY_FORMAT_RELPATH - shutil.copy2(model_library_format_path, project_model_library_format_tar_path) - - # Extract Model Library Format tarball.into /model. - extract_path = os.path.splitext(project_model_library_format_tar_path)[0] - with tarfile.TarFile(project_model_library_format_tar_path) as tf: - os.makedirs(extract_path) - tf.extractall(path=extract_path) - - if self._is_qemu(zephyr_board, use_fvp): - shutil.copytree(API_SERVER_DIR / "qemu-hack", project_dir / "qemu-hack") - elif self._is_fvp(zephyr_board, use_fvp): - shutil.copytree(API_SERVER_DIR / "fvp-hack", project_dir / "fvp-hack") - - # Populate CRT. - crt_path = project_dir / "crt" - crt_path.mkdir() - for item in self.CRT_COPY_ITEMS: - src_path = os.path.join(standalone_crt_dir, item) - dst_path = crt_path / item - if os.path.isdir(src_path): - shutil.copytree(src_path, dst_path) - else: - shutil.copy2(src_path, dst_path) - - # Populate CMakeLists. 
- with open(project_dir / CMAKELIST_FILENAME, "w") as cmake_f: - with open(API_SERVER_DIR / f"{CMAKELIST_FILENAME}.template", "r") as cmake_template_f: - for line in cmake_template_f: - if self.API_SERVER_CRT_LIBS_TOKEN in line: - crt_libs = self.CRT_LIBS_BY_PROJECT_TYPE[project_type] - line = line.replace(self.API_SERVER_CRT_LIBS_TOKEN, crt_libs) - - if self.CMAKE_ARGS_TOKEN in line: - line = self._generate_cmake_args( - extract_path, - zephyr_board, - use_fvp, - west_cmd, - zephyr_base, - verbose, - cmsis_path, - ) - - if self.QEMU_PIPE_TOKEN in line: - self.qemu_pipe_dir = pathlib.Path(tempfile.mkdtemp()) - line = line.replace(self.QEMU_PIPE_TOKEN, str(self.qemu_pipe_dir / "fifo")) - - cmake_f.write(line) - - if board_mem_size is not None: - assert ( - workspace_size_bytes < board_mem_size - ), f"Workspace size {workspace_size_bytes} is larger than memory size {board_mem_size} on this board." - cmake_f.write( - f"target_compile_definitions(app PUBLIC -DTVM_WORKSPACE_SIZE_BYTES={workspace_size_bytes})\n" - ) - - if compile_definitions: - flags = compile_definitions - for item in flags: - if "MAX_DB_INPUT_SIZE" in item or "TH_MODEL_VERSION" in item: - compile_target = "tinymlperf_api" - else: - compile_target = "app" - cmake_f.write( - f"target_compile_definitions({compile_target} PUBLIC {item})\n" - ) - - if self._is_fvp(zephyr_board, use_fvp): - cmake_f.write(f"target_compile_definitions(app PUBLIC -DFVP=1)\n") - - self._create_prj_conf( - project_dir, zephyr_board, project_type, config_main_stack_size, verbose, use_fvp - ) - - # Populate crt-config.h - crt_config_dir = project_dir / "crt_config" - crt_config_dir.mkdir() - shutil.copy2( - API_SERVER_DIR / "crt_config" / "crt_config.h", crt_config_dir / "crt_config.h" - ) - - # Populate `src` and `include` - src_dir = project_dir / "src" - src_dir.mkdir() - include_dir = project_dir / "include" / "tvm" - include_dir.mkdir(parents=True) - src_project_type_dir = API_SERVER_DIR / "src" / project_type - self._copy_src_and_header_files(src_project_type_dir, project_dir) - - if self._is_fvp(zephyr_board, use_fvp): - self._copy_src_and_header_files(src_project_type_dir / "fvp", project_dir) - - if project_type == "mlperftiny": - shutil.copytree(src_project_type_dir / "api", src_dir / "api") - - # Populate extra_files - if extra_files_tar: - with tarfile.open(extra_files_tar, mode="r:*") as tf: - tf.extractall(project_dir) - - def build(self, options): - if BUILD_DIR.exists(): - shutil.rmtree(BUILD_DIR) - BUILD_DIR.mkdir() - - zephyr_board = _find_board_from_cmake_file(API_SERVER_DIR / CMAKELIST_FILENAME) - emu_platform = _find_platform_from_cmake_file(API_SERVER_DIR / CMAKELIST_FILENAME) - - env = os.environ - if self._is_fvp(zephyr_board, emu_platform == "armfvp"): - env["ARMFVP_BIN_PATH"] = str((API_SERVER_DIR / "fvp-hack").resolve()) - # Note: We need to explicitly modify the file permissions and make it an executable to pass CI tests. - # [To Do]: Move permission change to Build.groovy.j2 - st = os.stat(env["ARMFVP_BIN_PATH"] + "/FVP_Corstone_SSE-300_Ethos-U55") - os.chmod( - env["ARMFVP_BIN_PATH"] + "/FVP_Corstone_SSE-300_Ethos-U55", - st.st_mode | stat.S_IEXEC, - ) - - check_call(options["west_cmd"].split(" ") + ["build"], cwd=API_SERVER_DIR, env=env) - - # A list of all zephyr_board values which are known to launch using QEMU. Many platforms which - # launch through QEMU by default include "qemu" in their name. However, not all do. This list - # includes those tested platforms which do not include qemu.
- _KNOWN_QEMU_ZEPHYR_BOARDS = ["mps2_an521", "mps3_an547"] - - # A list of all zephyr_board values which are known to launch using ARM FVP (this script configures - # Zephyr to use that launch method). - _KNOWN_FVP_ZEPHYR_BOARDS = ["mps3_an547"] - - @classmethod - def _is_fvp(cls, board, use_fvp): - if use_fvp: - assert ( - board in cls._KNOWN_FVP_ZEPHYR_BOARDS - ), "FVP can't be used to emulate this board on Zephyr" - return True - return False - - @classmethod - def _is_qemu(cls, board, use_fvp=False): - return "qemu" in board or ( - board in cls._KNOWN_QEMU_ZEPHYR_BOARDS and not cls._is_fvp(board, use_fvp) - ) - - @classmethod - def _has_fpu(cls, zephyr_board): - fpu_boards = [name for name, board in BOARD_PROPERTIES.items() if board["fpu"]] - return zephyr_board in fpu_boards - - def flash(self, options): - serial_number = options.get("serial_number") - west_cmd_list = options["west_cmd"].split(" ") - - if _find_platform_from_cmake_file(API_SERVER_DIR / CMAKELIST_FILENAME): - return # NOTE: qemu requires no flash step--it is launched from open_transport. - - flash_runner = _get_flash_runner() - # The nRF5340DK requires an additional `nrfjprog --recover` before each flash cycle. - # This is because readback protection is enabled by default when this device is flashed. - # Otherwise, flashing may fail with an error such as the following: - # ERROR: The operation attempted is unavailable due to readback protection in - # ERROR: your device. Please use --recover to unlock the device. - zephyr_board = _find_board_from_cmake_file(API_SERVER_DIR / CMAKELIST_FILENAME) - if zephyr_board.startswith("nrf5340dk") and flash_runner == "nrfjprog": - recover_args = ["nrfjprog", "--recover"] - recover_args.extend(_get_nrf_device_args(serial_number)) - check_call(recover_args, cwd=API_SERVER_DIR / "build") - - flash_extra_args = [] - if flash_runner == "openocd" and serial_number: - flash_extra_args += ["--cmd-pre-init", f"""hla_serial {serial_number}"""] - - if flash_runner == "nrfjprog": - flash_extra_args += _get_nrf_device_args(serial_number) - - check_call( - west_cmd_list + ["flash", "-r", flash_runner] + flash_extra_args, - cwd=API_SERVER_DIR / "build", - ) - - def open_transport(self, options): - west_cmd = options["west_cmd"] - zephyr_board = _find_board_from_cmake_file(API_SERVER_DIR / CMAKELIST_FILENAME) - emu_platform = _find_platform_from_cmake_file(API_SERVER_DIR / CMAKELIST_FILENAME) - if self._is_fvp(zephyr_board, emu_platform == "armfvp"): - arm_fvp_path = options["arm_fvp_path"] - verbose = options.get("verbose") - transport = ZephyrFvpTransport(west_cmd, arm_fvp_path, verbose) - elif self._is_qemu(zephyr_board): - gdbserver_port = options.get("gdbserver_port") - transport = ZephyrQemuTransport(west_cmd, gdbserver_port) - else: - zephyr_base = options["zephyr_base"] - serial_number = options.get("serial_number") - transport = ZephyrSerialTransport(zephyr_base, serial_number) - - to_return = transport.open() - self._transport = transport - atexit.register(lambda: self.close_transport()) - return to_return - - def close_transport(self): - if self._transport is not None: - self._transport.close() - self._transport = None - - def read_transport(self, n, timeout_sec): - if self._transport is None: - raise server.TransportClosedError() - - return self._transport.read(n, timeout_sec) - - def write_transport(self, data, timeout_sec): - if self._transport is None: - raise server.TransportClosedError() - - return self._transport.write(data, timeout_sec) - - -def _set_nonblock(fd): - flag = 
fcntl.fcntl(fd, fcntl.F_GETFL) - fcntl.fcntl(fd, fcntl.F_SETFL, flag | os.O_NONBLOCK) - new_flag = fcntl.fcntl(fd, fcntl.F_GETFL) - assert (new_flag & os.O_NONBLOCK) != 0, "Cannot set file descriptor {fd} to non-blocking" - - -class ZephyrSerialTransport: - - NRF5340_VENDOR_ID = 0x1366 - - # NRF5340_DK v1.0.0 uses VCOM2 - # NRF5340_DK v2.0.0 uses VCOM1 - NRF5340_DK_BOARD_VCOM_BY_PRODUCT_ID = {0x1055: "VCOM2", 0x1051: "VCOM1"} - - @classmethod - def _lookup_baud_rate(cls, zephyr_base: str): - # TODO(mehrdadh): remove this hack once dtlib.py is a standalone project - # https://github.com/zephyrproject-rtos/zephyr/blob/v2.7-branch/scripts/dts/README.txt - sys.path.insert( - 0, - os.path.join(zephyr_base, "scripts", "dts", "python-devicetree", "src", "devicetree"), - ) - try: - import dtlib # pylint: disable=import-outside-toplevel - finally: - sys.path.pop(0) - - dt_inst = dtlib.DT(BUILD_DIR / "zephyr" / "zephyr.dts") - uart_baud = ( - dt_inst.get_node("/chosen") - .props["zephyr,console"] - .to_path() - .props["current-speed"] - .to_num() - ) - _LOG.debug("zephyr transport: found UART baudrate from devicetree: %d", uart_baud) - - return uart_baud - - @classmethod - def _find_nrf_serial_port(cls, serial_number: str = None): - com_ports = subprocess.check_output( - ["nrfjprog", "--com"] + _get_device_args(serial_number), encoding="utf-8" - ) - ports_by_vcom = {} - for line in com_ports.split("\n")[:-1]: - parts = line.split() - ports_by_vcom[parts[2]] = parts[1] - - nrf_board = usb.core.find(idVendor=cls.NRF5340_VENDOR_ID) - - if nrf_board == None: - raise Exception("_find_nrf_serial_port: unable to find NRF5340DK") - - if nrf_board.idProduct in cls.NRF5340_DK_BOARD_VCOM_BY_PRODUCT_ID: - vcom_port = cls.NRF5340_DK_BOARD_VCOM_BY_PRODUCT_ID[nrf_board.idProduct] - else: - raise Exception("_find_nrf_serial_port: unable to find known NRF5340DK product ID") - - return ports_by_vcom[vcom_port] - - @classmethod - def _find_openocd_serial_port(cls, serial_number: str = None): - return generic_find_serial_port(serial_number) - - @classmethod - def _find_jlink_serial_port(cls, serial_number: str = None): - return generic_find_serial_port(serial_number) - - @classmethod - def _find_stm32cubeprogrammer_serial_port(cls, serial_number: str = None): - return generic_find_serial_port(serial_number) - - @classmethod - def _find_serial_port(cls, serial_number: str = None): - flash_runner = _get_flash_runner() - - if flash_runner == "nrfjprog": - return cls._find_nrf_serial_port(serial_number) - - if flash_runner == "openocd": - return cls._find_openocd_serial_port(serial_number) - - if flash_runner == "jlink": - return cls._find_jlink_serial_port(serial_number) - - if flash_runner == "stm32cubeprogrammer": - return cls._find_stm32cubeprogrammer_serial_port(serial_number) - - raise RuntimeError(f"Don't know how to deduce serial port for flash runner {flash_runner}") - - def __init__(self, zephyr_base: str, serial_number: str = None): - self._zephyr_base = zephyr_base - self._serial_number = serial_number - self._port = None - - def open(self): - port_path = self._find_serial_port(self._serial_number) - self._port = serial.Serial(port_path, baudrate=self._lookup_baud_rate(self._zephyr_base)) - return server.TransportTimeouts( - session_start_retry_timeout_sec=2.0, - session_start_timeout_sec=5.0, - session_established_timeout_sec=5.0, - ) - - def close(self): - self._port.close() - self._port = None - - def read(self, n, timeout_sec): - self._port.timeout = timeout_sec - to_return = self._port.read(n) - 
if not to_return: - raise server.IoTimeoutError() - - return to_return - - def write(self, data, timeout_sec): - self._port.write_timeout = timeout_sec - bytes_written = 0 - while bytes_written < len(data): - n = self._port.write(data) - data = data[n:] - bytes_written += n - - -class ZephyrQemuMakeResult(enum.Enum): - QEMU_STARTED = "qemu_started" - MAKE_FAILED = "make_failed" - EOF = "eof" - - -class ZephyrQemuTransport: - """The user-facing Zephyr QEMU transport class.""" - - def __init__(self, west_cmd: str, gdbserver_port: int = None): - self._gdbserver_port = gdbserver_port - self.proc = None - self.pipe_dir = None - self.read_fd = None - self.write_fd = None - self._queue = queue.Queue() - self._west_cmd = west_cmd - - def open(self): - with open(BUILD_DIR / "CMakeCache.txt", "r") as cmake_cache_f: - for line in cmake_cache_f: - if "QEMU_PIPE:" in line: - self.pipe = pathlib.Path(line[line.find("=") + 1 :]) - break - self.pipe_dir = self.pipe.parents[0] - self.write_pipe = self.pipe_dir / "fifo.in" - self.read_pipe = self.pipe_dir / "fifo.out" - os.mkfifo(self.write_pipe) - os.mkfifo(self.read_pipe) - - env = None - if self._gdbserver_port: - env = os.environ.copy() - env["TVM_QEMU_GDBSERVER_PORT"] = self._gdbserver_port - - self.proc = subprocess.Popen( - self._west_cmd.split(" ") + ["build", "-t", "run"], - cwd=BUILD_DIR, - env=env, - stdout=subprocess.PIPE, - ) - self._wait_for_qemu() - - # NOTE: although each pipe is unidirectional, open both as RDWR to work around a select - # limitation on linux. Without this, non-blocking I/O can't use timeouts because named - # FIFO are always considered ready to read when no one has opened them for writing. - self.read_fd = os.open(self.read_pipe, os.O_RDWR | os.O_NONBLOCK) - self.write_fd = os.open(self.write_pipe, os.O_RDWR | os.O_NONBLOCK) - _set_nonblock(self.read_fd) - _set_nonblock(self.write_fd) - - return server.TransportTimeouts( - session_start_retry_timeout_sec=2.0, - session_start_timeout_sec=10.0, - session_established_timeout_sec=10.0, - ) - - def close(self): - did_write = False - if self.write_fd is not None: - try: - server.write_with_timeout( - self.write_fd, b"\x01x", 1.0 - ) # Use a short timeout since we will kill the process - did_write = True - except server.IoTimeoutError: - pass - os.close(self.write_fd) - self.write_fd = None - - if self.proc: - if not did_write: - self.proc.terminate() - try: - self.proc.wait(5.0) - except subprocess.TimeoutExpired: - self.proc.kill() - - if self.read_fd: - os.close(self.read_fd) - self.read_fd = None - - if self.pipe_dir is not None: - shutil.rmtree(self.pipe_dir) - self.pipe_dir = None - - def read(self, n, timeout_sec): - return server.read_with_timeout(self.read_fd, n, timeout_sec) - - def write(self, data, timeout_sec): - to_write = bytearray() - escape_pos = [] - for i, b in enumerate(data): - if b == 0x01: - to_write.append(b) - escape_pos.append(i) - to_write.append(b) - - while to_write: - num_written = server.write_with_timeout(self.write_fd, to_write, timeout_sec) - to_write = to_write[num_written:] - - def _qemu_check_stdout(self): - for line in self.proc.stdout: - line = str(line) - _LOG.info("%s", line) - if "[QEMU] CPU" in line: - self._queue.put(ZephyrQemuMakeResult.QEMU_STARTED) - else: - line = re.sub("[^a-zA-Z0-9 \n]", "", line) - pattern = r"recipe for target (\w*) failed" - if re.search(pattern, line, re.IGNORECASE): - self._queue.put(ZephyrQemuMakeResult.MAKE_FAILED) - self._queue.put(ZephyrQemuMakeResult.EOF) - - def _wait_for_qemu(self): - 
threading.Thread(target=self._qemu_check_stdout, daemon=True).start() - while True: - try: - item = self._queue.get(timeout=120) - except Exception: - raise TimeoutError("QEMU setup timeout.") - - if item == ZephyrQemuMakeResult.QEMU_STARTED: - break - - if item in [ZephyrQemuMakeResult.MAKE_FAILED, ZephyrQemuMakeResult.EOF]: - raise RuntimeError("QEMU setup failed.") - - raise ValueError(f"{item} not expected.") - - -class ZephyrFvpMakeResult(enum.Enum): - FVP_STARTED = "fvp_started" - MICROTVM_API_SERVER_INIT = "fvp_initialized" - MAKE_FAILED = "make_failed" - EOF = "eof" - - -class BlockingStream: - """Reimplementation of Stream class from Iris with blocking semantics.""" - - def __init__(self): - self.q = queue.Queue() - self.unread = None - - def read(self, n=-1, timeout_sec=None): - assert ( - n != -1 - ), "expect firmware to open stdin using raw mode, and therefore expect sized read requests" - - data = b"" - if self.unread: - data = data + self.unread - self.unread = None - - while len(data) < n: - try: - # When there is some data to return, fetch as much as possible, then return what we can. - # When there is no data yet to return, block. - data += self.q.get(block=not len(data), timeout=timeout_sec) - except queue.Empty: - break - - if len(data) > n: - self.unread = data[n:] - data = data[:n] - - return data - - readline = read - - def write(self, data): - self.q.put(data) - - -class ZephyrFvpTransport: - """A transport class that communicates with the ARM FVP via Iris server.""" - - def __init__(self, arm_fvp_path: str, verbose: bool = False): - self._arm_fvp_path = arm_fvp_path - self._verbose = verbose - self.proc = None - self._queue = queue.Queue() - self._import_iris() - - def _import_iris(self): - assert self._arm_fvp_path, "arm_fvp_path is not defined." - # Location as seen in the FVP_Corstone_SSE-300_11.15_24 tar. - iris_lib_path = ( - pathlib.Path(self._arm_fvp_path).parent.parent.parent / "Iris" / "Python" / "iris" - ) - - sys.path.insert(0, str(iris_lib_path.parent)) - try: - import iris.NetworkModelInitializer - finally: - sys.path.pop(0) - - self._iris_lib = iris - - def _convertStringToU64Array(strValue): - numBytes = len(strValue) - if numBytes == 0: - return [] - - numU64 = (numBytes + 7) // 8 - # Extend the string ending with '\0', so that the string length is multiple of 8. - # E.g. 
'hello' is extended to: 'hello'+\0\0\0 - strExt = strValue.ljust(8 * numU64, b"\0") - # Convert the string to a list of uint64_t in little endian - return struct.unpack("<{}Q".format(numU64), strExt) - - iris.iris.convertStringToU64Array = _convertStringToU64Array - - def open(self): - args = ["ninja"] - if self._verbose: - args.append("-v") - args.append("run") - env = dict(os.environ) - env["ARMFVP_BIN_PATH"] = str(API_SERVER_DIR / "fvp-hack") - self.proc = subprocess.Popen( - args, - cwd=BUILD_DIR, - env=env, - stdout=subprocess.PIPE, - ) - threading.Thread(target=self._fvp_check_stdout, daemon=True).start() - - self.iris_port = self._wait_for_fvp() - _LOG.info("IRIS started on port %d", self.iris_port) - NetworkModelInitializer = self._iris_lib.NetworkModelInitializer.NetworkModelInitializer - self._model_init = NetworkModelInitializer( - host="localhost", port=self.iris_port, timeout_in_ms=1000 - ) - self._model = self._model_init.start() - self._target = self._model.get_target("component.FVP_MPS3_Corstone_SSE_300.cpu0") - - self._target.handle_semihost_io() - self._target._stdout = BlockingStream() - self._target._stdin = BlockingStream() - self._model.run(blocking=False, timeout=100) - self._wait_for_semihost_init() - _LOG.info("IRIS semihosting initialized.") - - return server.TransportTimeouts( - session_start_retry_timeout_sec=2.0, - session_start_timeout_sec=10.0, - session_established_timeout_sec=10.0, - ) - - def _fvp_check_stdout(self): - START_MSG = "Iris server started listening to port" - INIT_MSG = "microTVM Zephyr runtime - running" - for line in self.proc.stdout: - line = str(line, "utf-8") - _LOG.info("%s", line) - start_msg = re.match(START_MSG + r" ([0-9]+)\n", line) - init_msg = re.match(INIT_MSG, line) - if start_msg: - self._queue.put((ZephyrFvpMakeResult.FVP_STARTED, int(start_msg.group(1)))) - elif init_msg: - self._queue.put((ZephyrFvpMakeResult.MICROTVM_API_SERVER_INIT, None)) - break - else: - line = re.sub("[^a-zA-Z0-9 \n]", "", line) - pattern = r"recipe for target (\w*) failed" - if re.search(pattern, line, re.IGNORECASE): - self._queue.put((ZephyrFvpMakeResult.MAKE_FAILED, None)) - - self._queue.put((ZephyrFvpMakeResult.EOF, None)) - - def _wait_for_fvp(self): - """waiting for the START_MSG to appear on the stdout""" - while True: - try: - item = self._queue.get(timeout=120) - except Exception: - raise TimeoutError("FVP setup timeout.") - - if item[0] == ZephyrFvpMakeResult.FVP_STARTED: - return item[1] - - if item[0] in [ZephyrFvpMakeResult.MAKE_FAILED, ZephyrFvpMakeResult.EOF]: - raise RuntimeError("FVP setup failed.") - - raise ValueError(f"{item} not expected.") - - def _wait_for_semihost_init(self): - """waiting for the INIT_MSG to appear on the stdout""" - while True: - try: - item = self._queue.get(timeout=240) - except Exception: - raise TimeoutError("semihost init timeout.") - - if item[0] == ZephyrFvpMakeResult.MICROTVM_API_SERVER_INIT: - return - - raise ValueError(f"{item} not expected.") - - def close(self): - self._model._shutdown_model() - self._model.client.disconnect(force=True) - parent = psutil.Process(self.proc.pid) - if parent: - for child in parent.children(recursive=True): - child.terminate() - parent.terminate() - - def read(self, n, timeout_sec): - return self._target.stdout.read(n, timeout_sec) - - def write(self, data, timeout_sec): - self._target.stdin.write(data) - - -if __name__ == "__main__": - server.main(Handler()) diff --git a/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-arm 
b/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-arm deleted file mode 120000 index ebbc8ad5ad9d..000000000000 --- a/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-arm +++ /dev/null @@ -1 +0,0 @@ -qemu-system-i386 \ No newline at end of file diff --git a/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-i386 b/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-i386 deleted file mode 100755 index 2d350698edb9..000000000000 --- a/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-i386 +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -e -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Zephyr insists on running qemu with a -pidfile option, but that option doesn't appear to -# work given the way we've configured docker (the underlying filesystem doesn't support the -# file locking it needs to). This script strips any -pidfile option, then invokes qemu. - -ARGS=( "$(basename $0)" ) - -if [ "${QEMU_BIN_PATH}" != "" ]; then - ARGS=${QEMU_BIN_PATH}/${ARGS} -fi - -while [ "$#" -gt 0 ]; do - if [ "$1" == "-pidfile" ]; then - shift - else - ARGS=( "${ARGS[@]}" "$1" ) - fi - shift -done - -# For debugging -if [ "${TVM_QEMU_GDBSERVER_PORT}" != "" ]; then - ARGS=( "${ARGS[@]}" -gdb "tcp::${TVM_QEMU_GDBSERVER_PORT}" -S ) -fi - -"${ARGS[@]}" diff --git a/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-riscv32 b/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-riscv32 deleted file mode 120000 index ebbc8ad5ad9d..000000000000 --- a/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-riscv32 +++ /dev/null @@ -1 +0,0 @@ -qemu-system-i386 \ No newline at end of file diff --git a/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-riscv64 b/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-riscv64 deleted file mode 120000 index ebbc8ad5ad9d..000000000000 --- a/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-riscv64 +++ /dev/null @@ -1 +0,0 @@ -qemu-system-i386 \ No newline at end of file diff --git a/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-xilinx-aarch64 b/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-xilinx-aarch64 deleted file mode 120000 index ebbc8ad5ad9d..000000000000 --- a/apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-xilinx-aarch64 +++ /dev/null @@ -1 +0,0 @@ -qemu-system-i386 \ No newline at end of file diff --git a/apps/microtvm/zephyr/template_project/src/aot_standalone_demo/main.c b/apps/microtvm/zephyr/template_project/src/aot_standalone_demo/main.c deleted file mode 100644 index fff8f5787597..000000000000 --- a/apps/microtvm/zephyr/template_project/src/aot_standalone_demo/main.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license 
agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "tvm/input_data.h" -#include "tvm/output_data.h" -#include "tvmgen_default.h" - -#ifdef CONFIG_ARCH_POSIX -#include "posix_board_if.h" -#endif - -// Transport Commands. -// Commands on host end with `\n` -// Commands on microTVM device end with `%` -const unsigned char CMD_WAKEUP[] = "wakeup\n"; -const unsigned char CMD_READY[] = "ready\n"; -const unsigned char CMD_INIT[] = "init"; -const unsigned char CMD_INFER[] = "infer"; - -#define CMD_SIZE 80u -#define CMD_TERMINATOR '%' - -static uint8_t main_rx_buf[128]; -static uint8_t g_cmd_buf[128]; -static size_t g_cmd_buf_ind; - -static const struct device* g_microtvm_uart; -#define RING_BUF_SIZE_BYTES (TVM_CRT_MAX_PACKET_SIZE_BYTES + 100) - -// Ring buffer used to store data read from the UART on rx interrupt. -RING_BUF_DECLARE(uart_rx_rbuf, RING_BUF_SIZE_BYTES); - -uint32_t UartTxWrite(const char* data, uint32_t size) { - for (uint32_t i = 0; i < size; i++) { - uart_poll_out(g_microtvm_uart, data[i]); - } - return size; -} - -uint32_t UartRxRead(uint8_t* data, uint32_t data_size_bytes) { - unsigned int key = irq_lock(); - uint32_t bytes_read = ring_buf_get(&uart_rx_rbuf, data, data_size_bytes); - irq_unlock(key); - return bytes_read; -} - -// Initialize UART -void UartInit() { - // Claim console device. - g_microtvm_uart = DEVICE_DT_GET(DT_CHOSEN(zephyr_console)); - const struct uart_config config = {.baudrate = 115200, - .parity = UART_CFG_PARITY_NONE, - .stop_bits = UART_CFG_STOP_BITS_1, - .data_bits = UART_CFG_DATA_BITS_8, - .flow_ctrl = UART_CFG_FLOW_CTRL_NONE}; - uart_configure(g_microtvm_uart, &config); - uart_rx_init(&uart_rx_rbuf, g_microtvm_uart); -} - -static uint8_t uart_data[8]; -// UART interrupt callback. -void uart_irq_cb(const struct device* dev, void* user_data) { - while (uart_irq_update(dev) && uart_irq_is_pending(dev)) { - struct ring_buf* rbuf = (struct ring_buf*)user_data; - if (uart_irq_rx_ready(dev) != 0) { - for (;;) { - // Read a small chunk of data from the UART. - int bytes_read = uart_fifo_read(dev, uart_data, sizeof(uart_data)); - if (bytes_read < 0) { - TVMPlatformAbort((tvm_crt_error_t)(0xbeef1)); - } else if (bytes_read == 0) { - break; - } - // Write it into the ring buffer. - int bytes_written = ring_buf_put(rbuf, uart_data, bytes_read); - if (bytes_read != bytes_written) { - TVMPlatformAbort((tvm_crt_error_t)(0xbeef2)); - } - } - } - } -} - -// Used to initialize the UART receiver. -void uart_rx_init(struct ring_buf* rbuf, const struct device* dev) { - uart_irq_callback_user_data_set(dev, uart_irq_cb, (void*)rbuf); - uart_irq_rx_enable(dev); -} - -void TVMLogf(const char* msg, ...) 
{ - char buffer[256]; - int size; - va_list args; - va_start(args, msg); - size = vsprintf(buffer, msg, args); - va_end(args); - UartTxWrite(buffer, (uint32_t)size); -} - -void Infer() { - struct tvmgen_default_inputs inputs = { - .input_1 = input_data, - }; - struct tvmgen_default_outputs outputs = { - .Identity = output_data, - }; - - double elapsed_time = 0; - TVMPlatformTimerStart(); - int ret_val = tvmgen_default_run(&inputs, &outputs); - TVMPlatformTimerStop(&elapsed_time); - - if (ret_val != 0) { - TVMLogf("Error: %d\n", ret_val); - TVMPlatformAbort(kTvmErrorPlatformCheckFailure); - } - - size_t max_ind = -1; - float max_val = -FLT_MAX; - for (size_t i = 0; i < output_data_len; i++) { - if (output_data[i] >= max_val) { - max_ind = i; - max_val = output_data[i]; - } - } - TVMLogf("result:%d:%d\n", max_ind, (uint32_t)(elapsed_time * 1000)); -} - -// Execute functions based on received command -void command_ready(char* command) { - if (strncmp(command, CMD_INIT, CMD_SIZE) == 0) { - UartTxWrite(CMD_WAKEUP, sizeof(CMD_WAKEUP)); - } else if (strncmp(command, CMD_INFER, CMD_SIZE) == 0) { - Infer(); - } else { - UartTxWrite(CMD_READY, sizeof(CMD_READY)); - } -} - -// Append received characters to buffer and check for termination character. -void serial_callback(char* message, int len_bytes) { - for (int i = 0; i < len_bytes; i++) { - if (message[i] == CMD_TERMINATOR) { - g_cmd_buf[g_cmd_buf_ind] = (char)0; - command_ready(g_cmd_buf); - g_cmd_buf_ind = 0; - } else { - g_cmd_buf[g_cmd_buf_ind] = message[i]; - g_cmd_buf_ind += 1; - } - } -} - -void main(void) { - TVMPlatformInitialize(); - UartInit(); - g_cmd_buf_ind = 0; - memset((char*)g_cmd_buf, 0, sizeof(g_cmd_buf)); - - while (true) { - int bytes_read = UartRxRead(main_rx_buf, sizeof(main_rx_buf)); - if (bytes_read > 0) { - serial_callback(main_rx_buf, bytes_read); - } - } - -#ifdef CONFIG_ARCH_POSIX - posix_exit(0); -#endif -} diff --git a/apps/microtvm/zephyr/template_project/src/aot_standalone_demo/platform.c b/apps/microtvm/zephyr/template_project/src/aot_standalone_demo/platform.c deleted file mode 100644 index c66dad571155..000000000000 --- a/apps/microtvm/zephyr/template_project/src/aot_standalone_demo/platform.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \brief Implementation of TVMPlatform functions in tvm/runtime/crt/platform.h - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "crt_config.h" -#include "dlpack/dlpack.h" -#include "tvmgen_default.h" - -// TVM_WORKSPACE_SIZE_BYTES defined in Project API Makefile -static uint8_t g_aot_memory[TVM_WORKSPACE_SIZE_BYTES]; -tvm_workspace_t app_workspace; - -#define MILLIS_TIL_EXPIRY 200 -#define TIME_TIL_EXPIRY (K_MSEC(MILLIS_TIL_EXPIRY)) -struct k_timer g_microtvm_timer; -uint32_t g_microtvm_start_time; -int g_microtvm_timer_running = 0; - -size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, const char* fmt, - va_list args) { - return vsnprintk(out_buf, out_buf_size_bytes, fmt, args); -} - -void TVMPlatformAbort(tvm_crt_error_t error) { - TVMLogf("TVMPlatformAbort: %08x\n", error); - sys_reboot(SYS_REBOOT_COLD); - for (;;) - ; -} - -tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { - return StackMemoryManager_Allocate(&app_workspace, num_bytes, out_ptr); -} - -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { - return StackMemoryManager_Free(&app_workspace, ptr); -} - -tvm_crt_error_t TVMPlatformInitialize() { - k_timer_init(&g_microtvm_timer, NULL, NULL); - StackMemoryManager_Init(&app_workspace, g_aot_memory, sizeof(g_aot_memory)); - return kTvmErrorNoError; -} - -tvm_crt_error_t TVMPlatformTimerStart() { - if (g_microtvm_timer_running) { - TVMLogf("timer already running"); - return kTvmErrorPlatformTimerBadState; - } - - k_timer_start(&g_microtvm_timer, TIME_TIL_EXPIRY, TIME_TIL_EXPIRY); - g_microtvm_start_time = k_cycle_get_32(); - g_microtvm_timer_running = 1; - return kTvmErrorNoError; -} - -tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) { - if (!g_microtvm_timer_running) { - TVMLogf("timer not running"); - return kTvmErrorSystemErrorMask | 2; - } - - uint32_t stop_time = k_cycle_get_32(); - - // compute how long the work took - uint32_t cycles_spent = stop_time - g_microtvm_start_time; - if (stop_time < g_microtvm_start_time) { - // we rolled over *at least* once, so correct the rollover it was *only* - // once, because we might still use this result - cycles_spent = ~((uint32_t)0) - (g_microtvm_start_time - stop_time); - } - - uint32_t ns_spent = (uint32_t)k_cyc_to_ns_floor64(cycles_spent); - double hw_clock_res_us = ns_spent / 1000.0; - - // need to grab time remaining *before* stopping. when stopped, this function - // always returns 0. 
- int32_t time_remaining_ms = k_timer_remaining_get(&g_microtvm_timer); - k_timer_stop(&g_microtvm_timer); - // check *after* stopping to prevent extra expiries on the happy path - if (time_remaining_ms < 0) { - return kTvmErrorSystemErrorMask | 3; - } - uint32_t num_expiries = k_timer_status_get(&g_microtvm_timer); - uint32_t timer_res_ms = ((num_expiries * MILLIS_TIL_EXPIRY) + time_remaining_ms); - double approx_num_cycles = - (double)k_ticks_to_cyc_floor32(1) * (double)k_ms_to_ticks_ceil32(timer_res_ms); - // if we approach the limits of the HW clock datatype (uint32_t), use the - // coarse-grained timer result instead - if (approx_num_cycles > (0.5 * (~((uint32_t)0)))) { - *elapsed_time_seconds = timer_res_ms / 1000.0; - } else { - *elapsed_time_seconds = hw_clock_res_us / 1e6; - } - - g_microtvm_timer_running = 0; - return kTvmErrorNoError; -} diff --git a/apps/microtvm/zephyr/template_project/src/host_driven/fvp/semihost.c b/apps/microtvm/zephyr/template_project/src/host_driven/fvp/semihost.c deleted file mode 100644 index d762807bc279..000000000000 --- a/apps/microtvm/zephyr/template_project/src/host_driven/fvp/semihost.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -/* - * - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "tvm/semihost.h" - -int32_t stdout_fd; -int32_t stdin_fd; - -uint32_t semihost_cmd(uint32_t opcode, void* arg) { - uint32_t ret_val; - __asm__ volatile( - "mov r0, %[opcode]\n\t" - "mov r1, %[arg]\n\t" - "bkpt #0xab\n\r" - "mov %[ret_val], r0" - : [ret_val] "=r"(ret_val) - : [opcode] "r"(opcode), [arg] "r"(arg) - : "r1", "memory"); - - return ret_val; -} - -int32_t stdout_fd; -int32_t stdin_fd; - -void init_semihosting() { - // https://github.com/ARM-software/abi-aa/blob/main/semihosting/semihosting.rst#sys-open-0x01 - struct { - const char* file_name; - uint32_t mode; - uint32_t file_name_len; - } params; - params.file_name = ":tt"; - params.mode = 5; // "wb" - params.file_name_len = 3; - stdout_fd = semihost_cmd(0x01, &params); - - params.mode = 0; - stdin_fd = semihost_cmd(0x01, &params); -} - -ssize_t semihost_read(uint8_t* data, size_t size) { - struct { - uint32_t file_handle; - const uint8_t* data; - uint32_t size; - } read_req; - read_req.file_handle = stdin_fd; - read_req.data = data; - read_req.size = size; - uint32_t ret_val = semihost_cmd(0x06, &read_req); - return size - ret_val; -} - -ssize_t semihost_write(void* unused_context, const uint8_t* data, size_t size) { - struct { - uint32_t file_handle; - const uint8_t* data; - uint32_t size; - } write_req; - write_req.file_handle = stdout_fd; - write_req.data = data; - write_req.size = size; - uint32_t ret_val = semihost_cmd(0x05, &write_req); - return size - ret_val; -} diff --git a/apps/microtvm/zephyr/template_project/src/host_driven/fvp/semihost.h b/apps/microtvm/zephyr/template_project/src/host_driven/fvp/semihost.h deleted file mode 100644 index 06b42ae3f95b..000000000000 --- a/apps/microtvm/zephyr/template_project/src/host_driven/fvp/semihost.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef TVM_APPS_MICROTVM_ZEPHYR_HOST_DRIVEN_SEMIHOST_H_ -#define TVM_APPS_MICROTVM_ZEPHYR_HOST_DRIVEN_SEMIHOST_H_ - -#include -#include -#include - -void init_semihosting(); - -ssize_t semihost_read(uint8_t* data, size_t size); - -ssize_t semihost_write(void* unused_context, const uint8_t* data, size_t size); - -#endif /* TVM_APPS_MICROTVM_ZEPHYR_HOST_DRIVEN_SEMIHOST_H_ */ diff --git a/apps/microtvm/zephyr/template_project/src/host_driven/main.c b/apps/microtvm/zephyr/template_project/src/host_driven/main.c deleted file mode 100644 index 1c63474817de..000000000000 --- a/apps/microtvm/zephyr/template_project/src/host_driven/main.c +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements.
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * - * SPDX-License-Identifier: Apache-2.0 - */ - -/* - * This is a sample Zephyr-based application that contains the logic - * needed to control a microTVM-based model via the UART. This is only - * intended to be a demonstration, since typically you will want to incorporate - * this logic into your own application. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_ARCH_POSIX -#include "posix_board_if.h" -#endif - -#include "crt_config.h" - -#ifdef FVP -#include "tvm/semihost.h" -#endif - -static const struct device* tvm_uart; - -static size_t g_num_bytes_requested = 0; -static size_t g_num_bytes_written = 0; -static size_t g_num_bytes_in_rx_buffer = 0; - -// Called by TVM to write serial data to the UART. -ssize_t uart_write(void* unused_context, const uint8_t* data, size_t size) { - g_num_bytes_requested += size; - for (size_t i = 0; i < size; i++) { - uart_poll_out(tvm_uart, data[i]); - g_num_bytes_written++; - } - return size; -} - -ssize_t serial_write(void* unused_context, const uint8_t* data, size_t size) { -#ifdef FVP - return semihost_write(unused_context, data, size); -#else - return uart_write(unused_context, data, size); -#endif -} - -// Ring buffer used to store data read from the UART on rx interrupt. -// This ring buffer size is only required for testing with QEMU and not for physical hardware. -#define RING_BUF_SIZE_BYTES (TVM_CRT_MAX_PACKET_SIZE_BYTES + 100) -RING_BUF_ITEM_DECLARE_SIZE(uart_rx_rbuf, RING_BUF_SIZE_BYTES); - -// UART interrupt callback. -void uart_irq_cb(const struct device* dev, void* user_data) { - uart_irq_update(dev); - if (uart_irq_is_pending(dev)) { - struct ring_buf* rbuf = (struct ring_buf*)user_data; - if (uart_irq_rx_ready(dev) != 0) { - uint8_t* data; - uint32_t size; - size = ring_buf_put_claim(rbuf, &data, RING_BUF_SIZE_BYTES); - int rx_size = uart_fifo_read(dev, data, size); - // Write it into the ring buffer. - g_num_bytes_in_rx_buffer += rx_size; - - if (g_num_bytes_in_rx_buffer > RING_BUF_SIZE_BYTES) { - TVMPlatformAbort((tvm_crt_error_t)0xbeef3); - } - - if (rx_size < 0) { - TVMPlatformAbort((tvm_crt_error_t)0xbeef1); - } - - int err = ring_buf_put_finish(rbuf, rx_size); - if (err != 0) { - TVMPlatformAbort((tvm_crt_error_t)0xbeef2); - } - } - } -} - -// Used to initialize the UART receiver. -void uart_rx_init(struct ring_buf* rbuf, const struct device* dev) { - uart_irq_callback_user_data_set(dev, uart_irq_cb, (void*)rbuf); - uart_irq_rx_enable(dev); -} - -// The main function of this application. -extern void __stdout_hook_install(int (*hook)(int)); -void main(void) { - TVMPlatformInitialize(); - - // Claim console device. 
- tvm_uart = DEVICE_DT_GET(DT_CHOSEN(zephyr_console)); - uart_rx_init(&uart_rx_rbuf, tvm_uart); - -#ifdef FVP - init_semihosting(); - // send some dummy log to speed up the initialization - for (int i = 0; i < 100; ++i) { - uart_write(NULL, "dummy log...\n", 13); - } - uart_write(NULL, "microTVM Zephyr runtime - running\n", 34); -#endif - - // Initialize microTVM RPC server, which will receive commands from the UART and execute them. - microtvm_rpc_server_t server = MicroTVMRpcServerInit(serial_write, NULL); - TVMLogf("microTVM Zephyr runtime - running"); - - // The main application loop. We continuously read commands from the UART - // and dispatch them to MicroTVMRpcServerLoop(). - while (true) { -#ifdef FVP - uint8_t data[128]; - uint32_t bytes_read = semihost_read(data, 128); -#else - uint8_t* data; - unsigned int key = irq_lock(); - uint32_t bytes_read = ring_buf_get_claim(&uart_rx_rbuf, &data, RING_BUF_SIZE_BYTES); -#endif - if (bytes_read > 0) { - uint8_t* ptr = data; - size_t bytes_remaining = bytes_read; - while (bytes_remaining > 0) { - // Pass the received bytes to the RPC server. - tvm_crt_error_t err = MicroTVMRpcServerLoop(server, &ptr, &bytes_remaining); - if (err != kTvmErrorNoError && err != kTvmErrorFramingShortPacket) { - TVMPlatformAbort(err); - } -#ifdef FVP - } - } -#else - g_num_bytes_in_rx_buffer -= bytes_read; - if (g_num_bytes_written != 0 || g_num_bytes_requested != 0) { - if (g_num_bytes_written != g_num_bytes_requested) { - TVMPlatformAbort((tvm_crt_error_t)0xbeef5); - } - g_num_bytes_written = 0; - g_num_bytes_requested = 0; - } - } - int err = ring_buf_get_finish(&uart_rx_rbuf, bytes_read); - if (err != 0) { - TVMPlatformAbort((tvm_crt_error_t)0xbeef6); - } - } - irq_unlock(key); -#endif - } - -#ifdef CONFIG_ARCH_POSIX - posix_exit(0); -#endif -} diff --git a/apps/microtvm/zephyr/template_project/src/host_driven/platform.c b/apps/microtvm/zephyr/template_project/src/host_driven/platform.c deleted file mode 100644 index 8aa9abf235c7..000000000000 --- a/apps/microtvm/zephyr/template_project/src/host_driven/platform.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \brief Implementation of TVMPlatform functions in tvm/runtime/crt/platform.h - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -K_HEAP_DEFINE(tvm_heap, TVM_WORKSPACE_SIZE_BYTES); - -volatile timing_t g_microtvm_start_time, g_microtvm_end_time; -int g_microtvm_timer_running = 0; - -#ifdef CONFIG_LED -#define LED0_NODE DT_ALIAS(led0) -static const struct gpio_dt_spec led0 = GPIO_DT_SPEC_GET(LED0_NODE, gpios); -#endif // CONFIG_LED - -// This is invoked by Zephyr from an exception handler, which will be invoked -// if the device crashes. Here, we turn on the LED and spin. -void k_sys_fatal_error_handler(unsigned int reason, const z_arch_esf_t* esf) { -#ifdef CONFIG_LED - gpio_pin_set_dt(&led0, 1); -#endif - for (;;) - ; -} - -void TVMPlatformAbort(tvm_crt_error_t error) { - TVMLogf("TVMError: 0x%x", error); - sys_reboot(SYS_REBOOT_COLD); -#ifdef CONFIG_LED - gpio_pin_set_dt(&led0, 1); -#endif - for (;;) - ; -} - -size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, const char* fmt, - va_list args) { - return vsnprintk(out_buf, out_buf_size_bytes, fmt, args); -} - -tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { - *out_ptr = k_heap_alloc(&tvm_heap, num_bytes, K_NO_WAIT); - return (*out_ptr == NULL) ? kTvmErrorPlatformNoMemory : kTvmErrorNoError; -} - -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { - k_heap_free(&tvm_heap, ptr); - return kTvmErrorNoError; -} - -// Called to start system timer. -tvm_crt_error_t TVMPlatformTimerStart() { - if (g_microtvm_timer_running) { - TVMLogf("timer already running"); - return kTvmErrorPlatformTimerBadState; - } - -#ifdef CONFIG_LED - gpio_pin_set_dt(&led0, 1); -#endif - g_microtvm_start_time = timing_counter_get(); - g_microtvm_timer_running = 1; - return kTvmErrorNoError; -} - -// Called to stop system timer. -tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) { - if (!g_microtvm_timer_running) { - TVMLogf("timer not running"); - return kTvmErrorSystemErrorMask | 2; - } - -#ifdef CONFIG_LED - gpio_pin_set_dt(&led0, 0); -#endif - - g_microtvm_end_time = timing_counter_get(); - uint64_t cycles = timing_cycles_get(&g_microtvm_start_time, &g_microtvm_end_time); - uint64_t ns_spent = timing_cycles_to_ns(cycles); - *elapsed_time_seconds = ns_spent / (double)1e9; - g_microtvm_timer_running = 0; - return kTvmErrorNoError; -} - -tvm_crt_error_t TVMPlatformGenerateRandom(uint8_t* buffer, size_t num_bytes) { - uint32_t random; // one unit of random data. - - // Fill parts of `buffer` which are as large as `random`. - size_t num_full_blocks = num_bytes / sizeof(random); - for (int i = 0; i < num_full_blocks; ++i) { - random = sys_rand32_get(); - memcpy(&buffer[i * sizeof(random)], &random, sizeof(random)); - } - - // Fill any leftover tail which is smaller than `random`. - size_t num_tail_bytes = num_bytes % sizeof(random); - if (num_tail_bytes > 0) { - random = sys_rand32_get(); - memcpy(&buffer[num_bytes - num_tail_bytes], &random, num_tail_bytes); - } - return kTvmErrorNoError; -} - -tvm_crt_error_t TVMPlatformInitialize() { -#ifdef CONFIG_LED - if (!device_is_ready(led0.port)) { - for (;;) - ; - } - int ret = gpio_pin_configure_dt(&led0, GPIO_OUTPUT_ACTIVE); - if (ret < 0) { - TVMPlatformAbort((tvm_crt_error_t)0xbeef4); - } - gpio_pin_set_dt(&led0, 0); -#endif - - // Initialize system timing. 
We could stop and start it every time, but we'll - // be using it enough we should just keep it enabled. - timing_init(); - timing_start(); - - return kTvmErrorNoError; -} diff --git a/apps/microtvm/zephyr/template_project/src/mlperftiny/README.md b/apps/microtvm/zephyr/template_project/src/mlperftiny/README.md deleted file mode 100644 index c38a1f05bfc6..000000000000 --- a/apps/microtvm/zephyr/template_project/src/mlperftiny/README.md +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - - - - - - - - -# MLPerf Tiny Project API -This directory includes source files to build a Zephyr microTVM project to use for benchmarking with EEMBC runner. -This project has been tested with NUCLEO_L4R5ZI and NRF5340DK. diff --git a/apps/microtvm/zephyr/template_project/src/mlperftiny/main.cc b/apps/microtvm/zephyr/template_project/src/mlperftiny/main.cc deleted file mode 100644 index 4c91177062ad..000000000000 --- a/apps/microtvm/zephyr/template_project/src/mlperftiny/main.cc +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "api/internally_implemented.h" -#include "api/submitter_implemented.h" - -int main(int argc, char* argv[]) { -#if NRF_BOARD == 1 - // Set frequency to 128MHz for nrf5340dk_nrf534 by setting the clock divider to 0. - // 0x50005558 is the clock division reg address. - uint32_t* clock_div = (uint32_t*)0x50005558; - *clock_div = 0; -#endif - - ee_benchmark_initialize(); - while (1) { - int c; - c = th_getchar(); - ee_serial_callback(c); - } - return 0; -} diff --git a/apps/microtvm/zephyr/template_project/src/mlperftiny/platform.cc b/apps/microtvm/zephyr/template_project/src/mlperftiny/platform.cc deleted file mode 100644 index f50911b52dbc..000000000000 --- a/apps/microtvm/zephyr/template_project/src/mlperftiny/platform.cc +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \brief Implementation of TVMPlatform functions in tvm/runtime/crt/platform.h - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "crt_config.h" - -size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, const char* fmt, - va_list args) { - return vsnprintk(out_buf, out_buf_size_bytes, fmt, args); -} - -void TVMPlatformAbort(tvm_crt_error_t error) { - TVMLogf("TVMPlatformAbort: %08x\n", error); - sys_reboot(SYS_REBOOT_COLD); - for (;;) - ; -} diff --git a/apps/microtvm/zephyr/template_project/src/mlperftiny/submitter_implemented.cc b/apps/microtvm/zephyr/template_project/src/mlperftiny/submitter_implemented.cc deleted file mode 100644 index b74c6e8eaf6f..000000000000 --- a/apps/microtvm/zephyr/template_project/src/mlperftiny/submitter_implemented.cc +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "api/submitter_implemented.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "api/internally_implemented.h" -#include "crt_config.h" -#include "tvm/output_data.h" -#include "tvmgen_default.h" - -// ############################################################### -// Model -// ############################################################### -#define MODEL_KWS 1 -#define MODEL_VWW 2 -#define MODEL_AD 3 -#define MODEL_IC 4 - -static void* g_input_data; -#if TARGET_MODEL == MODEL_AD -static uint8_t __aligned(4) g_input_data_buffer_aligned[MAX_DB_INPUT_SIZE]; -#endif - -// OUT_QUANT_SCALE and OUT_QUANT_ZERO are set in python. -#if TARGET_MODEL == MODEL_AD -float* g_output_data = output_data; -#else -int8_t* g_output_data = output_data; -float g_quant_scale = OUT_QUANT_SCALE; -int8_t g_quant_zero = OUT_QUANT_ZERO; -#endif -size_t g_output_data_len = output_data_len; - -// ############################################################### -// GPIO -// ############################################################### -#if EE_CFG_ENERGY_MODE == 1 && NRF_BOARD != 1 -// use GPIO PC6 which is on connector CN7 pin 1 on the nucleo_l4r5zi -static const char* g_gpio_device_name = "GPIOC"; -static const struct device* g_gpio_dev; -static const gpio_pin_t g_gpio_pin = 6; -#endif - -// ############################################################### -// UART -// ############################################################### -#define TVM_UART_DEFAULT_BAUDRATE 115200 -static const struct device* g_microtvm_uart; - -void UartInit(uint32_t baudrate = TVM_UART_DEFAULT_BAUDRATE) { - // Claim console device. 
- g_microtvm_uart = DEVICE_DT_GET(DT_CHOSEN(zephyr_console)); - const struct uart_config config = {.baudrate = baudrate, - .parity = UART_CFG_PARITY_NONE, - .stop_bits = UART_CFG_STOP_BITS_1, - .data_bits = UART_CFG_DATA_BITS_8, - .flow_ctrl = UART_CFG_FLOW_CTRL_NONE}; - uart_configure(g_microtvm_uart, &config); -} - -char UartRxRead() { - unsigned char c; - int ret = -1; - while (ret != 0) { - ret = uart_poll_in(g_microtvm_uart, &c); - } - return (char)c; -} - -uint32_t UartTxWrite(const char* data, uint32_t size) { - for (uint32_t i = 0; i < size; i++) { - uart_poll_out(g_microtvm_uart, data[i]); - } - return size; -} - -// ############################################################### -// TVM -// ############################################################### -#ifdef __cplusplus -extern "C" { -#endif -// TODO(mehrdadh): remove and reuse the CRT -// implementation in src/runtime/crt/common/crt_backend_api.c -void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t nbytes, int dtype_code_hint, - int dtype_bits_hint) { - tvm_crt_error_t err = kTvmErrorNoError; - void* ptr = 0; - DLDevice dev = {(DLDeviceType)device_type, device_id}; - assert(nbytes > 0); - err = TVMPlatformMemoryAllocate(nbytes, dev, &ptr); - CHECK_EQ(err, kTvmErrorNoError, - "TVMBackendAllocWorkspace(%d, %d, %" PRIu64 ", %d, %d) -> %" PRId32, device_type, - device_id, nbytes, dtype_code_hint, dtype_bits_hint, err); - return ptr; -} - -// TODO(mehrdadh): remove and reuse the CRT -// implementation in src/runtime/crt/common/crt_backend_api.c -int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) { - tvm_crt_error_t err = kTvmErrorNoError; - DLDevice dev = {(DLDeviceType)device_type, device_id}; - err = TVMPlatformMemoryFree(ptr, dev); - CHECK_EQ(err, kTvmErrorNoError, "TVMBackendFreeWorkspace(%d, %d)", device_type, device_id); - return err; -} - -void TVMLogf(const char* msg, ...) { - char buffer[128]; - int size; - va_list args; - va_start(args, msg); - size = TVMPlatformFormatMessage(buffer, 128, msg, args); - va_end(args); - UartTxWrite(buffer, (size_t)size); -} - -#ifdef __cplusplus -} // extern "C" -#endif - -void Infer(void* input_ptr) { - struct tvmgen_default_inputs inputs = { -#if TARGET_MODEL == MODEL_KWS - .input_1 = input_ptr, -#elif TARGET_MODEL == MODEL_IC - .input_1_int8 = input_ptr, -#elif TARGET_MODEL == MODEL_VWW - .input_1_int8 = input_ptr, -#elif TARGET_MODEL == MODEL_AD - .input_1 = input_ptr, -#elif -#error Wrong model. -#endif - }; - - struct tvmgen_default_outputs outputs = { -#if TARGET_MODEL == MODEL_KWS - .Identity = output_data, -#elif TARGET_MODEL == MODEL_IC - .Identity_int8 = output_data, -#elif TARGET_MODEL == MODEL_VWW - .Identity_int8 = output_data, -#elif TARGET_MODEL == MODEL_AD - .Identity = output_data, -#endif - }; - - int ret_val = tvmgen_default_run(&inputs, &outputs); - if (ret_val != 0) { - th_printf("Error: %d\n", ret_val); - } -} - -// ############################################################### -// MLPerftiny APIs -// ############################################################### -// Implement this method to prepare for inference and preprocess inputs. 
-// Modified from source -void th_load_tensor() { -#if TARGET_MODEL == MODEL_KWS - g_input_data = static_cast(ee_get_buffer_pointer()); -#elif TARGET_MODEL == MODEL_VWW - // Converting uint8 to int8 - int8_t* temp_int = reinterpret_cast(ee_get_buffer_pointer()); - for (size_t i = 0; i < MAX_DB_INPUT_SIZE; i++) { - temp_int[i] -= 128; - } - g_input_data = static_cast(temp_int); -#elif TARGET_MODEL == MODEL_AD - uint8_t* buffer = ee_get_buffer_pointer(); - memcpy(g_input_data_buffer_aligned, buffer, sizeof(g_input_data_buffer_aligned)); - g_input_data = g_input_data_buffer_aligned; -#elif TARGET_MODEL == MODEL_IC - uint8_t* temp_uint = reinterpret_cast(ee_get_buffer_pointer()); - int8_t* temp_int = reinterpret_cast(ee_get_buffer_pointer()); - for (size_t i = 0; i < MAX_DB_INPUT_SIZE; i++) { - if (temp_uint[i] <= 127) - temp_int[i] = ((int8_t)temp_uint[i]) - 128; - else - temp_int[i] = (int8_t)(temp_uint[i] - 128); - } - g_input_data = reinterpret_cast(temp_int); -#else -#error Wrong model -#endif -} - -#if TARGET_MODEL == MODEL_AD -// calculate |output - input| for AD model -static float calculate_result() { - size_t feature_size = g_output_data_len; - float diffsum = 0; - float* input_float = reinterpret_cast(g_input_data); - float* output_float = reinterpret_cast(g_output_data); - - for (size_t i = 0; i < feature_size; i++) { - float diff = output_float[i] - input_float[i]; - diffsum += diff * diff; - } - diffsum /= feature_size; - - return diffsum; -} -#endif - -// Add to this method to return real inference results. -void th_results() { - /** - * The results need to be printed back in exactly this format; if easier - * to just modify this loop than copy to results[] above, do that. - */ -#if TARGET_MODEL == MODEL_AD - th_printf("m-results-[%0.3f]\r\n", calculate_result()); -#else - size_t kCategoryCount = g_output_data_len; - th_printf("m-results-["); - for (size_t i = 0; i < kCategoryCount; i++) { - float converted = static_cast(g_quant_scale * (g_output_data[i] - g_quant_zero)); - // float converted = static_cast(g_output_data[i]); - th_printf("%.3f", converted); - if (i < (kCategoryCount - 1)) { - th_printf(","); - } - } - th_printf("]\r\n"); -#endif -} - -// Implement this method with the logic to perform one inference cycle. -// Modified from source -void th_infer() { Infer(g_input_data); } - -/// \brief optional API. -// Modified from source -void th_final_initialize(void) {} - -void th_pre() {} -void th_post() {} - -void th_command_ready(char volatile* p_command) { - p_command = p_command; - ee_serial_command_parser_callback((char*)p_command); -} - -// th_libc implementations. -int th_strncmp(const char* str1, const char* str2, size_t n) { return strncmp(str1, str2, n); } - -char* th_strncpy(char* dest, const char* src, size_t n) { return strncpy(dest, src, n); } - -size_t th_strnlen(const char* str, size_t maxlen) { return strlen(str); } - -char* th_strcat(char* dest, const char* src) { return strcat(dest, src); } - -char* th_strtok(char* str1, const char* sep) { return strtok(str1, sep); } - -int th_atoi(const char* str) { return atoi(str); } - -void* th_memset(void* b, int c, size_t len) { return memset(b, c, len); } - -void* th_memcpy(void* dst, const void* src, size_t n) { return memcpy(dst, src, n); } - -/* N.B.: Many embedded *printf SDKs do not support all format specifiers. */ -int th_vprintf(const char* format, va_list ap) { return vprintf(format, ap); } - -// Modified from source -void th_printf(const char* p_fmt, ...) 
{ - char buffer[128]; - int size; - va_list args; - va_start(args, p_fmt); - size = TVMPlatformFormatMessage(buffer, 128, p_fmt, args); - va_end(args); - UartTxWrite(buffer, (size_t)size); -} - -// Modified from source -char th_getchar() { return UartRxRead(); } - -// Modified from source -void th_serialport_initialize(void) { -#if EE_CFG_ENERGY_MODE == 1 && NRF_BOARD != 1 - UartInit(9600); -#else - UartInit(); -#endif -} - -// Modified from source -void th_timestamp(void) { -#if EE_CFG_ENERGY_MODE == 1 && NRF_BOARD != 1 - /* USER CODE 1 BEGIN */ - /* Step 1. Pull pin low */ - gpio_pin_set(g_gpio_dev, g_gpio_pin, 0); - /* Step 2. Hold low for at least 1us */ - k_busy_wait(1); - /* Step 3. Release driver */ - gpio_pin_set(g_gpio_dev, g_gpio_pin, 1); - /* USER CODE 1 END */ -#else - /* USER CODE 2 BEGIN */ - unsigned long microSeconds = (unsigned long)(k_uptime_get() * 1000LL); - /* USER CODE 2 END */ - /* This message must NOT be changed. */ - th_printf(EE_MSG_TIMESTAMP, microSeconds); -#endif -} - -// Modified from source -void th_timestamp_initialize(void) { - /* USER CODE 1 BEGIN */ - // Setting up BOTH perf and energy here -#if EE_CFG_ENERGY_MODE == 1 && NRF_BOARD != 1 - g_gpio_dev = device_get_binding(g_gpio_device_name); - if (g_gpio_dev == NULL) { - th_printf("GPIO device init failed\r\n"); - return; - } - - int ret = gpio_pin_configure(g_gpio_dev, g_gpio_pin, GPIO_OUTPUT_HIGH); - if (ret < 0) { - th_printf("GPIO pin configure failed\r\n"); - return; - } -#endif - - /* USER CODE 1 END */ - /* This message must NOT be changed. */ - th_printf(EE_MSG_TIMESTAMP_MODE); - /* Always call the timestamp on initialize so that the open-drain output - is set to "1" (so that we catch a falling edge) */ - th_timestamp(); -} diff --git a/apps/microtvm/zephyr_cmsisnn/.gitignore b/apps/microtvm/zephyr_cmsisnn/.gitignore deleted file mode 100644 index faa3be6bbc82..000000000000 --- a/apps/microtvm/zephyr_cmsisnn/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -build -*.tflite diff --git a/apps/microtvm/zephyr_cmsisnn/CMakeLists.txt b/apps/microtvm/zephyr_cmsisnn/CMakeLists.txt deleted file mode 100644 index c9e971b94535..000000000000 --- a/apps/microtvm/zephyr_cmsisnn/CMakeLists.txt +++ /dev/null @@ -1,100 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
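For context on the deleted `th_results()` above: the int8 model outputs are mapped back to floating point with the usual affine dequantization, real = scale * (q - zero_point), where the scale and zero point (`OUT_QUANT_SCALE` / `OUT_QUANT_ZERO`) are injected from the Python side. A minimal, self-contained sketch of that mapping; the numeric values below are illustrative only, not those of any of the benchmark models:

```python
import numpy as np

def dequantize(q, scale, zero_point):
    # Affine dequantization as performed in th_results(): real = scale * (q - zero_point).
    return scale * (np.asarray(q, dtype=np.float32) - zero_point)

# Illustrative values; the real scale/zero point come from the quantized model.
print(dequantize(np.array([-128, 0, 64, 127], dtype=np.int8), 0.00390625, -128))
# prints approximately [0.  0.5  0.75  0.99609375]
```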
- -cmake_minimum_required(VERSION 3.20.0) -set(BOARD mps3_an547) -set(EMU_PLATFORM armfvp) - -find_package(Zephyr) -project(my_zephyr_app) - -if (NOT DEFINED CMSIS_PATH) - set(CMSIS_PATH /opt/arm/ethosu/cmsis/) -endif() - -set(TVMC_COMMAND python3 -m tvm.driver.tvmc) -set(TVMC_ARGS - --target="cmsis-nn -mcpu=cortex-m55, c" # CMSIS-NN and C targets - --runtime=crt # C Runtime - --executor=aot # Ahead-of-Time Executor - --executor-aot-unpacked-api=1 # Direct internal calls to operators - --executor-aot-interface-api=c # Expose C interface to the model - --pass-config=tir.disable_vectorize=1 # Disable vectorizer for C output - --output-format=mlf # Output Model Library Format tarball -) - -set(TVM_RUNTIME - ${CMAKE_CURRENT_BINARY_DIR}/runtime/src/runtime/crt/common/crt_backend_api.c - ${CMAKE_CURRENT_BINARY_DIR}/runtime/src/runtime/crt/memory/stack_allocator.c -) -set(CODEGEN_OUTPUT - ${CMAKE_CURRENT_BINARY_DIR}/codegen/host/src/default_lib0.c - ${CMAKE_CURRENT_BINARY_DIR}/codegen/host/src/default_lib1.c - ${CMAKE_CURRENT_BINARY_DIR}/codegen/host/src/default_lib2.c -) -set(DATA_FILES - ${CMAKE_CURRENT_BINARY_DIR}/inputs.c - ${CMAKE_CURRENT_BINARY_DIR}/outputs.c - ${CMAKE_CURRENT_BINARY_DIR}/labels.c -) -set(CMSIS_SOURCES - ${CMSIS_PATH}/CMSIS-NN/Source/SoftmaxFunctions/arm_softmax_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c - ${CMSIS_PATH}/CMSIS-NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c - ${CMSIS_PATH}/CMSIS-NN/Source/ConvolutionFunctions/arm_convolve_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c - ${CMSIS_PATH}/CMSIS-NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c -) - -add_custom_command( - OUTPUT ${TVM_RUNTIME} - OUTPUT ${CODEGEN_OUTPUT} - COMMAND ${TVMC_COMMAND} compile ${TVMC_ARGS} ${CMAKE_CURRENT_SOURCE_DIR}/model/cnn_s_quantized.tflite - COMMAND tar xf ${CMAKE_CURRENT_BINARY_DIR}/module.tar - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} -) -add_custom_command( - OUTPUT ${DATA_FILES} - COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/model/convert_input.py ${CMAKE_CURRENT_SOURCE_DIR}/model/input.txt ${CMAKE_CURRENT_BINARY_DIR} - COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/model/convert_labels.py ${CMAKE_CURRENT_SOURCE_DIR}/model/labels.txt ${CMAKE_CURRENT_BINARY_DIR} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} -) - -target_sources(app PRIVATE - src/main.c - ${TVM_RUNTIME} - ${CODEGEN_OUTPUT} - ${DATA_FILES} - ${CMSIS_SOURCES} -) -target_include_directories(app - PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/crt_config - PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include - PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/runtime/include 
${CMAKE_CURRENT_BINARY_DIR}/codegen/host/include - PUBLIC ${CMSIS_PATH}/CMSIS-NN/Include/ ${CMSIS_PATH}/CMSIS/DSP/Include -) diff --git a/apps/microtvm/zephyr_cmsisnn/README.md b/apps/microtvm/zephyr_cmsisnn/README.md deleted file mode 100644 index 334af71d0c77..000000000000 --- a/apps/microtvm/zephyr_cmsisnn/README.md +++ /dev/null @@ -1,48 +0,0 @@ - - - - - - - - - - - - - - - - - -# Zephyr RTOS Demo with CMSIS-NN - -This project was used for the [TVMCon 2021 talk on Cortex-M improvements to TVM](https://www.youtube.com/watch?v=6a7o8U-8Op4). It runs a keyword spotting model with the Zephyr RTOS using CMSIS-NN with the Ahead-of-Time (AOT) executor and the stack allocation strategy. - -The application starts from [the Zephyr base project](https://docs.zephyrproject.org/latest/application/index.html#application) and makes minimal changes to integrate TVM. To try it out, first refer to the [Zephyr Getting Started](https://docs.zephyrproject.org/latest/getting_started/index.html) page to setup your tooling such as `west` (you can also use the `tlcpack/ci_cortexm` image). Then download the [Fixed Virtual Platform (FVP) based on Arm(R) Corstone(TM)-300 software](https://developer.arm.com/tools-and-software/open-source-software/arm-platforms-software/arm-ecosystem-fvps) and set the path for Zephyr to find it: - -``` -export ARMFVP_BIN_PATH=/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4/ -``` - -Download the keyword spotting model to the `model` directory: -``` -wget \ - https://github.com/ARM-software/ML-zoo/blob/ee35139af86bdace5e502b09fe8b9da9cb1f06bb/models/keyword_spotting/cnn_small/tflite_int8/cnn_s_quantized.tflite \ - -O model/cnn_s_quantized.tflite -``` - -Checkout [CMSIS_5](https://github.com/ARM-software/CMSIS_5.git) (default is `/opt/arm/ethosu/cmsis` to reflect `tlcpack/ci_cortexm`): -``` -git clone "https://github.com/ARM-software/CMSIS_5.git" cmsis -``` - -Checkout [CMSIS NN](https://github.com/ARM-software/CMSIS-NN.git) (default is `/opt/arm/ethosu/cmsis/CMSIS-NN` to reflect `tlcpack/ci_cortexm`): -``` -git clone "https://github.com/ARM-software/CMSIS-NN.git" cmsis/CMSIS-NN -``` - -And run the demo using `west`, with the path to CMSIS: -``` -west build -t run -- -DCMSIS_PATH=/opt/arm/ethosu/cmsis -``` diff --git a/apps/microtvm/zephyr_cmsisnn/model/convert_input.py b/apps/microtvm/zephyr_cmsisnn/model/convert_input.py deleted file mode 100644 index 7b10e86b0f57..000000000000 --- a/apps/microtvm/zephyr_cmsisnn/model/convert_input.py +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
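For anyone reproducing the deleted zephyr_cmsisnn flow outside of CMake, the custom command above boils down to one tvmc invocation followed by unpacking the Model Library Format tarball. A rough standalone equivalent, written as a small Python wrapper around the same CLI flags (run it from the app directory; paths are placeholders for your checkout):

```python
import subprocess

# Mirrors TVMC_ARGS from the deleted CMakeLists.txt above.
subprocess.run(
    [
        "python3", "-m", "tvm.driver.tvmc", "compile",
        "--target=cmsis-nn -mcpu=cortex-m55, c",  # CMSIS-NN and C targets
        "--runtime=crt",                          # C runtime
        "--executor=aot",                         # Ahead-of-Time executor
        "--executor-aot-unpacked-api=1",
        "--executor-aot-interface-api=c",
        "--pass-config=tir.disable_vectorize=1",
        "--output-format=mlf",                    # Model Library Format tarball
        "model/cnn_s_quantized.tflite",
    ],
    check=True,
)
# Unpack the MLF tarball exactly as the CMake rule does.
subprocess.run(["tar", "xf", "module.tar"], check=True)
```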
- -import os -import pathlib -import sys -import numpy as np - - -def create_file(name, prefix, tensor_name, tensor_data, output_path): - """ - This function generates a header file containing the data from the numpy array provided. - """ - file_path = pathlib.Path(f"{output_path}/" + name).resolve() - # Create header file with npy_data as a C array - raw_path = file_path.with_suffix(".c").resolve() - with open(raw_path, "w") as header_file: - header_file.write( - "#include \n" - "#include \n" - f"const size_t {tensor_name}_len = {tensor_data.size};\n" - f"{prefix} float {tensor_name}_storage[] = " - ) - header_file.write("{") - for i in np.ndindex(tensor_data.shape): - header_file.write(f"{tensor_data[i]}, ") - header_file.write("};\n\n") - - -def create_files(input_file, output_dir): - """ - This function generates C files for the input and output arrays required to run inferences - """ - # Create out folder - os.makedirs(output_dir, exist_ok=True) - - # Create input header file - input_data = np.loadtxt(input_file) - create_file("inputs", "const", "input", input_data, output_dir) - - # Create output header file - output_data = np.zeros([12], np.float32) - create_file( - "outputs", - "", - "output", - output_data, - output_dir, - ) - - -if __name__ == "__main__": - create_files(sys.argv[1], sys.argv[2]) diff --git a/apps/microtvm/zephyr_cmsisnn/model/convert_labels.py b/apps/microtvm/zephyr_cmsisnn/model/convert_labels.py deleted file mode 100644 index 40add496bf01..000000000000 --- a/apps/microtvm/zephyr_cmsisnn/model/convert_labels.py +++ /dev/null @@ -1,57 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
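The deleted convert_input.py above is essentially a numpy-to-C-array generator: it turns the whitespace-separated sample in input.txt into inputs.c / outputs.c that are compiled into the firmware. A condensed, self-contained sketch of the same idea (the helper name below is hypothetical):

```python
import numpy as np

def to_c_array(tensor_name, data, const=True):
    # Condensed form of create_file() above: a length constant plus a float C array.
    arr = np.asarray(data, dtype=np.float32)
    prefix = "const " if const else ""
    values = ", ".join(str(float(v)) for v in arr.flatten())
    return (
        f"const size_t {tensor_name}_len = {arr.size};\n"
        f"{prefix}float {tensor_name}_storage[] = {{ {values} }};\n"
    )

if __name__ == "__main__":
    # Illustrative data; the real script reads model/input.txt via np.loadtxt().
    print(to_c_array("input", [155.0, 25.0, 153.0, 207.0]))
```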
- -import os -import pathlib -import sys -import shutil -import pathlib - -from tvm.micro import copy_crt_config_header - - -def create_labels_header(labels_file, output_path): - """ - This function generates a header file containing the ImageNet labels as an array of strings - """ - labels_path = pathlib.Path(labels_file).resolve() - file_path = pathlib.Path(f"{output_path}/labels.c").resolve() - - with open(labels_path) as f: - labels = f.readlines() - - with open(file_path, "w") as header_file: - header_file.write(f"char* labels[] = {{") - - for _, label in enumerate(labels): - header_file.write(f'"{label.rstrip()}",') - - header_file.write("};\n") - - -def prepare_crt_config(): - crt_config_output_path = ( - pathlib.Path(__file__).parent.resolve().parent() / "build" / "crt_config" - ) - if not crt_config_output_path.exists(): - crt_config_output_path.mkdir() - copy_crt_config_header("zephyr", crt_config_output_path) - - -if __name__ == "__main__": - create_labels_header(sys.argv[1], sys.argv[2]) - prepare_crt_config() diff --git a/apps/microtvm/zephyr_cmsisnn/model/input.txt b/apps/microtvm/zephyr_cmsisnn/model/input.txt deleted file mode 100644 index 3f8625fc4c04..000000000000 --- a/apps/microtvm/zephyr_cmsisnn/model/input.txt +++ /dev/null @@ -1 +0,0 @@ -1.550000000000000000e+02 2.500000000000000000e+01 1.530000000000000000e+02 2.070000000000000000e+02 1.300000000000000000e+01 5.100000000000000000e+01 1.010000000000000000e+02 2.270000000000000000e+02 2.800000000000000000e+01 2.220000000000000000e+02 2.500000000000000000e+01 8.100000000000000000e+01 1.860000000000000000e+02 2.100000000000000000e+01 8.600000000000000000e+01 1.090000000000000000e+02 8.900000000000000000e+01 1.740000000000000000e+02 2.490000000000000000e+02 1.920000000000000000e+02 1.490000000000000000e+02 2.520000000000000000e+02 1.860000000000000000e+02 1.170000000000000000e+02 8.200000000000000000e+01 1.820000000000000000e+02 2.460000000000000000e+02 1.930000000000000000e+02 2.200000000000000000e+01 1.750000000000000000e+02 1.870000000000000000e+02 1.860000000000000000e+02 4.200000000000000000e+01 2.230000000000000000e+02 1.180000000000000000e+02 1.540000000000000000e+02 1.580000000000000000e+02 2.280000000000000000e+02 1.700000000000000000e+02 4.600000000000000000e+01 5.900000000000000000e+01 1.830000000000000000e+02 9.900000000000000000e+01 6.500000000000000000e+01 1.210000000000000000e+02 3.500000000000000000e+01 2.000000000000000000e+02 2.070000000000000000e+02 5.000000000000000000e+01 8.000000000000000000e+01 2.500000000000000000e+02 1.350000000000000000e+02 1.590000000000000000e+02 4.100000000000000000e+01 1.800000000000000000e+02 8.400000000000000000e+01 1.370000000000000000e+02 5.000000000000000000e+00 1.740000000000000000e+02 1.930000000000000000e+02 5.400000000000000000e+01 1.480000000000000000e+02 3.000000000000000000e+01 4.800000000000000000e+01 5.700000000000000000e+01 2.150000000000000000e+02 2.010000000000000000e+02 6.000000000000000000e+00 5.200000000000000000e+01 1.500000000000000000e+01 5.000000000000000000e+01 1.290000000000000000e+02 1.150000000000000000e+02 1.450000000000000000e+02 2.440000000000000000e+02 6.500000000000000000e+01 1.740000000000000000e+02 3.200000000000000000e+01 3.800000000000000000e+01 9.300000000000000000e+01 2.360000000000000000e+02 2.170000000000000000e+02 2.120000000000000000e+02 1.940000000000000000e+02 2.300000000000000000e+02 1.790000000000000000e+02 3.000000000000000000e+00 8.700000000000000000e+01 2.100000000000000000e+01 1.530000000000000000e+02 
1.940000000000000000e+02 1.680000000000000000e+02 2.040000000000000000e+02 1.530000000000000000e+02 1.930000000000000000e+02 4.800000000000000000e+01 7.000000000000000000e+00 1.460000000000000000e+02 2.500000000000000000e+01 1.180000000000000000e+02 2.130000000000000000e+02 2.320000000000000000e+02 5.000000000000000000e+01 1.570000000000000000e+02 4.000000000000000000e+00 1.270000000000000000e+02 2.300000000000000000e+01 2.120000000000000000e+02 4.300000000000000000e+01 1.610000000000000000e+02 4.500000000000000000e+01 2.410000000000000000e+02 1.740000000000000000e+02 1.040000000000000000e+02 7.700000000000000000e+01 2.030000000000000000e+02 7.400000000000000000e+01 1.690000000000000000e+02 3.700000000000000000e+01 7.800000000000000000e+01 2.110000000000000000e+02 1.070000000000000000e+02 1.930000000000000000e+02 5.000000000000000000e+01 3.300000000000000000e+01 1.370000000000000000e+02 1.000000000000000000e+02 6.900000000000000000e+01 1.150000000000000000e+02 1.690000000000000000e+02 4.000000000000000000e+01 5.000000000000000000e+00 2.700000000000000000e+01 1.500000000000000000e+01 1.660000000000000000e+02 2.400000000000000000e+02 7.500000000000000000e+01 2.060000000000000000e+02 7.700000000000000000e+01 5.500000000000000000e+01 2.090000000000000000e+02 2.800000000000000000e+01 1.900000000000000000e+01 2.700000000000000000e+01 2.400000000000000000e+02 2.030000000000000000e+02 6.200000000000000000e+01 2.450000000000000000e+02 2.540000000000000000e+02 1.210000000000000000e+02 1.360000000000000000e+02 8.000000000000000000e+01 8.300000000000000000e+01 1.980000000000000000e+02 6.200000000000000000e+01 3.200000000000000000e+01 1.820000000000000000e+02 6.700000000000000000e+01 2.240000000000000000e+02 7.600000000000000000e+01 1.170000000000000000e+02 4.000000000000000000e+01 3.000000000000000000e+00 6.000000000000000000e+00 1.570000000000000000e+02 2.520000000000000000e+02 7.800000000000000000e+01 1.750000000000000000e+02 2.420000000000000000e+02 1.770000000000000000e+02 1.220000000000000000e+02 1.710000000000000000e+02 2.390000000000000000e+02 8.300000000000000000e+01 2.000000000000000000e+00 2.160000000000000000e+02 7.000000000000000000e+00 1.900000000000000000e+02 2.520000000000000000e+02 8.300000000000000000e+01 6.100000000000000000e+01 9.700000000000000000e+01 1.090000000000000000e+02 9.300000000000000000e+01 2.270000000000000000e+02 2.300000000000000000e+02 2.280000000000000000e+02 1.260000000000000000e+02 1.340000000000000000e+02 1.710000000000000000e+02 1.020000000000000000e+02 8.300000000000000000e+01 2.000000000000000000e+02 1.370000000000000000e+02 8.600000000000000000e+01 3.600000000000000000e+01 1.690000000000000000e+02 6.000000000000000000e+00 1.420000000000000000e+02 4.900000000000000000e+01 2.120000000000000000e+02 1.780000000000000000e+02 4.300000000000000000e+01 1.920000000000000000e+02 4.200000000000000000e+01 2.900000000000000000e+01 1.490000000000000000e+02 1.020000000000000000e+02 1.040000000000000000e+02 6.000000000000000000e+00 4.300000000000000000e+01 1.240000000000000000e+02 2.190000000000000000e+02 1.530000000000000000e+02 1.700000000000000000e+02 5.800000000000000000e+01 1.700000000000000000e+01 2.470000000000000000e+02 2.160000000000000000e+02 2.020000000000000000e+02 1.100000000000000000e+01 2.490000000000000000e+02 2.150000000000000000e+02 1.200000000000000000e+01 1.230000000000000000e+02 2.410000000000000000e+02 2.230000000000000000e+02 2.100000000000000000e+02 2.480000000000000000e+02 0.000000000000000000e+00 1.900000000000000000e+02 5.700000000000000000e+01 
1.660000000000000000e+02 7.200000000000000000e+01 1.220000000000000000e+02 1.600000000000000000e+01 5.600000000000000000e+01 6.400000000000000000e+01 2.010000000000000000e+02 1.300000000000000000e+01 2.370000000000000000e+02 1.760000000000000000e+02 2.240000000000000000e+02 1.880000000000000000e+02 9.300000000000000000e+01 2.320000000000000000e+02 1.290000000000000000e+02 3.000000000000000000e+00 5.900000000000000000e+01 4.100000000000000000e+01 1.400000000000000000e+02 1.680000000000000000e+02 1.730000000000000000e+02 6.500000000000000000e+01 2.220000000000000000e+02 2.020000000000000000e+02 2.080000000000000000e+02 2.170000000000000000e+02 2.290000000000000000e+02 1.660000000000000000e+02 9.000000000000000000e+01 1.170000000000000000e+02 1.440000000000000000e+02 4.000000000000000000e+00 3.800000000000000000e+01 1.720000000000000000e+02 1.330000000000000000e+02 2.300000000000000000e+02 1.920000000000000000e+02 2.270000000000000000e+02 3.700000000000000000e+01 5.200000000000000000e+01 2.500000000000000000e+01 1.000000000000000000e+02 5.800000000000000000e+01 6.100000000000000000e+01 5.400000000000000000e+01 2.600000000000000000e+01 1.100000000000000000e+02 2.270000000000000000e+02 8.000000000000000000e+01 1.000000000000000000e+02 8.700000000000000000e+01 1.300000000000000000e+01 1.970000000000000000e+02 8.000000000000000000e+00 1.280000000000000000e+02 5.800000000000000000e+01 1.080000000000000000e+02 5.000000000000000000e+01 6.200000000000000000e+01 1.530000000000000000e+02 7.800000000000000000e+01 4.900000000000000000e+01 2.250000000000000000e+02 1.780000000000000000e+02 2.600000000000000000e+01 2.350000000000000000e+02 2.530000000000000000e+02 1.940000000000000000e+02 2.540000000000000000e+02 4.600000000000000000e+01 1.590000000000000000e+02 2.060000000000000000e+02 6.600000000000000000e+01 2.090000000000000000e+02 2.380000000000000000e+02 1.970000000000000000e+02 1.930000000000000000e+02 3.100000000000000000e+01 1.910000000000000000e+02 1.400000000000000000e+01 2.190000000000000000e+02 2.470000000000000000e+02 2.300000000000000000e+01 1.930000000000000000e+02 1.540000000000000000e+02 3.200000000000000000e+01 8.000000000000000000e+00 1.300000000000000000e+01 1.100000000000000000e+02 1.960000000000000000e+02 1.970000000000000000e+02 8.600000000000000000e+01 2.250000000000000000e+02 1.520000000000000000e+02 1.110000000000000000e+02 1.190000000000000000e+02 1.370000000000000000e+02 2.370000000000000000e+02 2.000000000000000000e+01 5.600000000000000000e+01 6.900000000000000000e+01 9.300000000000000000e+01 7.100000000000000000e+01 8.000000000000000000e+01 2.700000000000000000e+01 8.100000000000000000e+01 2.510000000000000000e+02 8.000000000000000000e+01 1.130000000000000000e+02 1.680000000000000000e+02 9.300000000000000000e+01 8.200000000000000000e+01 1.980000000000000000e+02 2.290000000000000000e+02 1.050000000000000000e+02 2.300000000000000000e+01 1.040000000000000000e+02 1.380000000000000000e+02 1.010000000000000000e+02 1.510000000000000000e+02 2.160000000000000000e+02 5.900000000000000000e+01 7.900000000000000000e+01 2.600000000000000000e+01 1.970000000000000000e+02 2.500000000000000000e+01 7.000000000000000000e+01 1.800000000000000000e+01 3.500000000000000000e+01 1.040000000000000000e+02 2.150000000000000000e+02 3.800000000000000000e+01 8.200000000000000000e+01 8.700000000000000000e+01 1.420000000000000000e+02 1.410000000000000000e+02 2.530000000000000000e+02 8.900000000000000000e+01 1.190000000000000000e+02 1.850000000000000000e+02 1.850000000000000000e+02 1.050000000000000000e+02 
1.120000000000000000e+02 2.800000000000000000e+01 1.590000000000000000e+02 2.020000000000000000e+02 5.900000000000000000e+01 5.100000000000000000e+01 1.830000000000000000e+02 1.010000000000000000e+02 2.330000000000000000e+02 1.850000000000000000e+02 1.370000000000000000e+02 1.380000000000000000e+02 1.980000000000000000e+02 8.300000000000000000e+01 2.090000000000000000e+02 1.780000000000000000e+02 2.150000000000000000e+02 1.830000000000000000e+02 9.500000000000000000e+01 1.890000000000000000e+02 7.900000000000000000e+01 3.900000000000000000e+01 2.520000000000000000e+02 2.340000000000000000e+02 5.300000000000000000e+01 1.580000000000000000e+02 1.040000000000000000e+02 1.030000000000000000e+02 1.170000000000000000e+02 2.060000000000000000e+02 2.300000000000000000e+02 1.250000000000000000e+02 1.070000000000000000e+02 2.250000000000000000e+02 1.710000000000000000e+02 2.380000000000000000e+02 5.500000000000000000e+01 2.530000000000000000e+02 2.460000000000000000e+02 2.100000000000000000e+02 2.080000000000000000e+02 1.530000000000000000e+02 3.500000000000000000e+01 1.200000000000000000e+02 1.130000000000000000e+02 1.010000000000000000e+02 7.500000000000000000e+01 4.300000000000000000e+01 1.870000000000000000e+02 6.400000000000000000e+01 6.700000000000000000e+01 2.170000000000000000e+02 1.400000000000000000e+01 4.800000000000000000e+01 1.060000000000000000e+02 1.790000000000000000e+02 8.000000000000000000e+01 1.180000000000000000e+02 1.790000000000000000e+02 3.000000000000000000e+00 2.180000000000000000e+02 2.600000000000000000e+01 2.540000000000000000e+02 1.220000000000000000e+02 1.210000000000000000e+02 4.000000000000000000e+00 3.400000000000000000e+01 1.980000000000000000e+02 1.580000000000000000e+02 6.000000000000000000e+00 9.700000000000000000e+01 1.000000000000000000e+00 1.280000000000000000e+02 2.030000000000000000e+02 1.500000000000000000e+01 2.700000000000000000e+01 2.090000000000000000e+02 9.700000000000000000e+01 1.910000000000000000e+02 1.590000000000000000e+02 1.470000000000000000e+02 1.200000000000000000e+02 4.000000000000000000e+01 2.800000000000000000e+01 8.500000000000000000e+01 1.930000000000000000e+02 2.700000000000000000e+01 1.140000000000000000e+02 1.300000000000000000e+02 1.580000000000000000e+02 1.400000000000000000e+02 6.700000000000000000e+01 5.100000000000000000e+01 2.300000000000000000e+02 1.020000000000000000e+02 7.200000000000000000e+01 1.100000000000000000e+02 2.330000000000000000e+02 1.790000000000000000e+02 9.300000000000000000e+01 1.760000000000000000e+02 2.180000000000000000e+02 5.200000000000000000e+01 0.000000000000000000e+00 1.770000000000000000e+02 1.020000000000000000e+02 2.300000000000000000e+01 1.060000000000000000e+02 1.720000000000000000e+02 3.500000000000000000e+01 2.170000000000000000e+02 1.170000000000000000e+02 2.300000000000000000e+02 2.400000000000000000e+02 1.380000000000000000e+02 2.270000000000000000e+02 diff --git a/apps/microtvm/zephyr_cmsisnn/model/labels.txt b/apps/microtvm/zephyr_cmsisnn/model/labels.txt deleted file mode 100644 index f66507b143ec..000000000000 --- a/apps/microtvm/zephyr_cmsisnn/model/labels.txt +++ /dev/null @@ -1,12 +0,0 @@ -_silence_ -_unknown_ -yes -no -up -down -left -right -on -off -stop -go diff --git a/apps/microtvm/zephyr_cmsisnn/prj.conf b/apps/microtvm/zephyr_cmsisnn/prj.conf deleted file mode 100644 index 2bb203571b72..000000000000 --- a/apps/microtvm/zephyr_cmsisnn/prj.conf +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# newlib needed for math.h -CONFIG_NEWLIB_LIBC=y - -# Enables architecture extensions -CONFIG_FPU=y diff --git a/apps/microtvm/zephyr_cmsisnn/run_demo.sh b/apps/microtvm/zephyr_cmsisnn/run_demo.sh deleted file mode 100755 index 5617e96e95f1..000000000000 --- a/apps/microtvm/zephyr_cmsisnn/run_demo.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e -set -x -set -o pipefail - -MODEL_URL=https://github.com/ARM-software/ML-zoo/raw/ee35139af86bdace5e502b09fe8b9da9cb1f06bb/models/keyword_spotting/cnn_small/tflite_int8 -MODEL_FILE=cnn_s_quantized.tflite - -LOGDIR="$(mktemp -d)" - -cleanup() -{ - rm -rf "$LOGDIR" - pkill FVP -} - -trap cleanup EXIT - -# Clean up previous build -rm -rf build - -# Download model file -wget $MODEL_URL/$MODEL_FILE -O model/$MODEL_FILE - -# System doesn't automatically exit so we wait for the output -# and kill it ourselves -export ARMFVP_BIN_PATH=/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4/ -west zephyr-export -west build -west build -t run &> ${LOGDIR}/west.log & - -# Wait for "exit" keyword -until grep -m 1 "exit" ${LOGDIR}/west.log; do sleep 1 ; done - -# Check the log for correct output -grep "The word is 'down'!" ${LOGDIR}/west.log diff --git a/apps/microtvm/zephyr_cmsisnn/src/main.c b/apps/microtvm/zephyr_cmsisnn/src/main.c deleted file mode 100644 index 55c09a7f4212..000000000000 --- a/apps/microtvm/zephyr_cmsisnn/src/main.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "tvmgen_default.h" - -extern char* labels[12]; -extern float input_storage[490]; -extern float output_storage[12]; - -extern const size_t output_len; - -static uint8_t __attribute__((aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES))) -g_crt_workspace[TVMGEN_DEFAULT_WORKSPACE_SIZE]; -tvm_workspace_t app_workspace; - -void TVMLogf(const char* msg, ...) { - va_list args; - va_start(args, msg); - vfprintf(stderr, msg, args); - va_end(args); -} - -void __attribute__((noreturn)) TVMPlatformAbort(tvm_crt_error_t error_code) { - fprintf(stderr, "TVMPlatformAbort: %d\n", error_code); - exit(-1); -} - -tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { - uintptr_t ret = StackMemoryManager_Allocate(&app_workspace, num_bytes, out_ptr); - return ret; -} - -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { - return StackMemoryManager_Free(&app_workspace, ptr); -} - -void main(void) { - StackMemoryManager_Init(&app_workspace, g_crt_workspace, TVMGEN_DEFAULT_WORKSPACE_SIZE); - - struct tvmgen_default_inputs inputs = {.input = input_storage}; - struct tvmgen_default_outputs outputs = {.Identity = output_storage}; - - if (tvmgen_default_run(&inputs, &outputs) != 0) { - printk("Model run failed\n"); - exit(-1); - } - - // Calculate index of max value - float max_value = 0.0; - size_t max_index = -1; - for (unsigned int i = 0; i < output_len; ++i) { - if (output_storage[i] > max_value) { - max_value = output_storage[i]; - max_index = i; - } - } - printk("The word is '%s'!\n", labels[max_index]); - exit(0); -} diff --git a/apps/uma/_template/__init__.py b/apps/uma/_template/__init__.py deleted file mode 100644 index 2cc0ee880d76..000000000000 --- a/apps/uma/_template/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" - -Template files for UMA tutorial - - -""" diff --git a/apps/uma/_template/backend.py b/apps/uma/_template/backend.py deleted file mode 100644 index 5ee7ecc19ef6..000000000000 --- a/apps/uma/_template/backend.py +++ /dev/null @@ -1,45 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""UMA backend for the my_ai_hw accelerator""" -from passes import MyAiHwConv2dPass -from tvm.relay.backend.contrib.uma.api.utils import PassPhase -from tvm.relay.backend.contrib.uma.backend import UMABackend -from codegen import gen_includes -from patterns import conv2d_pattern - - -class MyAiHwBackend(UMABackend): - """UMA backend for the MyAiHw accelerator.""" - - def __init__(self): - super().__init__() - - # Target configuration - self._register_target_attr("dimension") - - # Relay Pattern registration - self._register_pattern("conv2d", conv2d_pattern()) - - # Relay to TIR function registration - self._register_tir_pass(PassPhase.TIR_PHASE_0, MyAiHwConv2dPass()) - - # TIR to runtime function registration - self._register_codegen(fmt="c", includes=gen_includes) - - @property - def target_name(self): - return "my_ai_hw" diff --git a/apps/uma/_template/codegen.py b/apps/uma/_template/codegen.py deleted file mode 100644 index 5e1d6b45e81f..000000000000 --- a/apps/uma/_template/codegen.py +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""UMA codegen for the my_ai_hw accelerator""" - -import tvm -import pathlib - - -def gen_includes() -> str: - topdir = pathlib.Path(__file__).parent.absolute() - - includes = "" - includes += f'#include "{topdir}/conv2dnchw.cc"' - return includes diff --git a/apps/uma/_template/conv2dnchw.cc b/apps/uma/_template/conv2dnchw.cc deleted file mode 100644 index bfb4300e2aa3..000000000000 --- a/apps/uma/_template/conv2dnchw.cc +++ /dev/null @@ -1,96 +0,0 @@ -/* -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -*/ -#include - -// TODO(mjklaiber): leverage pragma import_c in the future -#ifdef __cplusplus -extern "C" -#endif - - /*! - * \brief Conv2D function for mock-accelerator examples. Limited to same-padded Conv2D with - * stride (1,1) and datatype float. \param ifmap Pointer to input feature map data of size - * iw*ih*ic*sizeof(float). \param weights Pointer to weight data of size - * kh*kw*ic**oc*sizeof(float). \param result Pointer to output feature map data of size - * iw*ih*oc*sizeof(float). \param oc Number of channels of output feature map. \param iw Width - * of input feature map, ifmap. \param ih Height of input feature map, ifmap. \param ic Number - * of channels of input feature map. \param kh Height of convolution kernels. \param kw Width of - * convolution kernels. - * - * \return error code - * - */ - int - my_ai_hw_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic, - int kh, int kw) { - - int kw_low = kw / 2; - int kh_low = kh / 2; - int kw_high = iw + kw / 2; - int kh_high = ih + kh / 2; - - int padded_iw = iw + 2 * kw_low; - int padded_ih = ih + 2 * kh_low; - - // This is only example code. A real hardware accelerator would call a device specific malloc - // function. - float* pad_temp = (float*)malloc( - (((ic * padded_iw * padded_ih) + (padded_ih * padded_iw)) + padded_iw) * sizeof(float)); - - if (pad_temp == NULL) { - return -1; - } - - for (int i1 = 0; i1 < ic; ++i1) { - for (int i2 = 0; i2 < padded_ih; ++i2) { - for (int i3 = 0; i3 < padded_iw; ++i3) { - ((float*)pad_temp)[(((i1 * padded_iw * padded_ih) + (i2 * padded_iw)) + i3)] = - (((((kh_low <= i2) && (i2 < kh_high)) && (kw_low <= i3)) && (i3 < kw_high)) - ? ifmap[((((i1 * iw * ih) + ((i2 - kh_low) * iw)) + i3 - kw_low))] - : 0.000000e+00f); - } - } - } - for (int i11 = 0; i11 < oc; ++i11) { - for (int i21 = 0; i21 < ih; ++i21) { - for (int i31 = 0; i31 < iw; ++i31) { - for (int i4 = 0; i4 < ic; ++i4) { - for (int i5 = 0; i5 < kh; ++i5) { - for (int i6 = 0; i6 < kw; ++i6) { - int cse_var_1 = (((i11 * iw * ih) + (i21 * iw)) + i31); - if (((i4 == 0) && (i5 == 0)) && (i6 == 0)) { - result[cse_var_1] = 0.000000e+00f; - } - result[cse_var_1] = - (result[cse_var_1] + - (((float*) - pad_temp)[i4 * padded_iw * padded_ih + (i21 + i5) * padded_iw + i31 + i6] * - weights[((((i11 * ic * kh * kw) + (i4 * kh * kw)) + (i5 * kw)) + i6)])); - } - } - } - } - } - } - - // This is only example code. A real hardware accelerator would call a device specific free - // function. - free(pad_temp); - return 0; -} diff --git a/apps/uma/_template/passes.py b/apps/uma/_template/passes.py deleted file mode 100644 index b4f261a5ab49..000000000000 --- a/apps/uma/_template/passes.py +++ /dev/null @@ -1,136 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Transform passes for the my_ai_hw accelerator""" - -import tvm -from tvm import tir -from tvm.relay.backend.contrib.uma.api.utils import add_llvm_to_block - - -@tvm.tir.transform.prim_func_pass(opt_level=2) -class MyAiHwConv2dPass: - _EXTERNAL_FUNCTION_NAME = "my_ai_hw_conv2dnchw" - _TVM_BLOCK_MATCH_NAME = "conv2d_nchw" - - def transform_function( - self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext - ) -> tvm.tir.PrimFunc: - return self._my_ai_hw_conv2d_pass(func, mod, ctx) - - @classmethod - def _my_ai_hw_conv2d_pass(cls, func, mod, ctx): - _loops = dict() - _handles = [] - _entry_node = None - - def _has_block(name: str, func: tvm.tir.PrimFunc) -> bool: - """ - Determine of a tir.block with `name` exists in `func` - """ - - def _hb(op): - if isinstance(op, tvm.tir.Block): - _found_blocks.append(op.name_hint) - - _found_blocks = [] - tvm.tir.stmt_functor.post_order_visit(func.body, _hb) - return name in _found_blocks - - def _detect_and_replace_conv2d( - func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext - ) -> tvm.tir.PrimFunc: - def _replace_conv2d(op): - if op == _entry_node: - irb = tvm.tir.ir_builder.create() - # Collection of buffer address - buffers = [b[1].data for b in _handles] - # extraction of loop offsets - for k, v in _loops.items(): - assert v.min.value == 0 - offset_order = ["co", "w", "h", "ci", "kh", "kw"] - offsets = [_loops[i].extent.value for i in offset_order] - args = buffers + offsets - irb.emit(tir_call(irb, True, cls._EXTERNAL_FUNCTION_NAME, *args)) - irb_result = irb.get() - return irb_result - elif isinstance(op, tvm.tir.SeqStmt): - # Remove that pad block of TOPI's conv2DNCHW by only returning the 2nd statement - return op.seq[1] - return op - - sch = tir.Schedule(func) - - if _has_block(cls._TVM_BLOCK_MATCH_NAME, func): - conv2d_block = sch.get_block(cls._TVM_BLOCK_MATCH_NAME) - rv_loops = sch.get_loops(conv2d_block) - assert len(rv_loops) == 7 - loops = dict( - n=rv_loops[0], - co=rv_loops[1], - h=rv_loops[2], - w=rv_loops[3], - ci=rv_loops[4], - kh=rv_loops[5], - kw=rv_loops[6], - ) - _entry_node = sch.get(rv_loops[1]) - _loops = {k: sch.get(v) for k, v in loops.items()} - _handles = func.buffer_map.items() - - x = tvm.tir.stmt_functor.ir_transform( - func.body, None, _replace_conv2d, ["tir.For", "tir.SeqStmt"] - ) - return func.with_body(x) - else: - return func - - r = _detect_and_replace_conv2d(func, mod, ctx) - return r - - -def tir_call(ib: tvm.tir.ir_builder, extern: bool, name: str, *args): - """ - ib: ir_builder - extern: bool - True --> tvm.tir.call_extern - False --> tvm.tir.call_packed - name: str - function name - *args: - arguments for function call - """ - - def buf_from_array(ib, arr, dtype): - # Allocate enough memory to store the whole array - var = ib.allocate("int32", (len(arr),), scope="global") - for i, v in enumerate(arr): - var[i] = v - # Declare a buffer, which is basically a view on the chunk of memory that we allocated - buf = tvm.tir.decl_buffer((len(arr),), dtype, data=var, scope="global") - return buf - - if extern: - args = [i.data if isinstance(i, 
tvm.tir.Buffer) else i for i in args] - return tvm.tir.call_extern("int32", name, *args) - else: - args = [ - buf_from_array(ib, i, "int32") - if isinstance(i, (tuple, list, tvm.ir.container.Array)) - else i - for i in args - ] - return tvm.tir.call_packed(name, *args) diff --git a/apps/uma/_template/patterns.py b/apps/uma/_template/patterns.py deleted file mode 100644 index 1c841f2dbf1d..000000000000 --- a/apps/uma/_template/patterns.py +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Relay graph patterns for the my_ai_hw accelerator""" - -from tvm.relay.dataflow_pattern import is_op, wildcard - - -def conv2d_pattern(): - pattern = is_op("nn.conv2d")(wildcard(), wildcard()) - pattern = pattern.has_attr({"strides": [1, 1], "groups": 1}) - return pattern - - -def dense_pattern(): - pattern = is_op("nn.dense")(wildcard(), wildcard()) - return pattern diff --git a/apps/uma/_template/run.py b/apps/uma/_template/run.py deleted file mode 100644 index 852ae1234d0f..000000000000 --- a/apps/uma/_template/run.py +++ /dev/null @@ -1,82 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
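The deleted patterns.py above restricts offloading to unit-stride, non-grouped nn.conv2d. A minimal sketch of how such a dataflow pattern behaves, assuming a TVM build with the Relay dataflow-pattern API; the tiny graph below is made up purely for illustration:

```python
from tvm import relay
from tvm.relay.dataflow_pattern import is_op, wildcard

# Same shape of pattern as conv2d_pattern() above: conv2d with strides [1, 1] and groups == 1.
pattern = is_op("nn.conv2d")(wildcard(), wildcard()).has_attr({"strides": [1, 1], "groups": 1})

x = relay.var("x", shape=(1, 32, 14, 14), dtype="float32")
w = relay.var("w", shape=(32, 32, 3, 3), dtype="float32")
y = relay.nn.conv2d(x, w, kernel_size=(3, 3), padding=(1, 1))  # default strides/groups

assert pattern.match(y)  # matched: stride (1, 1), groups == 1

grouped = relay.nn.conv2d(x, w, kernel_size=(3, 3), padding=(1, 1), groups=32)
assert not pattern.match(grouped)  # rejected: grouped convolution
```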
-from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER -import tvm -from tvm import relay -from backend import MyAiHwBackend -from tvm.relay import transform -from collections import OrderedDict -import numpy as np - - -from tvm.testing.aot import ( - AOTTestModel as AOTModel, - AOTTestRunner as AOTRunner, - generate_ref_data, - compile_and_run, -) - - -def create_conv2d(groups=1, runner=AOT_DEFAULT_RUNNER, weight_shape=32): - dtype = "float32" - ishape = (1, 32, 14, 14) - wshape = (32, weight_shape, 3, 3) - pass_config = {"tir.usmp.enable": True} - runner = AOTRunner( - makefile=runner.makefile, - prologue=runner.prologue, - epilogue=runner.epilogue, - includes=runner.includes, - parameters=runner.parameters, - pass_config=pass_config, - ) - data0 = relay.var("data", shape=ishape, dtype=dtype) - weight0 = relay.var("weight", shape=wshape, dtype=dtype) - out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=groups) - main_f = relay.Function([data0, weight0], out) - mod = tvm.IRModule() - mod["main"] = main_f - mod = transform.InferType()(mod) - i_data = np.random.uniform(0, 1, ishape).astype(dtype) - w1_data = np.random.uniform(0, 1, wshape).astype(dtype) - inputs = OrderedDict([("data", i_data), ("weight", w1_data)]) - output_list = generate_ref_data(mod, inputs) - return mod, inputs, output_list, runner - - -def main(): - mod, inputs, output_list, runner = create_conv2d() - - uma_backend = MyAiHwBackend() - uma_backend.register() - mod = uma_backend.partition(mod) - target = tvm.target.Target("my_ai_hw", host=tvm.target.Target("c")) - - export_directory = tvm.contrib.utils.tempdir(keep_for_debug=True).path - print(f"Generated files are in {export_directory}") - compile_and_run( - AOTModel(module=mod, inputs=inputs, outputs=output_list), - runner, - interface_api="c", - use_unpacked_api=True, - target=target, - test_dir=str(export_directory), - ) - - -if __name__ == "__main__": - main() diff --git a/apps/uma/_template/strategies.py b/apps/uma/_template/strategies.py deleted file mode 100644 index aa1ea07280e4..000000000000 --- a/apps/uma/_template/strategies.py +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""Strategies for the my_ai_hw accelerator""" - -# Example how to integrate a custom conv1d strategy: - -# @relay.op.strategy.override_native_generic_func("custom_conv1d_strategy") -# def custom_conv1d_strategy(attrs, inputs, out_type, target): -# strategy = _op.OpStrategy() -# strategy.add_implementation( -# wrap_compute_conv1d(custom_conv1d_compute), -# wrap_topi_schedule(custom_conv1d_schedule), -# name="custom_conv1d.generic", -# return strategy -# - -# For further details see: -# - github.com/apache/tvm-rfcs/blob/main/rfcs/0060_UMA_Unified_Modular_Accelerator_Interface.md -# - $TVM_HOME/python/tvm/relay/op/strategy/x86.py diff --git a/apps/uma/uma_cli.py b/apps/uma/uma_cli.py deleted file mode 100644 index 159fa9e62cb6..000000000000 --- a/apps/uma/uma_cli.py +++ /dev/null @@ -1,98 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" - UMA Command Line Interface (CLI) - - Tool to create code skeletons for an easy integration of - new AI hardware accelerators/libraries into TVM using UMA -""" - -import argparse -import os -import shutil -import sys -import pathlib -from inflection import camelize, underscore - - -def _parse_args(): - parser = argparse.ArgumentParser(description="UMA Interface command line interface") - parser.add_argument( - "--add_hardware", - type=str, - required=True, - ) - parser.add_argument( - "--tutorial", - type=str, - ) - args = parser.parse_args() - return args - - -def replace_template_name( - files: list, template_name: str, add_hw_name: str, template_source: str = "_template" -) -> None: - """ - Replace names in template skeleton code by new name - """ - for f in files: - with open(f) as read_file: - data = read_file.read() - for case in [underscore, camelize]: - data = data.replace(case(template_name), case(add_hw_name)) - data = data.replace(template_source, underscore(add_hw_name)) - with open(f, "w") as write_file: - write_file.write(data) - - -def main(): - """ - UMA Command Line Interface (CLI) - """ - args = _parse_args() - add_hw_name = args.add_hardware - uma_template_path = pathlib.Path(os.getcwd(), "_template").absolute() - - add_hw_path = os.path.join(uma_template_path.parent, add_hw_name) - if os.path.exists(add_hw_path): - print( - f"Hardware with name {add_hw_name} already exists in UMA file structure: {add_hw_path}" - ) - sys.exit(-1) - else: - os.mkdir(add_hw_path) - - uma_files = ["backend.py", "codegen.py", "passes.py", "patterns.py", "run.py", "strategies.py"] - if args.tutorial == "vanilla": - uma_files.append("conv2dnchw.cc") - - source_files = [os.path.join(uma_template_path, f) for f in uma_files] - destination_files = [os.path.join(add_hw_path, f) for f in uma_files] - - for src, dst in zip(source_files, destination_files): - shutil.copyfile(src, dst) - - template_name = 
"my_ai_hw" - replace_template_name(destination_files, template_name, add_hw_name) - - print(f"Success: added {add_hw_name} to {add_hw_path}") - - -if __name__ == "__main__": - main() diff --git a/ci/jenkins/data.py b/ci/jenkins/data.py index 492608870e01..8cf762f16125 100644 --- a/ci/jenkins/data.py +++ b/ci/jenkins/data.py @@ -27,12 +27,6 @@ "hexagon_api": [ "build/hexagon_api_output", ], - # Folder for microtvm build - "microtvm_template_projects": [ - "build/microtvm_template_projects", - ], - # Folders and build files for c runtime - "standalone_crt": ["build/standalone_crt", "build/build.ninja"], # This library is produced with HIDE_PRIVATE_SYMBOLS=ON "tvm_allvisible": ["build/libtvm_allvisible.so"], # runtime files @@ -46,14 +40,6 @@ "build/libtvm_runtime.so", "build/config.cmake", ], - # compiler files, fsim, and tsim - "tvm_multilib_tsim": [ - "build/libvta_tsim.so", - "build/libtvm.so", - "build/libvta_fsim.so", - "build/libtvm_runtime.so", - "build/config.cmake", - ], } diff --git a/ci/jenkins/generated/arm_jenkinsfile.groovy b/ci/jenkins/generated/arm_jenkinsfile.groovy index d4447d2ca81d..c33be22a6f9c 100644 --- a/ci/jenkins/generated/arm_jenkinsfile.groovy +++ b/ci/jenkins/generated/arm_jenkinsfile.groovy @@ -480,25 +480,6 @@ def python_unittest(image) { ) } -def make_standalone_crt(image, build_dir) { - sh ( - script: """ - set -eux - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target standalone_crt \ - --build-dir build - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target crttest \ - --build-dir build - """, - label: 'Make standalone CRT', - ) -} - def make_cpp_tests(image, build_dir) { sh ( script: """ @@ -526,13 +507,6 @@ def cpp_unittest(image) { ) } -def micro_cpp_unittest(image) { - sh ( - script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_microtvm_cpp_tests.sh build", - label: 'Run microTVM C++ tests', - ) -} - cancel_previous_build() try { @@ -557,10 +531,9 @@ def build(node_type) { label: 'Create ARM cmake config', ) cmake_build(ci_arm, 'build', '-j4') - make_standalone_crt(ci_arm, 'build') make_cpp_tests(ci_arm, 'build') sh( - script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/arm --items build/libtvm.so build/libvta_fsim.so build/libtvm_runtime.so build/config.cmake build/cpptest build/build.ninja build/CMakeFiles/rules.ninja build/crttest build/standalone_crt build/build.ninja build/microtvm_template_projects", + script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/arm --items build/libtvm.so build/libvta_fsim.so build/libtvm_runtime.so build/config.cmake build/cpptest build/build.ninja build/CMakeFiles/rules.ninja build/crttest build/build.ninja build/microtvm_template_projects", label: 'Upload artifacts to S3', ) }) @@ -802,7 +775,6 @@ def shard_run_topi_aarch64_1_of_2(node_type='ARM-GRAVITON3-SPOT', on_demand=fals ci_setup(ci_arm) cpp_unittest(ci_arm) - micro_cpp_unittest(ci_arm) sh ( script: "${docker_run} ${ci_arm} ./tests/scripts/task_python_arm_compute_library.sh", label: 'Run test_arm_compute_lib test', diff --git a/ci/jenkins/generated/cortexm_jenkinsfile.groovy b/ci/jenkins/generated/cortexm_jenkinsfile.groovy index 8efdf23f9f88..6bd4b2e092b8 100644 --- a/ci/jenkins/generated/cortexm_jenkinsfile.groovy +++ 
b/ci/jenkins/generated/cortexm_jenkinsfile.groovy @@ -480,25 +480,6 @@ def python_unittest(image) { ) } -def make_standalone_crt(image, build_dir) { - sh ( - script: """ - set -eux - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target standalone_crt \ - --build-dir build - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target crttest \ - --build-dir build - """, - label: 'Make standalone CRT', - ) -} - def make_cpp_tests(image, build_dir) { sh ( script: """ @@ -526,13 +507,6 @@ def cpp_unittest(image) { ) } -def micro_cpp_unittest(image) { - sh ( - script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_microtvm_cpp_tests.sh build", - label: 'Run microTVM C++ tests', - ) -} - cancel_previous_build() try { @@ -557,10 +531,9 @@ def build(node_type) { label: 'Create Cortex-M cmake config', ) cmake_build(ci_cortexm, 'build', '-j2') - make_standalone_crt(ci_cortexm, 'build') make_cpp_tests(ci_cortexm, 'build') sh( - script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/cortexm --items build/libtvm.so build/libtvm_runtime.so build/config.cmake build/libtvm_allvisible.so build/crttest build/standalone_crt build/build.ninja build/cpptest build/build.ninja build/CMakeFiles/rules.ninja build/microtvm_template_projects", + script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/cortexm --items build/libtvm.so build/libtvm_runtime.so build/config.cmake build/libtvm_allvisible.so build/crttest build/build.ninja build/cpptest build/build.ninja build/CMakeFiles/rules.ninja build/microtvm_template_projects", label: 'Upload artifacts to S3', ) }) @@ -604,7 +577,6 @@ def shard_run_test_Cortex_M_1_of_12(node_type='CPU-SMALL-SPOT', on_demand=false) ci_setup(ci_cortexm) cpp_unittest(ci_cortexm) - micro_cpp_unittest(ci_cortexm) sh ( script: "${docker_run} ${ci_cortexm} ./tests/scripts/task_demo_microtvm.sh", label: 'Run microTVM demos', diff --git a/ci/jenkins/generated/cpu_jenkinsfile.groovy b/ci/jenkins/generated/cpu_jenkinsfile.groovy index a97ae1484f0c..04ab19f40414 100644 --- a/ci/jenkins/generated/cpu_jenkinsfile.groovy +++ b/ci/jenkins/generated/cpu_jenkinsfile.groovy @@ -480,25 +480,6 @@ def python_unittest(image) { ) } -def make_standalone_crt(image, build_dir) { - sh ( - script: """ - set -eux - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target standalone_crt \ - --build-dir build - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target crttest \ - --build-dir build - """, - label: 'Make standalone CRT', - ) -} - def make_cpp_tests(image, build_dir) { sh ( script: """ @@ -526,13 +507,6 @@ def cpp_unittest(image) { ) } -def micro_cpp_unittest(image) { - sh ( - script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_microtvm_cpp_tests.sh build", - label: 'Run microTVM C++ tests', - ) -} - cancel_previous_build() try { @@ -557,10 +531,9 @@ def build(node_type) { label: 'Create CPU cmake config', ) cmake_build(ci_cpu, 'build', '-j2') - make_standalone_crt(ci_cpu, 'build') make_cpp_tests(ci_cpu, 'build') sh( - script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix 
${s3_prefix}/cpu --items build/libvta_tsim.so build/libtvm.so build/libvta_fsim.so build/libtvm_runtime.so build/config.cmake build/libtvm_allvisible.so build/crttest build/cpptest build/build.ninja build/CMakeFiles/rules.ninja build/standalone_crt build/build.ninja build/microtvm_template_projects", + script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/cpu --items build/libvta_tsim.so build/libtvm.so build/libvta_fsim.so build/libtvm_runtime.so build/config.cmake build/libtvm_allvisible.so build/crttest build/cpptest build/build.ninja build/CMakeFiles/rules.ninja build/build.ninja build/microtvm_template_projects", label: 'Upload artifacts to S3', ) @@ -803,7 +776,6 @@ def shard_run_unittest_CPU_1_of_1(node_type='CPU-SMALL-SPOT', on_demand=false) { ci_setup(ci_cpu) cpp_unittest(ci_cpu) - micro_cpp_unittest(ci_cpu) python_unittest(ci_cpu) }) } diff --git a/ci/jenkins/generated/docker_jenkinsfile.groovy b/ci/jenkins/generated/docker_jenkinsfile.groovy index 0451f698488d..796fbe0c2d64 100644 --- a/ci/jenkins/generated/docker_jenkinsfile.groovy +++ b/ci/jenkins/generated/docker_jenkinsfile.groovy @@ -480,25 +480,6 @@ def python_unittest(image) { ) } -def make_standalone_crt(image, build_dir) { - sh ( - script: """ - set -eux - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target standalone_crt \ - --build-dir build - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target crttest \ - --build-dir build - """, - label: 'Make standalone CRT', - ) -} - def make_cpp_tests(image, build_dir) { sh ( script: """ @@ -526,13 +507,6 @@ def cpp_unittest(image) { ) } -def micro_cpp_unittest(image) { - sh ( - script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_microtvm_cpp_tests.sh build", - label: 'Run microTVM C++ tests', - ) -} - cancel_previous_build() try { diff --git a/ci/jenkins/generated/gpu_jenkinsfile.groovy b/ci/jenkins/generated/gpu_jenkinsfile.groovy index 0c9e48fc9d0b..014377f05d36 100644 --- a/ci/jenkins/generated/gpu_jenkinsfile.groovy +++ b/ci/jenkins/generated/gpu_jenkinsfile.groovy @@ -480,25 +480,6 @@ def python_unittest(image) { ) } -def make_standalone_crt(image, build_dir) { - sh ( - script: """ - set -eux - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target standalone_crt \ - --build-dir build - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target crttest \ - --build-dir build - """, - label: 'Make standalone CRT', - ) -} - def make_cpp_tests(image, build_dir) { sh ( script: """ @@ -526,13 +507,6 @@ def cpp_unittest(image) { ) } -def micro_cpp_unittest(image) { - sh ( - script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_microtvm_cpp_tests.sh build", - label: 'Run microTVM C++ tests', - ) -} - cancel_previous_build() try { @@ -554,9 +528,8 @@ def build(node_type) { ], { sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu.sh build" cmake_build("${ci_gpu} --no-gpu", 'build', '-j2') - make_standalone_crt("${ci_gpu} --no-gpu", 'build') sh( - script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/gpu --items build/libtvm.so build/libvta_fsim.so 
build/libtvm_runtime.so build/config.cmake build/libtvm_allvisible.so build/microtvm_template_projects build/crttest build/standalone_crt build/build.ninja build/3rdparty/libflash_attn/src/libflash_attn.so build/3rdparty/cutlass_fpA_intB_gemm/cutlass_kernels/libfpA_intB_gemm.so", + script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/gpu --items build/libtvm.so build/libvta_fsim.so build/libtvm_runtime.so build/config.cmake build/libtvm_allvisible.so build/microtvm_template_projects build/crttest build/build.ninja build/3rdparty/libflash_attn/src/libflash_attn.so build/3rdparty/cutlass_fpA_intB_gemm/cutlass_kernels/libfpA_intB_gemm.so", label: 'Upload artifacts to S3', ) @@ -565,9 +538,8 @@ def build(node_type) { sh "rm -rf build" sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu_other.sh build" cmake_build("${ci_gpu} --no-gpu", 'build', '-j2') - make_standalone_crt("${ci_gpu} --no-gpu", 'build') sh( - script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/gpu2 --items build/libtvm.so build/libtvm_runtime.so build/config.cmake build/crttest build/standalone_crt build/build.ninja build/3rdparty/libflash_attn/src/libflash_attn.so build/3rdparty/cutlass_fpA_intB_gemm/cutlass_kernels/libfpA_intB_gemm.so", + script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/gpu2 --items build/libtvm.so build/libtvm_runtime.so build/config.cmake build/crttest build/build.ninja build/3rdparty/libflash_attn/src/libflash_attn.so build/3rdparty/cutlass_fpA_intB_gemm/cutlass_kernels/libfpA_intB_gemm.so", label: 'Upload artifacts to S3', ) }) @@ -611,7 +583,6 @@ def shard_run_unittest_GPU_1_of_3(node_type='GPU-SPOT', on_demand=false) { sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu_other.sh build" // These require a GPU to finish the build (i.e. 
CUDA needs to be load-able) - make_standalone_crt(ci_gpu, 'build') // make_cpp_tests(ci_gpu, 'build') // cpp_unittest(ci_gpu) @@ -623,7 +594,6 @@ def shard_run_unittest_GPU_1_of_3(node_type='GPU-SPOT', on_demand=false) { ci_setup(ci_gpu) sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu.sh build" - make_standalone_crt(ci_gpu, 'build') make_cpp_tests(ci_gpu, 'build') cpp_unittest(ci_gpu) sh ( @@ -634,7 +604,6 @@ def shard_run_unittest_GPU_1_of_3(node_type='GPU-SPOT', on_demand=false) { script: "${docker_run} ${ci_gpu} ./tests/scripts/task_opencl_cpp_unittest.sh", label: 'Run OpenCL cpp unit tests', ) - micro_cpp_unittest(ci_gpu) sh ( script: "${docker_run} ${ci_gpu} ./tests/scripts/task_python_unittest_gpuonly.sh", label: 'Run Python GPU unit tests', diff --git a/ci/jenkins/generated/hexagon_jenkinsfile.groovy b/ci/jenkins/generated/hexagon_jenkinsfile.groovy index 0aaaec858a9b..a48b12ecd25b 100644 --- a/ci/jenkins/generated/hexagon_jenkinsfile.groovy +++ b/ci/jenkins/generated/hexagon_jenkinsfile.groovy @@ -480,25 +480,6 @@ def python_unittest(image) { ) } -def make_standalone_crt(image, build_dir) { - sh ( - script: """ - set -eux - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target standalone_crt \ - --build-dir build - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target crttest \ - --build-dir build - """, - label: 'Make standalone CRT', - ) -} - def make_cpp_tests(image, build_dir) { sh ( script: """ @@ -526,13 +507,6 @@ def cpp_unittest(image) { ) } -def micro_cpp_unittest(image) { - sh ( - script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_microtvm_cpp_tests.sh build", - label: 'Run microTVM C++ tests', - ) -} - cancel_previous_build() try { diff --git a/ci/jenkins/generated/i386_jenkinsfile.groovy b/ci/jenkins/generated/i386_jenkinsfile.groovy index 840a0a5d9d8b..47d20564730f 100644 --- a/ci/jenkins/generated/i386_jenkinsfile.groovy +++ b/ci/jenkins/generated/i386_jenkinsfile.groovy @@ -480,25 +480,6 @@ def python_unittest(image) { ) } -def make_standalone_crt(image, build_dir) { - sh ( - script: """ - set -eux - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target standalone_crt \ - --build-dir build - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target crttest \ - --build-dir build - """, - label: 'Make standalone CRT', - ) -} - def make_cpp_tests(image, build_dir) { sh ( script: """ @@ -526,12 +507,6 @@ def cpp_unittest(image) { ) } -def micro_cpp_unittest(image) { - sh ( - script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_microtvm_cpp_tests.sh build", - label: 'Run microTVM C++ tests', - ) -} cancel_previous_build() @@ -557,10 +532,9 @@ def build(node_type) { label: 'Create i386 cmake config', ) cmake_build(ci_i386, 'build', '-j2') - make_standalone_crt(ci_i386, 'build') make_cpp_tests(ci_i386, 'build') sh( - script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/i386 --items build/libvta_tsim.so build/libtvm.so build/libvta_fsim.so build/libtvm_runtime.so build/config.cmake build/standalone_crt build/build.ninja build/crttest build/cpptest build/build.ninja 
build/CMakeFiles/rules.ninja build/microtvm_template_projects", + script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/i386 --items build/libvta_tsim.so build/libtvm.so build/libvta_fsim.so build/libtvm_runtime.so build/config.cmake build/build.ninja build/crttest build/cpptest build/build.ninja build/CMakeFiles/rules.ninja build/microtvm_template_projects", label: 'Upload artifacts to S3', ) }) @@ -605,7 +579,6 @@ def shard_run_python_i386_1_of_3(node_type='CPU-SMALL-SPOT', on_demand=false) { ci_setup(ci_i386) cpp_unittest(ci_i386) - micro_cpp_unittest(ci_i386) python_unittest(ci_i386) sh ( script: "${docker_run} ${ci_i386} ./tests/scripts/task_python_integration_i386only.sh", diff --git a/ci/jenkins/generated/lint_jenkinsfile.groovy b/ci/jenkins/generated/lint_jenkinsfile.groovy index ac7796b329fd..b1e300d7125d 100644 --- a/ci/jenkins/generated/lint_jenkinsfile.groovy +++ b/ci/jenkins/generated/lint_jenkinsfile.groovy @@ -480,25 +480,6 @@ def python_unittest(image) { ) } -def make_standalone_crt(image, build_dir) { - sh ( - script: """ - set -eux - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target standalone_crt \ - --build-dir build - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target crttest \ - --build-dir build - """, - label: 'Make standalone CRT', - ) -} - def make_cpp_tests(image, build_dir) { sh ( script: """ @@ -526,13 +507,6 @@ def cpp_unittest(image) { ) } -def micro_cpp_unittest(image) { - sh ( - script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_microtvm_cpp_tests.sh build", - label: 'Run microTVM C++ tests', - ) -} - cancel_previous_build() try { diff --git a/ci/jenkins/generated/minimal_cross_isa_jenkinsfile.groovy b/ci/jenkins/generated/minimal_cross_isa_jenkinsfile.groovy index 76c0bd5830f5..3645cf90364c 100644 --- a/ci/jenkins/generated/minimal_cross_isa_jenkinsfile.groovy +++ b/ci/jenkins/generated/minimal_cross_isa_jenkinsfile.groovy @@ -480,25 +480,6 @@ def python_unittest(image) { ) } -def make_standalone_crt(image, build_dir) { - sh ( - script: """ - set -eux - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target standalone_crt \ - --build-dir build - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target crttest \ - --build-dir build - """, - label: 'Make standalone CRT', - ) -} - def make_cpp_tests(image, build_dir) { sh ( script: """ @@ -526,13 +507,6 @@ def cpp_unittest(image) { ) } -def micro_cpp_unittest(image) { - sh ( - script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_microtvm_cpp_tests.sh build", - label: 'Run microTVM C++ tests', - ) -} - cancel_previous_build() try { @@ -558,7 +532,7 @@ def build(node_type) { ) cmake_build(ci_minimal, 'build', '-j2') sh( - script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/cpu-minimal-cross-isa --items build/libtvm.so build/libtvm_runtime.so build/config.cmake build/libtvm_allvisible.so build/standalone_crt build/build.ninja build/microtvm_template_projects", + script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/cpu-minimal-cross-isa --items 
build/libtvm.so build/libtvm_runtime.so build/config.cmake build/libtvm_allvisible.so build/build.ninja build/microtvm_template_projects", label: 'Upload artifacts to S3', ) }) diff --git a/ci/jenkins/generated/minimal_jenkinsfile.groovy b/ci/jenkins/generated/minimal_jenkinsfile.groovy index 7f57cef32e64..9ae238012235 100644 --- a/ci/jenkins/generated/minimal_jenkinsfile.groovy +++ b/ci/jenkins/generated/minimal_jenkinsfile.groovy @@ -480,25 +480,6 @@ def python_unittest(image) { ) } -def make_standalone_crt(image, build_dir) { - sh ( - script: """ - set -eux - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target standalone_crt \ - --build-dir build - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target crttest \ - --build-dir build - """, - label: 'Make standalone CRT', - ) -} - def make_cpp_tests(image, build_dir) { sh ( script: """ @@ -526,13 +507,6 @@ def cpp_unittest(image) { ) } -def micro_cpp_unittest(image) { - sh ( - script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_microtvm_cpp_tests.sh build", - label: 'Run microTVM C++ tests', - ) -} - cancel_previous_build() try { @@ -557,10 +531,9 @@ def build(node_type) { label: 'Create CPU minimal cmake config', ) cmake_build(ci_minimal, 'build', '-j2') - make_standalone_crt(ci_minimal, 'build') make_cpp_tests(ci_minimal, 'build') sh( - script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/cpu-minimal --items build/libtvm.so build/libtvm_runtime.so build/config.cmake build/libtvm_allvisible.so build/crttest build/cpptest build/build.ninja build/CMakeFiles/rules.ninja build/standalone_crt build/build.ninja build/microtvm_template_projects", + script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/cpu-minimal --items build/libtvm.so build/libtvm_runtime.so build/config.cmake build/libtvm_allvisible.so build/crttest build/cpptest build/build.ninja build/CMakeFiles/rules.ninja build/build.ninja build/microtvm_template_projects", label: 'Upload artifacts to S3', ) }) diff --git a/ci/jenkins/generated/riscv_jenkinsfile.groovy b/ci/jenkins/generated/riscv_jenkinsfile.groovy index 1667cd02c994..f836d71630f9 100644 --- a/ci/jenkins/generated/riscv_jenkinsfile.groovy +++ b/ci/jenkins/generated/riscv_jenkinsfile.groovy @@ -480,25 +480,6 @@ def python_unittest(image) { ) } -def make_standalone_crt(image, build_dir) { - sh ( - script: """ - set -eux - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target standalone_crt \ - --build-dir build - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target crttest \ - --build-dir build - """, - label: 'Make standalone CRT', - ) -} - def make_cpp_tests(image, build_dir) { sh ( script: """ @@ -526,13 +507,6 @@ def cpp_unittest(image) { ) } -def micro_cpp_unittest(image) { - sh ( - script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_microtvm_cpp_tests.sh build", - label: 'Run microTVM C++ tests', - ) -} - cancel_previous_build() try { @@ -557,10 +531,9 @@ def build(node_type) { label: 'Create RISC-V cmake config', ) cmake_build(ci_riscv, 'build', '-j2') - make_standalone_crt(ci_riscv, 'build') 
make_cpp_tests(ci_riscv, 'build') sh( - script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/riscv --items build/libtvm.so build/libtvm_runtime.so build/config.cmake build/libtvm_allvisible.so build/standalone_crt build/build.ninja build/crttest build/cpptest build/build.ninja build/CMakeFiles/rules.ninja build/microtvm_template_projects", + script: "./${jenkins_scripts_root}/s3.py --action upload --bucket ${s3_bucket} --prefix ${s3_prefix}/riscv --items build/libtvm.so build/libtvm_runtime.so build/config.cmake build/libtvm_allvisible.so build/build.ninja build/crttest build/cpptest build/build.ninja build/CMakeFiles/rules.ninja build/microtvm_template_projects", label: 'Upload artifacts to S3', ) }) @@ -606,7 +579,6 @@ def shard_run_test_RISC_V_1_of_1(node_type='CPU-SMALL-SPOT', on_demand=false) { ci_setup(ci_riscv) cpp_unittest(ci_riscv) - micro_cpp_unittest(ci_riscv) sh ( script: "${docker_run} ${ci_riscv} ./tests/scripts/task_riscv_microtvm.sh", label: 'Run microTVM tests', diff --git a/ci/jenkins/generated/wasm_jenkinsfile.groovy b/ci/jenkins/generated/wasm_jenkinsfile.groovy index 84f511de9558..da7cdfd5c1f3 100644 --- a/ci/jenkins/generated/wasm_jenkinsfile.groovy +++ b/ci/jenkins/generated/wasm_jenkinsfile.groovy @@ -480,25 +480,6 @@ def python_unittest(image) { ) } -def make_standalone_crt(image, build_dir) { - sh ( - script: """ - set -eux - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target standalone_crt \ - --build-dir build - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target crttest \ - --build-dir build - """, - label: 'Make standalone CRT', - ) -} - def make_cpp_tests(image, build_dir) { sh ( script: """ @@ -526,13 +507,6 @@ def cpp_unittest(image) { ) } -def micro_cpp_unittest(image) { - sh ( - script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_microtvm_cpp_tests.sh build", - label: 'Run microTVM C++ tests', - ) -} - cancel_previous_build() try { @@ -557,7 +531,6 @@ def build(node_type) { label: 'Create WASM cmake config', ) cmake_build(ci_wasm, 'build', '-j2') - make_standalone_crt(ci_wasm, 'build') make_cpp_tests(ci_wasm, 'build') cpp_unittest(ci_wasm) ci_setup(ci_wasm) diff --git a/ci/jenkins/templates/arm_jenkinsfile.groovy.j2 b/ci/jenkins/templates/arm_jenkinsfile.groovy.j2 index f606f439b135..c512ddd0ce12 100644 --- a/ci/jenkins/templates/arm_jenkinsfile.groovy.j2 +++ b/ci/jenkins/templates/arm_jenkinsfile.groovy.j2 @@ -30,9 +30,8 @@ label: 'Create ARM cmake config', ) cmake_build(ci_arm, 'build', '-j4') - make_standalone_crt(ci_arm, 'build') make_cpp_tests(ci_arm, 'build') - {{ m.upload_artifacts(tag='arm', filenames=tvm_multilib + cpptest + crttest + standalone_crt + microtvm_template_projects) }} + {{ m.upload_artifacts(tag='arm', filenames=tvm_multilib + cpptest + crttest + microtvm_template_projects) }} {% endcall %} {% set test_method_names = [] %} @@ -68,7 +67,6 @@ ci_setup(ci_arm) {% if shard_index == 1 %} cpp_unittest(ci_arm) - micro_cpp_unittest(ci_arm) {% endif %} sh ( script: "${docker_run} ${ci_arm} ./tests/scripts/task_python_arm_compute_library.sh", diff --git a/ci/jenkins/templates/cortexm_jenkinsfile.groovy.j2 b/ci/jenkins/templates/cortexm_jenkinsfile.groovy.j2 index a6708cfb6355..4f3830670114 100644 --- a/ci/jenkins/templates/cortexm_jenkinsfile.groovy.j2 +++ 
b/ci/jenkins/templates/cortexm_jenkinsfile.groovy.j2 @@ -30,9 +30,8 @@ label: 'Create Cortex-M cmake config', ) cmake_build(ci_cortexm, 'build', '-j2') - make_standalone_crt(ci_cortexm, 'build') make_cpp_tests(ci_cortexm, 'build') - {{ m.upload_artifacts(tag='cortexm', filenames=tvm_lib + tvm_allvisible + crttest + standalone_crt + cpptest + microtvm_template_projects) }} + {{ m.upload_artifacts(tag='cortexm', filenames=tvm_lib + tvm_allvisible + crttest + cpptest + microtvm_template_projects) }} {% endcall %} {% set test_method_names = [] %} @@ -50,7 +49,6 @@ ci_setup(ci_cortexm) {% if shard_index == 1%} cpp_unittest(ci_cortexm) - micro_cpp_unittest(ci_cortexm) sh ( script: "${docker_run} ${ci_cortexm} ./tests/scripts/task_demo_microtvm.sh", label: 'Run microTVM demos', diff --git a/ci/jenkins/templates/cpu_jenkinsfile.groovy.j2 b/ci/jenkins/templates/cpu_jenkinsfile.groovy.j2 index 3fb21863ccfc..4d6081f3f4af 100644 --- a/ci/jenkins/templates/cpu_jenkinsfile.groovy.j2 +++ b/ci/jenkins/templates/cpu_jenkinsfile.groovy.j2 @@ -30,9 +30,8 @@ label: 'Create CPU cmake config', ) cmake_build(ci_cpu, 'build', '-j2') - make_standalone_crt(ci_cpu, 'build') make_cpp_tests(ci_cpu, 'build') - {{ m.upload_artifacts(tag='cpu', filenames=tvm_multilib_tsim + tvm_allvisible + crttest + cpptest + standalone_crt + microtvm_template_projects) }} + {{ m.upload_artifacts(tag='cpu', filenames=tvm_multilib_tsim + tvm_allvisible + crttest + cpptest + microtvm_template_projects) }} ci_setup(ci_cpu) // sh "${docker_run} ${ci_cpu} ./tests/scripts/task_golang.sh" // TODO(@jroesch): need to resolve CI issue will turn back on in follow up patch @@ -70,7 +69,6 @@ {{ m.download_artifacts(tag='cpu') }} ci_setup(ci_cpu) cpp_unittest(ci_cpu) - micro_cpp_unittest(ci_cpu) python_unittest(ci_cpu) {% endcall %} {% call(shard_index, num_shards) m.sharded_test_step( diff --git a/ci/jenkins/templates/gpu_jenkinsfile.groovy.j2 b/ci/jenkins/templates/gpu_jenkinsfile.groovy.j2 index 65c06b0088de..b6c7c2cecb60 100644 --- a/ci/jenkins/templates/gpu_jenkinsfile.groovy.j2 +++ b/ci/jenkins/templates/gpu_jenkinsfile.groovy.j2 @@ -27,15 +27,13 @@ ) %} sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu.sh build" cmake_build("${ci_gpu} --no-gpu", 'build', '-j2') - make_standalone_crt("${ci_gpu} --no-gpu", 'build') - {{ m.upload_artifacts(tag='gpu', filenames=tvm_multilib + tvm_allvisible + microtvm_template_projects + crttest + standalone_crt) }} + {{ m.upload_artifacts(tag='gpu', filenames=tvm_multilib + tvm_allvisible + microtvm_template_projects + crttest) }} // compiler test sh "rm -rf build" sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu_other.sh build" cmake_build("${ci_gpu} --no-gpu", 'build', '-j2') - make_standalone_crt("${ci_gpu} --no-gpu", 'build') - {{ m.upload_artifacts(tag='gpu2', filenames=tvm_lib + crttest + standalone_crt) }} + {{ m.upload_artifacts(tag='gpu2', filenames=tvm_lib + crttest) }} {% endcall %} {% set test_method_names = [] %} @@ -53,7 +51,6 @@ {{ m.download_artifacts(tag='gpu2') }} sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu_other.sh build" // These require a GPU to finish the build (i.e. 
CUDA needs to be load-able) - make_standalone_crt(ci_gpu, 'build') // make_cpp_tests(ci_gpu, 'build') // cpp_unittest(ci_gpu) @@ -61,7 +58,6 @@ {{ m.download_artifacts(tag='gpu') }} ci_setup(ci_gpu) sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu.sh build" - make_standalone_crt(ci_gpu, 'build') make_cpp_tests(ci_gpu, 'build') cpp_unittest(ci_gpu) sh ( @@ -72,7 +68,6 @@ script: "${docker_run} ${ci_gpu} ./tests/scripts/task_opencl_cpp_unittest.sh", label: 'Run OpenCL cpp unit tests', ) - micro_cpp_unittest(ci_gpu) {% else %} {{ m.download_artifacts(tag='gpu') }} ci_setup(ci_gpu) diff --git a/ci/jenkins/templates/i386_jenkinsfile.groovy.j2 b/ci/jenkins/templates/i386_jenkinsfile.groovy.j2 index 78cf8cb2c04c..5b181367452d 100644 --- a/ci/jenkins/templates/i386_jenkinsfile.groovy.j2 +++ b/ci/jenkins/templates/i386_jenkinsfile.groovy.j2 @@ -30,9 +30,8 @@ label: 'Create i386 cmake config', ) cmake_build(ci_i386, 'build', '-j2') - make_standalone_crt(ci_i386, 'build') make_cpp_tests(ci_i386, 'build') - {{ m.upload_artifacts(tag='i386', filenames=tvm_multilib_tsim + standalone_crt + crttest + cpptest + microtvm_template_projects) }} + {{ m.upload_artifacts(tag='i386', filenames=tvm_multilib_tsim + crttest + cpptest + microtvm_template_projects) }} {% endcall %} @@ -51,7 +50,6 @@ ci_setup(ci_i386) {% if shard_index == 1 %} cpp_unittest(ci_i386) - micro_cpp_unittest(ci_i386) {% endif %} python_unittest(ci_i386) sh ( diff --git a/ci/jenkins/templates/minimal_cross_isa_jenkinsfile.groovy.j2 b/ci/jenkins/templates/minimal_cross_isa_jenkinsfile.groovy.j2 index 4b8a600b0e67..07c3890dfce5 100644 --- a/ci/jenkins/templates/minimal_cross_isa_jenkinsfile.groovy.j2 +++ b/ci/jenkins/templates/minimal_cross_isa_jenkinsfile.groovy.j2 @@ -30,7 +30,7 @@ label: 'Create CPU minimal cmake config', ) cmake_build(ci_minimal, 'build', '-j2') - {{ m.upload_artifacts(tag='cpu-minimal-cross-isa', filenames=tvm_lib + tvm_allvisible + standalone_crt + microtvm_template_projects) }} + {{ m.upload_artifacts(tag='cpu-minimal-cross-isa', filenames=tvm_lib + tvm_allvisible + microtvm_template_projects) }} {% endcall %} diff --git a/ci/jenkins/templates/minimal_jenkinsfile.groovy.j2 b/ci/jenkins/templates/minimal_jenkinsfile.groovy.j2 index a538aa406104..dd4dd02fc85d 100644 --- a/ci/jenkins/templates/minimal_jenkinsfile.groovy.j2 +++ b/ci/jenkins/templates/minimal_jenkinsfile.groovy.j2 @@ -30,9 +30,8 @@ label: 'Create CPU minimal cmake config', ) cmake_build(ci_minimal, 'build', '-j2') - make_standalone_crt(ci_minimal, 'build') make_cpp_tests(ci_minimal, 'build') - {{ m.upload_artifacts(tag='cpu-minimal', filenames=tvm_lib + tvm_allvisible + crttest + cpptest + standalone_crt + microtvm_template_projects) }} + {{ m.upload_artifacts(tag='cpu-minimal', filenames=tvm_lib + tvm_allvisible + crttest + cpptest + microtvm_template_projects) }} {% endcall %} diff --git a/ci/jenkins/templates/riscv_jenkinsfile.groovy.j2 b/ci/jenkins/templates/riscv_jenkinsfile.groovy.j2 index 8ad5a1a10d20..902e912f7561 100644 --- a/ci/jenkins/templates/riscv_jenkinsfile.groovy.j2 +++ b/ci/jenkins/templates/riscv_jenkinsfile.groovy.j2 @@ -30,9 +30,8 @@ label: 'Create RISC-V cmake config', ) cmake_build(ci_riscv, 'build', '-j2') - make_standalone_crt(ci_riscv, 'build') make_cpp_tests(ci_riscv, 'build') - {{ m.upload_artifacts(tag='riscv', filenames=tvm_lib + tvm_allvisible + standalone_crt + crttest + cpptest + microtvm_template_projects) }} + {{ m.upload_artifacts(tag='riscv', filenames=tvm_lib + tvm_allvisible + crttest + cpptest 
+ microtvm_template_projects) }} {% endcall %} @@ -52,7 +51,6 @@ ci_setup(ci_riscv) {% if shard_index == 1%} cpp_unittest(ci_riscv) - micro_cpp_unittest(ci_riscv) {% endif %} sh ( script: "${docker_run} ${ci_riscv} ./tests/scripts/task_riscv_microtvm.sh", diff --git a/ci/jenkins/templates/utils/Build.groovy.j2 b/ci/jenkins/templates/utils/Build.groovy.j2 index ce05f1c62b14..cf3cb61e7ae6 100644 --- a/ci/jenkins/templates/utils/Build.groovy.j2 +++ b/ci/jenkins/templates/utils/Build.groovy.j2 @@ -12,25 +12,6 @@ def python_unittest(image) { ) } -def make_standalone_crt(image, build_dir) { - sh ( - script: """ - set -eux - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target standalone_crt \ - --build-dir build - ${docker_run} ${image} python3 ./tests/scripts/task_build.py \ - --sccache-bucket tvm-sccache-prod \ - --sccache-region us-west-2 \ - --cmake-target crttest \ - --build-dir build - """, - label: 'Make standalone CRT', - ) -} - def make_cpp_tests(image, build_dir) { sh ( script: """ diff --git a/ci/jenkins/templates/utils/Test.groovy.j2 b/ci/jenkins/templates/utils/Test.groovy.j2 index 1e70869cfc2c..48041983b9d2 100644 --- a/ci/jenkins/templates/utils/Test.groovy.j2 +++ b/ci/jenkins/templates/utils/Test.groovy.j2 @@ -4,10 +4,3 @@ def cpp_unittest(image) { label: 'Run C++ tests', ) } - -def micro_cpp_unittest(image) { - sh ( - script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_microtvm_cpp_tests.sh build", - label: 'Run microTVM C++ tests', - ) -} diff --git a/ci/jenkins/templates/wasm_jenkinsfile.groovy.j2 b/ci/jenkins/templates/wasm_jenkinsfile.groovy.j2 index 5cd8a6fbcc7d..df33e91eac69 100644 --- a/ci/jenkins/templates/wasm_jenkinsfile.groovy.j2 +++ b/ci/jenkins/templates/wasm_jenkinsfile.groovy.j2 @@ -30,7 +30,6 @@ label: 'Create WASM cmake config', ) cmake_build(ci_wasm, 'build', '-j2') - make_standalone_crt(ci_wasm, 'build') make_cpp_tests(ci_wasm, 'build') cpp_unittest(ci_wasm) ci_setup(ci_wasm) diff --git a/ci/scripts/jenkins/git_skip_ci_globs.py b/ci/scripts/jenkins/git_skip_ci_globs.py index f51df055c980..c59329146017 100755 --- a/ci/scripts/jenkins/git_skip_ci_globs.py +++ b/ci/scripts/jenkins/git_skip_ci_globs.py @@ -32,9 +32,6 @@ "LICENSE", "NOTICE", "KEYS", - # microTVM - "apps/microtvm/poetry.lock", - "apps/microtvm/pyproject.toml", "tests/lint/*", "tests/scripts/task_lint.sh", ] diff --git a/cmake/config.cmake b/cmake/config.cmake index 0d912c0c75de..791751ac9885 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -123,9 +123,6 @@ set(USE_SPIRV_KHR_INTEGER_DOT_PRODUCT OFF) # Whether enable OpenGL runtime set(USE_OPENGL OFF) -# Whether enable MicroTVM runtime -set(USE_MICRO OFF) - # Whether enable RPC runtime set(USE_RPC ON) @@ -153,9 +150,6 @@ set(USE_PIPELINE_EXECUTOR OFF) # Whether to enable the profiler for the graph executor and vm set(USE_PROFILER ON) -# Whether enable microTVM standalone runtime -set(USE_MICRO_STANDALONE_RUNTIME OFF) - # Whether build with LLVM support # Requires LLVM version >= 4.0 # @@ -281,23 +275,6 @@ set(USE_SORT ON) set(USE_ARM_COMPUTE_LIB OFF) set(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR OFF) -# Whether to build with Arm Ethos-N support -# Possible values: -# - OFF: disable Arm Ethos-N support -# - path/to/arm-ethos-N-stack: use a specific version of the -# Ethos-N driver stack -set(USE_ETHOSN OFF) -# If USE_ETHOSN is enabled, use ETHOSN_HW (ON) if Ethos-N hardware is available on this machine -# otherwise use ETHOSN_HW 
(OFF) to use the software test infrastructure -set(USE_ETHOSN_HW OFF) - -# Whether to build with Arm(R) Ethos(TM)-U NPU codegen support -set(USE_ETHOSU OFF) - -# Whether to build with CMSIS-NN external library support. -# See https://github.com/ARM-software/CMSIS_5 -set(USE_CMSISNN OFF) - # Whether to build with TensorRT codegen or runtime # Examples are available here: docs/deploy/tensorrt.rst. # diff --git a/cmake/modules/LibInfo.cmake b/cmake/modules/LibInfo.cmake index ee6561dffce8..feef618dc2fe 100644 --- a/cmake/modules/LibInfo.cmake +++ b/cmake/modules/LibInfo.cmake @@ -64,7 +64,6 @@ function(add_lib_info src_file) TVM_INFO_USE_BLAS="${USE_BLAS}" TVM_INFO_USE_BNNS="${USE_BNNS}" TVM_INFO_USE_BYODT_POSIT="${USE_BYODT_POSIT}" - TVM_INFO_USE_CMSISNN="${USE_CMSISNN}" TVM_INFO_USE_COREML="${USE_COREML}" TVM_INFO_USE_CPP_RPC="${USE_CPP_RPC}" TVM_INFO_USE_CPP_RTVM="${USE_CPP_RTVM}" @@ -79,8 +78,6 @@ function(add_lib_info src_file) TVM_INFO_USE_FLASHINFER="${USE_FLASHINFER}" TVM_INFO_USE_AMX="${USE_AMX}" TVM_INFO_USE_DNNL="${USE_DNNL}" - TVM_INFO_USE_ETHOSN="${USE_ETHOSN}" - TVM_INFO_USE_ETHOSU="${USE_ETHOSU}" TVM_INFO_USE_FALLBACK_STL_MAP="${USE_FALLBACK_STL_MAP}" TVM_INFO_USE_GRAPH_EXECUTOR_CUDA_GRAPH="${USE_GRAPH_EXECUTOR_CUDA_GRAPH}" TVM_INFO_USE_GRAPH_EXECUTOR="${USE_GRAPH_EXECUTOR}" @@ -97,8 +94,6 @@ function(add_lib_info src_file) TVM_INFO_USE_LLVM="${USE_LLVM}" TVM_INFO_USE_MLIR="${USE_MLIR}" TVM_INFO_USE_METAL="${USE_METAL}" - TVM_INFO_USE_MICRO_STANDALONE_RUNTIME="${USE_MICRO_STANDALONE_RUNTIME}" - TVM_INFO_USE_MICRO="${USE_MICRO}" TVM_INFO_USE_MIOPEN="${USE_MIOPEN}" TVM_INFO_USE_MKL="${USE_MKL}" TVM_INFO_USE_MRVL="${USE_MRVL}" diff --git a/cmake/modules/Micro.cmake b/cmake/modules/Micro.cmake deleted file mode 100644 index d887486d2e98..000000000000 --- a/cmake/modules/Micro.cmake +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -if(USE_MICRO) - include(cmake/utils/CRTConfig.cmake) - - message(STATUS "Build with Micro support") - tvm_file_glob(GLOB RUNTIME_MICRO_SRCS src/runtime/micro/*.cc) - list(APPEND RUNTIME_SRCS ${RUNTIME_MICRO_SRCS}) - - function(microtvm_add_platform_project_api platform) - if("${platform}" STREQUAL "zephyr") - list( - APPEND - PLATFORM_FILE_COPY_JOBS - "apps/microtvm/zephyr/template_project microtvm_api_server.py -> zephyr" - "python/tvm/micro/project_api server.py -> zephyr" - "apps/microtvm/zephyr/template_project launch_microtvm_api_server.sh -> zephyr" - "apps/microtvm/zephyr/template_project boards.json -> zephyr" - "apps/microtvm/zephyr/template_project CMakeLists.txt.template -> zephyr" - "apps/microtvm/zephyr/template_project/src/aot_standalone_demo *.c -> zephyr/src/aot_standalone_demo" - "apps/microtvm/zephyr/template_project/src/host_driven *.c -> zephyr/src/host_driven" - "apps/microtvm/zephyr/template_project/src/host_driven *.h -> zephyr/src/host_driven" - "apps/microtvm/zephyr/template_project/src/mlperftiny *.cc -> zephyr/src/mlperftiny" - "3rdparty/mlperftiny/api * -> zephyr/src/mlperftiny/api" - "apps/microtvm/zephyr/template_project/fvp-hack * -> zephyr/fvp-hack" - "apps/microtvm/zephyr/template_project/qemu-hack * -> zephyr/qemu-hack" - "apps/microtvm/zephyr/template_project/app-overlay * -> zephyr/app-overlay" - ) - elseif("${platform}" STREQUAL "arduino") - list( - APPEND - PLATFORM_FILE_COPY_JOBS - "apps/microtvm/arduino/template_project microtvm_api_server.py -> arduino" - "python/tvm/micro/project_api server.py -> arduino" - "apps/microtvm/arduino/template_project launch_microtvm_api_server.sh -> arduino" - "apps/microtvm/arduino/template_project boards.json -> arduino" - "apps/microtvm/arduino/template_project/src/example_project *.c -> arduino/src/example_project" - "apps/microtvm/arduino/template_project/src/example_project *.h -> arduino/src/example_project" - "apps/microtvm/arduino/template_project/src/example_project *.ino -> arduino/src/example_project" - "apps/microtvm/arduino/template_project/src/host_driven *.c -> arduino/src/host_driven" - "apps/microtvm/arduino/template_project/src/host_driven *.ino -> arduino/src/host_driven" - "apps/microtvm/arduino/template_project Makefile.template -> arduino" - ) - elseif("${platform}" STREQUAL "crt") - list( - APPEND - PLATFORM_FILE_COPY_JOBS - "src/runtime/crt/host microtvm_api_server.py -> crt" - "src/runtime/crt/host CMakeLists.txt.template -> crt" - "src/runtime/crt/host **.cc -> crt/src" - ) - else() - message(FATAL_ERROR "${platform} not supported.") - endif() - - foreach(job_spec IN LISTS PLATFORM_FILE_COPY_JOBS) - string(REPLACE " " ";" job_spec "${job_spec}") - list(LENGTH job_spec job_spec_length) - math(EXPR job_spec_length_mod "${job_spec_length} % 3") - if(NOT "${job_spec_length_mod}" EQUAL 1) - message( - FATAL_ERROR - "${platform} copy job spec list length is ${job_spec_length}; parsed job spec is ${job_spec}" - ) - endif() - math(EXPR job_spec_stop "${job_spec_length} - 3") - - list(GET job_spec 0 job_src_base) - set(job_src_base "${CMAKE_CURRENT_SOURCE_DIR}/${job_src_base}") - foreach(copy_pattern_index RANGE 1 "${job_spec_stop}" 3) - list(GET job_spec ${copy_pattern_index} copy_pattern) - math(EXPR copy_dest_index "${copy_pattern_index} + 2") - list(GET job_spec ${copy_dest_index} copy_dest) - - file( - GLOB_RECURSE copy_files - RELATIVE "${job_src_base}" - "${job_src_base}/${copy_pattern}") - list(LENGTH copy_files copy_files_length) - if("${copy_files_length}" EQUAL 0) - message( - 
FATAL_ERROR - "${platform} copy job matched 0 files: ${job_src_base}/${copy_pattern} -> ${copy_dest}" - ) - endif() - foreach(copy_src IN LISTS copy_files) - get_filename_component( - dest_path "${MICROTVM_TEMPLATE_PROJECTS}/${copy_dest}/${copy_src}" - ABSOLUTE) - tvm_micro_add_copy_file(platform_template_deps - ${job_src_base}/${copy_src} ${dest_path}) - endforeach() - endforeach() - endforeach() - - add_custom_target(${platform} DEPENDS ${platform_template_deps}) - endfunction() - - set(PLATFORMS crt;zephyr;arduino) - foreach(platform IN LISTS PLATFORMS) - message(STATUS "Add ${platform} template project.") - microtvm_add_platform_project_api(${platform}) - generate_crt_config(${platform} "${CMAKE_CURRENT_BINARY_DIR}/microtvm_template_projects/${platform}/crt_config/crt_config.h") - endforeach() - - # Add template files for Model Library Format - generate_crt_config("crt" "${MICROTVM_TEMPLATE_PROJECTS}/crt/templates/crt_config.h.template") - configure_file("src/runtime/crt/platform-template.c" "${MICROTVM_TEMPLATE_PROJECTS}/crt/templates/platform.c.template" COPYONLY) -endif(USE_MICRO) diff --git a/cmake/modules/StandaloneCrt.cmake b/cmake/modules/StandaloneCrt.cmake deleted file mode 100644 index f6a3c6d4108e..000000000000 --- a/cmake/modules/StandaloneCrt.cmake +++ /dev/null @@ -1,187 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -if(USE_MICRO) - - message(STATUS "Build microTVM RPC common") - - include(cmake/utils/CRTConfig.cmake) - set(CRT_CONFIG_INCLUDE_PATH ${CMAKE_CURRENT_BINARY_DIR}/crt_config) - generate_crt_config("crt" "${CRT_CONFIG_INCLUDE_PATH}/crt_config.h") - - # add microTVM RPC common files to TVM runtime build - list(APPEND TVM_CRT_SOURCES - 3rdparty/libcrc/src/crcccitt.c - src/runtime/crt/microtvm_rpc_common/frame_buffer.cc - src/runtime/crt/microtvm_rpc_common/framing.cc - src/runtime/crt/microtvm_rpc_common/session.cc - src/runtime/crt/microtvm_rpc_common/write_stream.cc) - - list(APPEND RUNTIME_SRCS ${TVM_CRT_SOURCES}) - include_directories(SYSTEM ${CRT_CONFIG_INCLUDE_PATH}) - - - function(create_crt_library CRT_LIBRARY) - - set(CRT_LIBRARY_NAME host_standalone_crt_${CRT_LIBRARY}) - set(CRT_LIBRARY_SOURCES "") - - foreach(FILE_NAME IN LISTS ARGN) - list(APPEND CRT_LIBRARY_SOURCES ${FILE_NAME}) - endforeach() - - add_library(${CRT_LIBRARY_NAME} - STATIC - ${CRT_LIBRARY_SOURCES}) - - # add this library to the list of CRT libraries - set(CRT_LIBRARIES ${CRT_LIBRARIES} ${CRT_LIBRARY_NAME} PARENT_SCOPE) - - target_include_directories(${CRT_LIBRARY_NAME} - PUBLIC - ${CRT_CONFIG_INCLUDE_PATH} - ${STANDALONE_CRT_BASE}/include) - - set_target_properties(${CRT_LIBRARY_NAME} - PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/host_standalone_crt - POSITION_INDEPENDENT_CODE ON) - - # make these libraries dependent on standalone_crt which depends on host_isolated_build_deps to avoid - # race with the file copy jobs - add_dependencies(${CRT_LIBRARY_NAME} standalone_crt) - - endfunction() - - message(STATUS "Build microTVM standalone CRT") - - # Build an isolated build directory, separate from the TVM tree. - list(APPEND CRT_FILE_COPY_JOBS - "3rdparty/libcrc/include *.h -> include" - "3rdparty/libcrc/src crcccitt.c -> src/runtime/crt/microtvm_rpc_common" - "3rdparty/libcrc/tab gentab_ccitt.inc -> src/runtime/crt/tab" - "3rdparty/dlpack/include *.h -> include" - "3rdparty/dmlc-core/include *.h -> include" - "include/tvm/runtime c_*_api.h -> include/tvm/runtime" - "include/tvm/runtime metadata_types.h -> include/tvm/runtime" - "include/tvm/runtime/crt *.h -> include/tvm/runtime/crt" - "src/runtime/crt CMakeLists.txt -> ." 
- "src/runtime/crt/include *.h -> include" - "src/runtime/crt/aot_executor *.c -> src/runtime/crt/aot_executor" - "src/runtime/crt/aot_executor_module *.c -> src/runtime/crt/aot_executor_module" - "src/runtime/crt/common *.c -> src/runtime/crt/common" - "src/runtime/crt/graph_executor *.c -> src/runtime/crt/graph_executor" - "src/runtime/crt/graph_executor_module *.c -> src/runtime/crt/graph_executor_module" - "src/runtime/crt/memory *.c -> src/runtime/crt/memory" - "src/runtime/crt/microtvm_rpc_common *.cc -> src/runtime/crt/microtvm_rpc_common" - "src/runtime/crt/microtvm_rpc_server *.cc -> src/runtime/crt/microtvm_rpc_server" - "src/runtime/minrpc *.h -> src/runtime/minrpc" - "src/support generic_arena.h -> src/support" - "src/support ssize.h -> src/support" - ) - - set(STANDALONE_CRT_BASE ${CMAKE_CURRENT_BINARY_DIR}/standalone_crt) - - foreach(job_spec IN LISTS CRT_FILE_COPY_JOBS) - string(REPLACE " " ";" job_spec "${job_spec}") - list(LENGTH job_spec job_spec_length) - math(EXPR job_spec_length_mod "${job_spec_length} % 3") - if(NOT "${job_spec_length_mod}" EQUAL 1) - message(FATAL_ERROR "CRT copy job spec list length is ${job_spec_length}; parsed job spec is ${job_spec}") - endif() - math(EXPR job_spec_stop "${job_spec_length} - 3") - - list(GET job_spec 0 job_src_base) - set(job_src_base "${CMAKE_CURRENT_SOURCE_DIR}/${job_src_base}") - foreach(copy_pattern_index RANGE 1 "${job_spec_stop}" 3) - list(GET job_spec ${copy_pattern_index} copy_pattern) - math(EXPR copy_dest_index "${copy_pattern_index} + 2") - list(GET job_spec ${copy_dest_index} copy_dest) - - tvm_file_glob(GLOB_RECURSE copy_files - RELATIVE "${job_src_base}" - "${job_src_base}/${copy_pattern}") - list(LENGTH copy_files copy_files_length) - if("${copy_files_length}" EQUAL 0) - message(FATAL_ERROR "CRT copy job matched 0 files: ${job_src_base}/${copy_pattern} -> ${copy_dest}") - endif() - foreach(copy_src IN LISTS copy_files) - get_filename_component(dest_path "${STANDALONE_CRT_BASE}/${copy_dest}/${copy_src}" ABSOLUTE) - tvm_micro_add_copy_file(host_isolated_build_deps ${job_src_base}/${copy_src} ${dest_path}) - endforeach() - endforeach() - endforeach() - - add_custom_target(standalone_crt DEPENDS ${host_isolated_build_deps}) - - set(CRT_LIBRARIES "") - set(RUNTIME_CRT_SOURCE_DIR ${STANDALONE_CRT_BASE}/src/runtime/crt) - - # these create_crt_library() targets are in link order and the common library needs to be last - create_crt_library(aot_executor - ${RUNTIME_CRT_SOURCE_DIR}/aot_executor/aot_executor.c) - - create_crt_library(aot_executor_module - ${RUNTIME_CRT_SOURCE_DIR}/aot_executor_module/aot_executor_module.c) - - create_crt_library(graph_executor - ${RUNTIME_CRT_SOURCE_DIR}/graph_executor/graph_executor.c - ${RUNTIME_CRT_SOURCE_DIR}/graph_executor/load_json.c) - - create_crt_library(graph_executor_module - ${RUNTIME_CRT_SOURCE_DIR}/graph_executor_module/graph_executor_module.c) - - create_crt_library(memory - ${RUNTIME_CRT_SOURCE_DIR}/memory/page_allocator.c - ${RUNTIME_CRT_SOURCE_DIR}/memory/stack_allocator.c) - - create_crt_library(microtvm_rpc_common - ${RUNTIME_CRT_SOURCE_DIR}/microtvm_rpc_common/crcccitt.c - ${RUNTIME_CRT_SOURCE_DIR}/microtvm_rpc_common/frame_buffer.cc - ${RUNTIME_CRT_SOURCE_DIR}/microtvm_rpc_common/framing.cc - ${RUNTIME_CRT_SOURCE_DIR}/microtvm_rpc_common/session.cc - ${RUNTIME_CRT_SOURCE_DIR}/microtvm_rpc_common/write_stream.cc) - - create_crt_library(microtvm_rpc_server - ${RUNTIME_CRT_SOURCE_DIR}/microtvm_rpc_server/rpc_server.cc) - - if(NOT MSVC) - # TODO: if we want to eventually 
build standalone_crt for windows - # these files would be needed, but for now don't build them - create_crt_library(common - ${RUNTIME_CRT_SOURCE_DIR}/common/crt_backend_api.c - ${RUNTIME_CRT_SOURCE_DIR}/common/crt_runtime_api.c - ${RUNTIME_CRT_SOURCE_DIR}/common/func_registry.c - ${RUNTIME_CRT_SOURCE_DIR}/common/ndarray.c - ${RUNTIME_CRT_SOURCE_DIR}/common/packed_func.c) - endif() - - add_custom_target(host_standalone_crt DEPENDS ${CRT_LIBRARIES} standalone_crt) - - # Create the `crttest` target if we can find GTest. If not, we create dummy - # targets that give the user an informative error message. - if(GTEST_FOUND) - tvm_file_glob(GLOB TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/tests/crt/*.cc) - add_executable(crttest ${TEST_SRCS}) - target_include_directories(crttest SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/standalone_crt/include ${CMAKE_CURRENT_BINARY_DIR}/crt_config) - target_link_libraries(crttest PRIVATE ${CRT_LIBRARIES} GTest::GTest GTest::Main pthread dl) - set_target_properties(crttest PROPERTIES EXCLUDE_FROM_ALL 1) - set_target_properties(crttest PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD 1) - gtest_discover_tests(crttest) - endif() - -endif() diff --git a/cmake/modules/Zephyr.cmake b/cmake/modules/Zephyr.cmake deleted file mode 100644 index 38551f1dd44d..000000000000 --- a/cmake/modules/Zephyr.cmake +++ /dev/null @@ -1,84 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. The ASF licenses this -# file to you under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. 
- -if(USE_MICRO) - message(STATUS "Add Zephyr for microTVM") - - function(microtvm_add_zephyr) - list( - APPEND - ZEPHYR_FILE_COPY_JOBS - "apps/microtvm/zephyr/template_project microtvm_api_server.py -> zephyr" - "python/tvm/micro/project_api server.py -> zephyr" - "apps/microtvm/zephyr/template_project launch_microtvm_api_server.sh -> zephyr" - "apps/microtvm/zephyr/template_project boards.json -> zephyr" - "apps/microtvm/zephyr/template_project CMakeLists.txt.template -> zephyr" - "apps/microtvm/zephyr/template_project/src/aot_standalone_demo *.c -> zephyr/src/aot_standalone_demo" - "apps/microtvm/zephyr/template_project/src/host_driven *.c -> zephyr/src/host_driven" - "apps/microtvm/zephyr/template_project/src/host_driven *.h -> zephyr/src/host_driven" - "apps/microtvm/zephyr/template_project/src/mlperftiny *.cc -> zephyr/src/mlperftiny" - "3rdparty/mlperftiny/api * -> zephyr/src/mlperftiny/api" - "apps/microtvm/zephyr/template_project/fvp-hack * -> zephyr/fvp-hack" - "apps/microtvm/zephyr/template_project/qemu-hack * -> zephyr/qemu-hack" - "apps/microtvm/zephyr/template_project/app-overlay * -> zephyr/app-overlay" - "apps/microtvm/zephyr/template_project/crt_config *.h -> zephyr/crt_config" - ) - - foreach(job_spec IN LISTS ZEPHYR_FILE_COPY_JOBS) - string(REPLACE " " ";" job_spec "${job_spec}") - list(LENGTH job_spec job_spec_length) - math(EXPR job_spec_length_mod "${job_spec_length} % 3") - if(NOT "${job_spec_length_mod}" EQUAL 1) - message( - FATAL_ERROR - "Zephyr copy job spec list length is ${job_spec_length}; parsed job spec is ${job_spec}" - ) - endif() - math(EXPR job_spec_stop "${job_spec_length} - 3") - - list(GET job_spec 0 job_src_base) - set(job_src_base "${CMAKE_CURRENT_SOURCE_DIR}/${job_src_base}") - foreach(copy_pattern_index RANGE 1 "${job_spec_stop}" 3) - list(GET job_spec ${copy_pattern_index} copy_pattern) - math(EXPR copy_dest_index "${copy_pattern_index} + 2") - list(GET job_spec ${copy_dest_index} copy_dest) - - file( - GLOB_RECURSE copy_files - RELATIVE "${job_src_base}" - "${job_src_base}/${copy_pattern}") - list(LENGTH copy_files copy_files_length) - if("${copy_files_length}" EQUAL 0) - message( - FATAL_ERROR - "Zephyr copy job matched 0 files: ${job_src_base}/${copy_pattern} -> ${copy_dest}" - ) - endif() - foreach(copy_src IN LISTS copy_files) - get_filename_component( - dest_path "${MICROTVM_TEMPLATE_PROJECTS}/${copy_dest}/${copy_src}" - ABSOLUTE) - tvm_micro_add_copy_file(zephyr_template_deps - ${job_src_base}/${copy_src} ${dest_path}) - endforeach() - endforeach() - endforeach() - - add_custom_target(zephyr DEPENDS ${zephyr_template_deps}) - endfunction() - - microtvm_add_zephyr() - -endif(USE_MICRO) diff --git a/cmake/modules/contrib/CMSISNN.cmake b/cmake/modules/contrib/CMSISNN.cmake deleted file mode 100644 index eef12fdd778e..000000000000 --- a/cmake/modules/contrib/CMSISNN.cmake +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -if(USE_CMSISNN) - add_definitions(-DTVM_USE_CMSISNN) - message(STATUS "Build with CMSIS-NN support") - tvm_file_glob(GLOB RELAY_CONTRIB_CMSISNN_SRCS - src/relay/backend/contrib/cmsisnn/*.cc - src/relay/backend/contrib/constant_transforms.cc) - list(APPEND COMPILER_SRCS ${RELAY_CONTRIB_CMSISNN_SRCS}) -endif(USE_CMSISNN) diff --git a/cmake/modules/contrib/EthosN.cmake b/cmake/modules/contrib/EthosN.cmake deleted file mode 100644 index b230acfc380d..000000000000 --- a/cmake/modules/contrib/EthosN.cmake +++ /dev/null @@ -1,57 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Arm(R) Ethos(TM)-N rules - -if(NOT USE_ETHOSN STREQUAL "OFF") - find_ethosn(${USE_ETHOSN}) - - if(NOT ETHOSN_FOUND) - message(FATAL_ERROR "Cannot find Arm(R) Ethos(TM)-N, USE_ETHOSN=" ${USE_ETHOSN}) - - else() - include_directories(SYSTEM ${ETHOSN_INCLUDE_DIRS}) - add_definitions(${ETHOSN_DEFINITIONS}) - - message(STATUS "Build with Arm(R) Ethos(TM)-N ${ETHOSN_PACKAGE_VERSION}") - - tvm_file_glob(GLOB ETHOSN_RUNTIME_CONTRIB_SRC - src/runtime/contrib/ethosn/ethosn_runtime.cc - src/runtime/contrib/ethosn/ethosn_device.cc) - list(APPEND RUNTIME_SRCS ${ETHOSN_RUNTIME_CONTRIB_SRC}) - - tvm_file_glob(GLOB COMPILER_ETHOSN_SRCS - src/relay/backend/contrib/ethosn/* - src/relay/backend/contrib/constant_transforms.cc) - list(APPEND COMPILER_SRCS ${COMPILER_ETHOSN_SRCS}) - - list(APPEND TVM_LINKER_LIBS ${ETHOSN_COMPILER_LIBRARY} - ${ETHOSN_RUNTIME_LIBRARY}) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${ETHOSN_RUNTIME_LIBRARY}) - - if(NOT MSVC) - set_source_files_properties(${COMPILER_ETHOSN_SRCS} - PROPERTIES COMPILE_DEFINITIONS "DMLC_ENABLE_RTTI=0") - set_source_files_properties(${COMPILER_ETHOSN_SRCS} - PROPERTIES COMPILE_FLAGS "-fno-rtti") - endif() - endif(NOT ETHOSN_FOUND) -else() - if(USE_ETHOSN_HW) - message(FATAL_ERROR "Cannot enable Arm(R) Ethos(TM)-N HW if USE_ETHOSN=OFF") - endif() -endif(NOT USE_ETHOSN STREQUAL "OFF") diff --git a/cmake/modules/contrib/EthosU.cmake b/cmake/modules/contrib/EthosU.cmake deleted file mode 100644 index bdd8846430d4..000000000000 --- a/cmake/modules/contrib/EthosU.cmake +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -if(USE_ETHOSU) - tvm_file_glob(GLOB COMPILER_ETHOSU_SRCS - src/relay/backend/contrib/ethosu/* - src/contrib/ethosu/cascader/* - src/contrib/ethosu/cascader/parts/* - src/tir/contrib/ethosu/*) - list(APPEND COMPILER_SRCS ${COMPILER_ETHOSU_SRCS}) -else() - # Keeping just utils.cc because it has Object definitions - # used by python side - tvm_file_glob(GLOB COMPILER_ETHOSU_SRCS - src/relay/backend/contrib/ethosu/utils.cc - src/contrib/ethosu/cascader/* - src/contrib/ethosu/cascader/parts/*) - list(APPEND COMPILER_SRCS ${COMPILER_ETHOSU_SRCS}) -endif(USE_ETHOSU) diff --git a/cmake/modules/contrib/MicroStandaloneRuntime.cmake b/cmake/modules/contrib/MicroStandaloneRuntime.cmake deleted file mode 100644 index 4bf354523936..000000000000 --- a/cmake/modules/contrib/MicroStandaloneRuntime.cmake +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -if(USE_MICRO_STANDALONE_RUNTIME) - message(STATUS "Build with micro.standalone_runtime") - tvm_file_glob(GLOB MICRO_STANDALONE_RUNTIME_SRC src/runtime/micro/standalone/*.cc) - list(APPEND RUNTIME_SRCS ${MICRO_STANDALONE_RUNTIME_SRC}) - add_definitions(-DUSE_MICRO_STANDALONE_RUNTIME=1) -endif(USE_MICRO_STANDALONE_RUNTIME) diff --git a/cmake/utils/FindEthosN.cmake b/cmake/utils/FindEthosN.cmake deleted file mode 100644 index 7d5f2802f6bd..000000000000 --- a/cmake/utils/FindEthosN.cmake +++ /dev/null @@ -1,93 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -####################################################### -# Find Arm(R) Ethos(TM)-N libraries -# -# Usage: -# find_ethosn(${USE_ETHOSN}) -# -# - When USE_ETHOSN=/path/to/ethos-sdk-path, use the path from USE_ETHOSN -# - Else, when environment variable ETHOSN_STACK is set, use that path -# - When USE_ETHOSN=ON, use auto search -# -# Provide variables: -# -# - ETHOSN_FOUND -# - ETHOSN_PACKAGE_VERSION -# - ETHOSN_DEFINITIONS -# - ETHOSN_INCLUDE_DIRS -# - ETHOSN_COMPILER_LIBRARY -# - ETHOSN_RUNTIME_LIBRARY - -macro(find_ethosn use_ethosn) - set(__use_ethosn ${use_ethosn}) - if(IS_DIRECTORY ${__use_ethosn}) - set(__ethosn_stack ${__use_ethosn}) - message(STATUS "Arm(R) Ethos(TM)-N driver stack PATH=" ${__use_ethosn}) - elseif(IS_DIRECTORY $ENV{ETHOSN_STACK}) - set(__ethosn_stack $ENV{ETHOSN_STACK}) - message(STATUS "Arm(R) Ethos(TM)-N driver stack from env=" ${__use_ethosn}) - else() - set(__ethosn_stack "") - endif() - - if(__ethosn_stack) - set(ETHOSN_INCLUDE_DIRS "") - # Compile-time support - find_path(_SL_DIR NAMES Support.hpp - PATHS ${__ethosn_stack}/include/ethosn_support_library) - string(REGEX REPLACE "/ethosn_support_library" "" _SL_DIR2 ${_SL_DIR}) - list(APPEND ETHOSN_INCLUDE_DIRS "${_SL_DIR2}") - - find_library(ETHOSN_COMPILER_LIBRARY NAMES EthosNSupport - PATHS ${__ethosn_stack}/lib) - find_library(ETHOSN_COMPILER_LIBRARY NAMES EthosNSupport) - - # Runtime hardware support. Driver library also needed for - # test support. - find_path(_DL_DIR NAMES Network.hpp - PATHS ${__ethosn_stack}/include/ethosn_driver_library) - string(REGEX REPLACE "/ethosn_driver_library" "" _DL_DIR2 ${_DL_DIR}) - list(APPEND ETHOSN_INCLUDE_DIRS "${_DL_DIR2}") - - find_library(ETHOSN_RUNTIME_LIBRARY NAMES EthosNDriver - PATHS ${__ethosn_stack}/lib) - find_library(ETHOSN_RUNTIME_LIBRARY NAMES EthosNDriver) - if(${USE_ETHOSN_HW} MATCHES ${IS_TRUE_PATTERN}) - set(ETHOSN_DEFINITIONS -DETHOSN_HW) - endif() - - if(ETHOSN_COMPILER_LIBRARY) - set(ETHOSN_FOUND TRUE) - endif() - endif(__ethosn_stack) - - if(NOT ETHOSN_FOUND) - if(${__use_ethosn} MATCHES ${IS_TRUE_PATTERN}) - message(WARNING "No cmake find_package available for Arm(R) Ethos(TM)-N") - endif() - - # additional libraries - else() - message(STATUS "Found ETHOSN_DEFINITIONS=${ETHOSN_DEFINITIONS}") - message(STATUS "Found ETHOSN_INCLUDE_DIRS=${ETHOSN_INCLUDE_DIRS}") - message(STATUS "Found ETHOSN_COMPILER_LIBRARY=${ETHOSN_COMPILER_LIBRARY}") - message(STATUS "Found ETHOSN_RUNTIME_LIBRARY=${ETHOSN_RUNTIME_LIBRARY}") - endif(NOT ETHOSN_FOUND) - -endmacro(find_ethosn) diff --git a/cmake/utils/Utils.cmake b/cmake/utils/Utils.cmake index fdd70228f861..69400698f0d5 100644 --- a/cmake/utils/Utils.cmake +++ b/cmake/utils/Utils.cmake @@ -75,19 +75,6 @@ function(assign_source_group group) endforeach() endfunction(assign_source_group) -function(tvm_micro_add_copy_file var src dest) - get_filename_component(basename "${src}" NAME) - get_filename_component(dest_parent_dir "${dest}" DIRECTORY) - add_custom_command( - OUTPUT "${dest}" - COMMAND "${CMAKE_COMMAND}" -E copy "${src}" "${dest}" - DEPENDS "${src}") - list(APPEND "${var}" "${dest}") - set("${var}" "${${var}}" PARENT_SCOPE) -endfunction(tvm_micro_add_copy_file) - -set(MICROTVM_TEMPLATE_PROJECTS "${CMAKE_CURRENT_BINARY_DIR}/microtvm_template_projects") - # From cmake documentation: # True if the constant is 1, ON, YES, TRUE, Y, or a non-zero number. # False if the constant is 0, OFF, NO, FALSE, N, IGNORE, NOTFOUND, the empty string, or ends in the suffix -NOTFOUND. 
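Note for downstream builds: the flags whose support files are deleted above (USE_ETHOSN, USE_ETHOSN_HW, USE_CMSISNN, USE_ETHOSU, USE_MICRO_STANDALONE_RUNTIME, and USE_MICRO further below) simply stop being read once this change lands; values left behind in an existing config.cmake are no longer consulted and produce no warning. The sketch below is illustrative only and is not part of this patch: a guard a downstream config.cmake could carry to surface stale settings (the variable names come from the deleted code, the loop and message wording are hypothetical).

    # Hypothetical downstream guard: warn if a flag removed by this change is
    # still switched on locally, instead of letting it be silently ignored.
    foreach(_removed_opt USE_MICRO USE_MICRO_STANDALONE_RUNTIME USE_ETHOSN USE_ETHOSN_HW USE_CMSISNN USE_ETHOSU)
      if(DEFINED ${_removed_opt} AND ${_removed_opt})
        message(WARNING "${_removed_opt} has been removed from TVM and no longer has any effect")
      endif()
    endforeach()

Whether such a guard is worth keeping is a per-fork judgment call; the upstream change itself only deletes the code and options.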
diff --git a/conda/recipe/bld.bat b/conda/recipe/bld.bat index 561dcff87802..57ce3666eaee 100644 --- a/conda/recipe/bld.bat +++ b/conda/recipe/bld.bat @@ -27,7 +27,6 @@ cmake ^ -DUSE_LLVM=ON ^ -DUSE_RPC=ON ^ -DUSE_CPP_RPC=ON ^ - -DUSE_MICRO=ON ^ -DUSE_SORT=ON ^ -DUSE_RANDOM=ON ^ -DUSE_PROFILER=ON ^ diff --git a/conda/recipe/build.sh b/conda/recipe/build.sh index 3422c4d8f13b..0131fd65a48e 100755 --- a/conda/recipe/build.sh +++ b/conda/recipe/build.sh @@ -49,7 +49,6 @@ cmake -DCMAKE_INSTALL_PREFIX="${PREFIX}" \ -DCMAKE_BUILD_TYPE=Release \ -DUSE_RPC=ON \ -DUSE_CPP_RPC=OFF \ - -DUSE_MICRO=ON \ -DUSE_SORT=ON \ -DUSE_RANDOM=ON \ -DUSE_PROFILER=ON \ diff --git a/docker/Dockerfile.ci_cortexm b/docker/Dockerfile.ci_cortexm index 5535d29ed104..142b1e41ad36 100644 --- a/docker/Dockerfile.ci_cortexm +++ b/docker/Dockerfile.ci_cortexm @@ -21,109 +21,4 @@ FROM ubuntu:22.04 COPY utils/apt-install-and-clear.sh /usr/local/bin/apt-install-and-clear -RUN apt-get update --fix-missing - -COPY install/ubuntu_setup_tz.sh /install/ubuntu_setup_tz.sh -RUN bash /install/ubuntu_setup_tz.sh - -COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh -RUN bash /install/ubuntu_install_core.sh - -COPY install/ubuntu_install_cmake_source.sh /install/ubuntu_install_cmake_source.sh -RUN bash /install/ubuntu_install_cmake_source.sh 3.20.0 9c06b2ddf7c337e31d8201f6ebcd3bba86a9a033976a9aee207fe0c6971f4755 - -COPY install/ubuntu_install_googletest.sh /install/ubuntu_install_googletest.sh -RUN bash /install/ubuntu_install_googletest.sh - -ENV TVM_VENV /venv/apache-tvm-py3.9 -COPY python/bootstrap/lockfiles /install/python/bootstrap/lockfiles -COPY install/ubuntu_install_python.sh /install/ubuntu_install_python.sh -RUN bash /install/ubuntu_install_python.sh 3.9 -ENV PATH ${TVM_VENV}/bin:$PATH -ENV PYTHONNOUSERSITE 1 # Disable .local directory from affecting CI. 
- -COPY install/ubuntu_install_python_package.sh /install/ubuntu_install_python_package.sh -RUN bash /install/ubuntu_install_python_package.sh - -COPY install/ubuntu2204_install_llvm.sh /install/ubuntu2204_install_llvm.sh -RUN bash /install/ubuntu2204_install_llvm.sh - -# Rust env (build early; takes a while) -COPY install/ubuntu_install_rust.sh /install/ubuntu_install_rust.sh -RUN bash /install/ubuntu_install_rust.sh -ENV RUSTUP_HOME /opt/rust -ENV CARGO_HOME /opt/rust -ENV PATH $PATH:$CARGO_HOME/bin - -# AutoTVM deps -COPY install/ubuntu_install_redis.sh /install/ubuntu_install_redis.sh -RUN bash /install/ubuntu_install_redis.sh - -# ANTLR deps -COPY install/ubuntu_install_java.sh /install/ubuntu_install_java.sh -RUN bash /install/ubuntu_install_java.sh - -# TensorFlow deps -COPY install/ubuntu_install_tensorflow.sh /install/ubuntu_install_tensorflow.sh -RUN bash /install/ubuntu_install_tensorflow.sh - -# TFLite deps -COPY install/ubuntu_install_tflite.sh /install/ubuntu_install_tflite.sh -RUN bash /install/ubuntu_install_tflite.sh - -# sccache -COPY install/ubuntu_install_sccache.sh /install/ubuntu_install_sccache.sh -RUN bash /install/ubuntu_install_sccache.sh -ENV PATH /opt/sccache:$PATH - -# Zephyr Project -COPY install/ubuntu_install_zephyr.sh /install/ubuntu_install_zephyr.sh -COPY install/ubuntu_init_zephyr_project.sh /install/ubuntu_init_zephyr_project.sh -RUN bash /install/ubuntu_install_zephyr.sh -ENV ZEPHYR_BASE=/opt/zephyrproject/zephyr - -#Zephyr SDK -COPY install/ubuntu_install_zephyr_sdk.sh /install/ubuntu_install_zephyr_sdk.sh -RUN bash /install/ubuntu_install_zephyr_sdk.sh /opt/zephyr-sdk -ENV PATH /opt/zephyr-sdk/sysroots/x86_64-pokysdk-linux/usr/bin:$PATH - -# NRF -COPY install/ubuntu_install_nrfjprog.sh /install/ubuntu_install_nrfjprog.sh -RUN bash /install/ubuntu_install_nrfjprog.sh - -# FreeRTOS deps -COPY install/ubuntu_install_freertos.sh /install/ubuntu_install_freertos.sh -RUN bash /install/ubuntu_install_freertos.sh - -# Arduino deps -# NOTE: override Arduino directories so packages are installed in a -# CI-accessible location. 
-ENV ARDUINO_DIRECTORIES_DATA=/arduino15-data -ENV ARDUINO_DIRECTORIES_DOWNLOADS=/arduino15-downloads -ENV ARDUINO_DIRECTORIES_USER=/arduino15-user -COPY install/ubuntu_install_arduino.sh /install/ubuntu_install_arduino.sh -RUN bash /install/ubuntu_install_arduino.sh - -# Install ONNX -COPY install/ubuntu_install_onnx.sh /install/ubuntu_install_onnx.sh -RUN bash /install/ubuntu_install_onnx.sh - -# NNEF -COPY install/ubuntu_install_nnef.sh /install/ubuntu_install_nnef.sh -RUN bash /install/ubuntu_install_nnef.sh - -# Install CMSIS_NN -COPY install/ubuntu_install_cmsis.sh /install/ubuntu_install_cmsis.sh -RUN bash /install/ubuntu_install_cmsis.sh /opt/arm/ethosu/cmsis -ENV CMSIS_PATH=/opt/arm/ethosu/cmsis/ - -# Arm(R) Ethos(TM)-U NPU driver -COPY install/ubuntu_install_ethosu_driver_stack.sh /install/ubuntu_install_ethosu_driver_stack.sh -RUN bash /install/ubuntu_install_ethosu_driver_stack.sh - -# Install Vela compiler -COPY install/ubuntu_install_vela.sh /install/ubuntu_install_vela.sh -RUN bash /install/ubuntu_install_vela.sh - -# Update PATH -ENV PATH /opt/arm/gcc-arm-none-eabi/bin:/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4:$PATH +# Keep it for now diff --git a/docker/Dockerfile.ci_cpu b/docker/Dockerfile.ci_cpu index 9e53882e1638..fe58ccbd63fb 100644 --- a/docker/Dockerfile.ci_cpu +++ b/docker/Dockerfile.ci_cpu @@ -109,10 +109,6 @@ RUN bash /install/ubuntu_install_jax.sh "cpu" COPY install/ubuntu_download_arm_compute_lib_binaries.sh /install/ubuntu_download_arm_compute_lib_binaries.sh RUN bash /install/ubuntu_download_arm_compute_lib_binaries.sh -# Github Arm(R) Ethos(TM)-N NPU driver -COPY install/ubuntu_install_ethosn_driver_stack.sh /install/ubuntu_install_ethosn_driver_stack.sh -RUN bash /install/ubuntu_install_ethosn_driver_stack.sh - # Vitis-AI PyXIR CI deps COPY install/ubuntu_install_vitis_ai_packages_ci.sh /install/ubuntu_install_vitis_ai_packages_ci.sh RUN bash /install/ubuntu_install_vitis_ai_packages_ci.sh diff --git a/docker/install/ubuntu_install_ethosn_driver_stack.sh b/docker/install/ubuntu_install_ethosn_driver_stack.sh deleted file mode 100755 index 8a7ef9f23db4..000000000000 --- a/docker/install/ubuntu_install_ethosn_driver_stack.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -e -set -u -set -o pipefail - -repo_url="https://github.com/Arm-software/ethos-n-driver-stack" -repo_dir="ethosn-driver" -repo_revision="23.02" -install_path="/opt/arm/$repo_dir" - -tmpdir=$(mktemp -d) - -cleanup() -{ - rm -rf "$tmpdir" -} - -trap cleanup 0 - -apt-get update && apt-install-and-clear -y \ - bsdmainutils \ - build-essential \ - cpp \ - git \ - linux-headers-generic \ - scons \ - wget \ - openssh-client - -cd "$tmpdir" -git clone --branch "$repo_revision" "$repo_url" "$repo_dir" - -cd "$repo_dir"/driver -scons werror=False install_prefix="$install_path" install diff --git a/docker/install/ubuntu_install_ethosu_driver_stack.sh b/docker/install/ubuntu_install_ethosu_driver_stack.sh deleted file mode 100755 index 8e6e7d79553d..000000000000 --- a/docker/install/ubuntu_install_ethosu_driver_stack.sh +++ /dev/null @@ -1,94 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e -set -u -set -o pipefail - -fvp_dir="/opt/arm/FVP_Corstone_SSE-300" -cmake_dir="/opt/arm/cmake" -ethosu_dir="/opt/arm/ethosu" -ethosu_driver_ver="23.05" - -mkdir -p /opt/arm - -tmpdir=$(mktemp -d) - -cleanup() -{ - rm -rf "$tmpdir" -} - -trap cleanup 0 - -# Ubuntu 22.04 dependencies -apt-get update -apt-install-and-clear -y \ - bsdmainutils \ - build-essential \ - cpp \ - git \ - linux-headers-generic \ - make \ - python3-dev \ - python3 \ - ssh \ - wget \ - xxd - -# Download the FVP -mkdir -p "$fvp_dir" -cd "$tmpdir" -curl -sL https://developer.arm.com/-/media/Arm%20Developer%20Community/Downloads/OSS/FVP/Corstone-300/FVP_Corstone_SSE-300_11.15_24.tgz | tar -xz -./FVP_Corstone_SSE-300.sh --i-agree-to-the-contained-eula --no-interactive -d "$fvp_dir" -rm -rf FVP_Corstone_SSE-300.sh license_terms - -# Setup cmake 3.19.5 -mkdir -p "${cmake_dir}" -cd "$tmpdir" -curl -sL -o cmake-3.19.5-Linux-x86_64.sh https://github.com/Kitware/CMake/releases/download/v3.19.5/cmake-3.19.5-Linux-x86_64.sh -chmod +x cmake-3.19.5-Linux-x86_64.sh -./cmake-3.19.5-Linux-x86_64.sh --prefix="${cmake_dir}" --skip-license -rm cmake-3.19.5-Linux-x86_64.sh -export PATH="${cmake_dir}/bin:${PATH}" - -# Install the GCC toolchain -mkdir -p /opt/arm/gcc-arm-none-eabi/ -gcc_arm_url='https://developer.arm.com/-/media/Files/downloads/gnu-rm/10-2020q4/gcc-arm-none-eabi-10-2020-q4-major-x86_64-linux.tar.bz2?revision=ca0cbf9c-9de2-491c-ac48-898b5bbc0443&la=en&hash=68760A8AE66026BCF99F05AC017A6A50C6FD832A' -curl --retry 64 -sSL ${gcc_arm_url} | tar -C /opt/arm/gcc-arm-none-eabi --strip-components=1 -jx -export PATH="/opt/arm/gcc-arm-none-eabi/bin:${PATH}" - -# Clone Arm(R) Ethos(TM)-U NPU driver stack -mkdir -p "${ethosu_dir}" -cd "${ethosu_dir}" -git clone --branch ${ethosu_driver_ver} "https://review.mlplatform.org/ml/ethos-u/ethos-u-core-driver" core_driver -git clone 
--branch ${ethosu_driver_ver} "https://review.mlplatform.org/ml/ethos-u/ethos-u-core-platform" core_platform - -# Build Driver -NPU_VARIANTS=("u55" "u65") -for i in ${NPU_VARIANTS[*]} -do - mkdir ${ethosu_dir}/core_driver/build_${i} && cd ${ethosu_dir}/core_driver/build_${i} - cmake -DCMAKE_TOOLCHAIN_FILE=${ethosu_dir}/core_platform/cmake/toolchain/arm-none-eabi-gcc.cmake -DETHOSU_LOG_SEVERITY=debug -DTARGET_CPU=cortex-m55 -DETHOSU_TARGET_NPU_CONFIG=ethos-${i}-128 .. - make -done - -# Build NN Library -mkdir ${CMSIS_PATH}/CMSIS-NN/build/ && cd ${CMSIS_PATH}/CMSIS-NN/build/ -cmake .. -DCMAKE_TOOLCHAIN_FILE=${ethosu_dir}/core_platform/cmake/toolchain/arm-none-eabi-gcc.cmake -DTARGET_CPU=cortex-m55 -DBUILD_CMSIS_NN_FUNCTIONS=YES -DCMSIS_PATH=${CMSIS_PATH} -make diff --git a/docker/install/ubuntu_install_vela.sh b/docker/install/ubuntu_install_vela.sh deleted file mode 100755 index ce27541db2cc..000000000000 --- a/docker/install/ubuntu_install_vela.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -e -set -u -set -o pipefail - -pip3 install ethos-u-vela==3.8.0 numpy==1.23.* diff --git a/docker/python/ci-constraints.txt b/docker/python/ci-constraints.txt index feba27cd03d0..5b0a4da23fbf 100644 --- a/docker/python/ci-constraints.txt +++ b/docker/python/ci-constraints.txt @@ -11,7 +11,6 @@ blocklint = "==0.2.3" #commonmark = ">=0.7.3" cpplint = "==1.6.0" #docutils = ">=0.11,<0.17" -#ethos-u-vela = "==3.2.0" flake8 = "==3.9.2" flowvision = "==0.1.0" #h5py = "==3.1.0" diff --git a/docs/conf.py b/docs/conf.py index d686a2d72d95..189877da8f5d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -418,7 +418,6 @@ def jupyter_notebook(script_blocks, gallery_conf, target_dir, real_func): tvm_path.joinpath("gallery", "how_to", "optimize_operators"), tvm_path.joinpath("gallery", "how_to", "tune_with_autotvm"), tvm_path.joinpath("gallery", "how_to", "tune_with_autoscheduler"), - tvm_path.joinpath("gallery", "how_to", "work_with_microtvm"), tvm_path.joinpath("gallery", "how_to", "extend_tvm"), # New tutorial structure under docs folder tvm_path.joinpath("docs", "get_started", "tutorials"), @@ -437,7 +436,6 @@ def jupyter_notebook(script_blocks, gallery_conf, target_dir, real_func): "how_to/optimize_operators", "how_to/tune_with_autotvm", "how_to/tune_with_autoscheduler", - "how_to/work_with_microtvm", "how_to/extend_tvm", # New tutorial structure under docs folder "get_started/tutorials/", @@ -511,16 +509,6 @@ def jupyter_notebook(script_blocks, gallery_conf, target_dir, real_func): "use_pass_instrument.py", "bring_your_own_datatypes.py", ], - "work_with_microtvm": [ - "micro_tvmc.py", - "micro_tflite.py", - "micro_aot.py", - "micro_pytorch.py", - "micro_train.py", - "micro_autotune.py", - "micro_ethosu.py", - "micro_mlperftiny.py", - ], } diff --git a/docs/contribute/document.rst b/docs/contribute/document.rst index 43f98ded7401..d789b5714152 100644 --- a/docs/contribute/document.rst +++ b/docs/contribute/document.rst @@ -115,10 +115,9 @@ appropriate. As a result the tutorials and how-tos will be divided between focus on the developer experience. The next consideration is that there are special topics within the TVM -community that benefit from additional attention. These topics include, but are -not limited to, microTVM and VTA. Special "Topic Guides" can be created to -index existing material, and provide context on how to navigate that material -most effectively. +community that benefit from additional attention. Special "Topic Guides" can be +created to index existing material, and provide context on how to navigate that +material most effectively. To facilitate newcomers, a special "Getting Started" section with installation instructions, a overview of why to use TVM, and other first-experience diff --git a/docs/how_to/dev/setup_rpc_system.rst b/docs/how_to/dev/setup_rpc_system.rst index f61b7477f5c0..50cfa30a0583 100644 --- a/docs/how_to/dev/setup_rpc_system.rst +++ b/docs/how_to/dev/setup_rpc_system.rst @@ -108,7 +108,6 @@ After executing commands like something below under the root directory of TVM re # You maybe need to enable other options, e.g., USE_OPENCL, USE_xPU. $ sed -i "s|USE_LLVM.*)|USE_LLVM OFF)|" config.cmake $ sed -i "s|USE_LIBBACKTRACE.*)|USE_LIBBACKTRACE OFF)|" config.cmake - $ sed -i "s|USE_MICRO.*)|USE_MICRO OFF)|" config.cmake $ cmake -DCMAKE_TOOLCHAIN_FILE=/YYY/aarch64-linux-gnu.cmake -DCMAKE_BUILD_TYPE=Release .. $ cmake --build . 
-j -- runtime diff --git a/docs/how_to/legacy_index.rst b/docs/how_to/legacy_index.rst index d675adbee2da..91ed4639db47 100644 --- a/docs/how_to/legacy_index.rst +++ b/docs/how_to/legacy_index.rst @@ -32,6 +32,5 @@ schedule with tesor expressions?" optimize_operators/index tune_with_autotvm/index tune_with_autoscheduler/index - work_with_microtvm/index extend_tvm/index profile/index diff --git a/docs/index.rst b/docs/index.rst index 041931552b03..2eccb60caa23 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -74,7 +74,6 @@ driving its costs down. dev/tutorial/index dev/how_to/how_to.rst reference/langref/index - topic/microtvm/index .. toctree:: :maxdepth: 1 diff --git a/docs/reference/api/python/index.rst b/docs/reference/api/python/index.rst index e64ea304cbee..c4082354a8de 100644 --- a/docs/reference/api/python/index.rst +++ b/docs/reference/api/python/index.rst @@ -102,5 +102,4 @@ Python API relay/testing autotvm auto_scheduler - micro graph_executor diff --git a/docs/reference/api/python/micro.rst b/docs/reference/api/python/micro.rst deleted file mode 100644 index 1a93f74834c7..000000000000 --- a/docs/reference/api/python/micro.rst +++ /dev/null @@ -1,23 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -tvm.micro ---------- -.. automodule:: tvm.micro - :members: - :imported-members: - :autosummary: diff --git a/docs/topic/microtvm/index.rst b/docs/topic/microtvm/index.rst deleted file mode 100644 index 2bac70241d3b..000000000000 --- a/docs/topic/microtvm/index.rst +++ /dev/null @@ -1,65 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -.. _microtvm-index: - -microTVM: TVM on bare-metal -=========================== - -microTVM runs TVM models on bare-metal (i.e. IoT) devices. microTVM depends only on the C standard -library, and doesn't require an operating system to execute. microTVM is currently under heavy -development. - -.. 
figure:: https://raw.githubusercontent.com/tvmai/web-data/main/images/dev/microtvm_workflow.svg - :align: center - :width: 85% - -microTVM is: - -* an extension to TVM's compiler to allow it to target microcontrollers -* a way to run the TVM RPC server on-device, to allow autotuning -* a minimal C runtime that supports standalone model inference on bare metal devices. - -Supported Hardware -~~~~~~~~~~~~~~~~~~ - -microTVM currently tests against Cortex-M microcontrollers with the Zephyr RTOS; however, it is -flexible and portable to other processors such as RISC-V and does not require Zephyr. The current -demos run against QEMU and the following hardware: - -* `STM Nucleo-F746ZG `_ -* `STM STM32F746 Discovery `_ -* `nRF 5340 Development Kit `_ - - -Getting Started with microTVM -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Before working with microTVM, we recommend you have a supported development board. Then, follow these -tutorials to get started with microTVM. Tutorials are in the order that could help developers to learn -more as they follow through them. Here is a list of tutorials that you can start with: - -1. Try :ref:`microTVM CLI Tool `. -2. Try the :ref:`microTVM TFLite Tutorial `. -3. Try running a more complex tutorial: :ref:`Creating Your MLPerfTiny Submission with microTVM `. - - -Help and Discussion -~~~~~~~~~~~~~~~~~~~ - -The `TVM Discuss Forum `_ is a great place to collaborate on microTVM tasks, -and maintains a searchable history of past problems. diff --git a/gallery/how_to/work_with_microtvm/README.txt b/gallery/how_to/work_with_microtvm/README.txt deleted file mode 100644 index a3cc5a0c6c43..000000000000 --- a/gallery/how_to/work_with_microtvm/README.txt +++ /dev/null @@ -1,7 +0,0 @@ -.. _tutorial-micro: - -Work With microTVM ------------------- -microTVM enables inference on bare-metal platforms, for example, those without -a traditional Operating System such as Linux, OS X, or Windows. These how-tos -demonstrate how to tune and deploy models with microTVM. diff --git a/gallery/how_to/work_with_microtvm/install_cmsis.rst b/gallery/how_to/work_with_microtvm/install_cmsis.rst deleted file mode 100644 index 13286b1b54f6..000000000000 --- a/gallery/how_to/work_with_microtvm/install_cmsis.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -.. Boilerplate script for installing CMSIS-NN in the microTVM - tutorials that use it. Does not show up as a separate file - on the documentation website. - -Install CMSIS-NN ----------------------------- - - .. 
code-block:: bash - - %%shell - CMSIS_SHA="51263182d16c92649a48144ba56c0945f9fce60e" - CMSIS_URL="http://github.com/ARM-software/CMSIS_5/archive/${CMSIS_SHA}.tar.gz" - export CMSIS_PATH=/content/cmsis - DOWNLOAD_PATH="/content/${CMSIS_SHA}.tar.gz" - mkdir ${CMSIS_PATH} - wget ${CMSIS_URL} -O "${DOWNLOAD_PATH}" - tar -xf "${DOWNLOAD_PATH}" -C ${CMSIS_PATH} --strip-components=1 - rm ${DOWNLOAD_PATH} - - CMSIS_NN_TAG="v4.0.0" - CMSIS_NN_URL="https://github.com/ARM-software/CMSIS-NN.git" - git clone ${CMSIS_NN_URL} --branch ${CMSIS_NN_TAG} --single-branch ${CMSIS_PATH}/CMSIS-NN diff --git a/gallery/how_to/work_with_microtvm/install_dependencies.rst b/gallery/how_to/work_with_microtvm/install_dependencies.rst deleted file mode 100644 index d1bee4176d94..000000000000 --- a/gallery/how_to/work_with_microtvm/install_dependencies.rst +++ /dev/null @@ -1,33 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -.. Boilerplate script for installing Zephyr in the microTVM - tutorials that use it. Does not show up as a separate file - on the documentation website. - - -Install microTVM Python dependencies ------------------------------------- - -TVM does not include a package for Python serial communication, so -we must install one before using microTVM. We will also need TFLite -to load models. - - .. code-block:: bash - - %%shell - pip install pyserial==3.5 tflite==2.1 diff --git a/gallery/how_to/work_with_microtvm/install_zephyr.rst b/gallery/how_to/work_with_microtvm/install_zephyr.rst deleted file mode 100644 index 49bc20c425aa..000000000000 --- a/gallery/how_to/work_with_microtvm/install_zephyr.rst +++ /dev/null @@ -1,52 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -.. Boilerplate script for installing Zephyr in the microTVM - tutorials that use it. Does not show up as a separate file - on the documentation website. - -Install Zephyr ----------------------------- - - .. 
code-block:: bash - - %%shell - # Install west and ninja - python3 -m pip install west - apt-get install -y ninja-build - - # Install ZephyrProject - ZEPHYR_PROJECT_PATH="/content/zephyrproject" - export ZEPHYR_BASE=${ZEPHYR_PROJECT_PATH}/zephyr - west init ${ZEPHYR_PROJECT_PATH} - cd ${ZEPHYR_BASE} - git checkout v3.2-branch - cd .. - west update - west zephyr-export - chmod -R o+w ${ZEPHYR_PROJECT_PATH} - - # Install Zephyr SDK - cd /content - ZEPHYR_SDK_VERSION="0.15.2" - wget "https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v${ZEPHYR_SDK_VERSION}/zephyr-sdk-${ZEPHYR_SDK_VERSION}_linux-x86_64.tar.gz" - tar xvf "zephyr-sdk-${ZEPHYR_SDK_VERSION}_linux-x86_64.tar.gz" - mv "zephyr-sdk-${ZEPHYR_SDK_VERSION}" zephyr-sdk - rm "zephyr-sdk-${ZEPHYR_SDK_VERSION}_linux-x86_64.tar.gz" - - # Install python dependencies - python3 -m pip install -r "${ZEPHYR_BASE}/scripts/requirements.txt" diff --git a/gallery/how_to/work_with_microtvm/micro_aot.py b/gallery/how_to/work_with_microtvm/micro_aot.py deleted file mode 100644 index f07731c183b7..000000000000 --- a/gallery/how_to/work_with_microtvm/micro_aot.py +++ /dev/null @@ -1,195 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -.. _tutorial-micro-aot: - -3. microTVM Ahead-of-Time (AOT) Compilation -=========================================== -**Authors**: -`Mehrdad Hessar `_, -`Alan MacDonald `_ - -This tutorial is showcasing microTVM host-driven AoT compilation with -a TFLite model. AoTExecutor reduces the overhead of parsing graph at runtime -compared to GraphExecutor. Also, we can have better memory management using ahead -of time compilation. This tutorial can be executed on a x86 CPU using C runtime (CRT) -or on Zephyr platform on a microcontroller/board supported by Zephyr. -""" - -###################################################################### -# -# .. include:: ../../../../gallery/how_to/work_with_microtvm/install_dependencies.rst -# - - -import os - -# By default, this tutorial runs on x86 CPU using TVM's C runtime. If you would like -# to run on real Zephyr hardware, you must export the `TVM_MICRO_USE_HW` environment -# variable. Otherwise (if you are using the C runtime), you can skip installing -# Zephyr. It takes ~20 minutes to install Zephyr. -use_physical_hw = bool(os.getenv("TVM_MICRO_USE_HW")) - -###################################################################### -# -# .. 
include:: ../../../../gallery/how_to/work_with_microtvm/install_zephyr.rst -# - -###################################################################### -# Import Python dependencies -# ------------------------------- -# -import numpy as np -import pathlib -import json - -import tvm -from tvm import relay -import tvm.micro.testing -from tvm.relay.backend import Executor, Runtime -from tvm.contrib.download import download_testdata - -###################################################################### -# Import a TFLite model -# --------------------- -# -# To begin with, download and import a Keyword Spotting TFLite model. -# This model is originally from `MLPerf Tiny repository `_. -# To test this model, we use samples from `KWS dataset provided by Google `_. -# -# **Note:** By default this tutorial runs on x86 CPU using CRT, if you would like to run on Zephyr platform -# you need to export `TVM_MICRO_USE_HW` environment variable. -# -MODEL_URL = "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/keyword_spotting/trained_models/kws_ref_model.tflite" -MODEL_PATH = download_testdata(MODEL_URL, "kws_ref_model.tflite", module="model") -SAMPLE_URL = "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/keyword_spotting_int8_6.pyc.npy" -SAMPLE_PATH = download_testdata(SAMPLE_URL, "keyword_spotting_int8_6.pyc.npy", module="data") - -tflite_model_buf = open(MODEL_PATH, "rb").read() -try: - import tflite - - tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0) -except AttributeError: - import tflite.Model - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0) - -input_shape = (1, 49, 10, 1) -INPUT_NAME = "input_1" -relay_mod, params = relay.frontend.from_tflite( - tflite_model, shape_dict={INPUT_NAME: input_shape}, dtype_dict={INPUT_NAME: "int8"} -) - -###################################################################### -# Defining the target -# ------------------- -# -# Now we need to define the target, runtime and executor. In this tutorial, we focused on -# using AOT host driven executor. We use the host micro target which is for running a model -# on x86 CPU using CRT runtime or running a model with Zephyr platform on qemu_x86 simulator -# board. In the case of a physical microcontroller, we get the target model for the physical -# board (E.g. nucleo_l4r5zi) and change `BOARD` to supported Zephyr board. -# - -# Use the C runtime (crt) and enable static linking by setting system-lib to True -RUNTIME = Runtime("crt", {"system-lib": True}) - -# Simulate a microcontroller on the host machine. Uses the main() from `src/runtime/crt/host/main.cc`. -# To use physical hardware, replace "host" with something matching your hardware. -TARGET = tvm.micro.testing.get_target("crt") - -# Use the AOT executor rather than graph or vm executors. Don't use unpacked API or C calling style. 
-EXECUTOR = Executor("aot") - -if use_physical_hw: - BOARD = os.getenv("TVM_MICRO_BOARD", default="nucleo_l4r5zi") - SERIAL = os.getenv("TVM_MICRO_SERIAL", default=None) - TARGET = tvm.micro.testing.get_target("zephyr", BOARD) - -###################################################################### -# Compile the model -# ----------------- -# -# Now, we compile the model for the target: -# -with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - module = tvm.relay.build( - relay_mod, target=TARGET, params=params, runtime=RUNTIME, executor=EXECUTOR - ) - -###################################################################### -# Create a microTVM project -# ------------------------- -# -# Now that we have the compiled model as an IRModule, we need to create a firmware project -# to use the compiled model with microTVM. To do this, we use Project API. We have defined -# CRT and Zephyr microTVM template projects which are used for x86 CPU and Zephyr boards -# respectively. -# -template_project_path = pathlib.Path(tvm.micro.get_microtvm_template_projects("crt")) -project_options = {} # You can use options to provide platform-specific options through TVM. - -if use_physical_hw: - template_project_path = pathlib.Path(tvm.micro.get_microtvm_template_projects("zephyr")) - project_options = { - "project_type": "host_driven", - "board": BOARD, - "serial_number": SERIAL, - "config_main_stack_size": 4096, - "zephyr_base": os.getenv("ZEPHYR_BASE", default="/content/zephyrproject/zephyr"), - } - -temp_dir = tvm.contrib.utils.tempdir() -generated_project_dir = temp_dir / "project" -project = tvm.micro.generate_project( - template_project_path, module, generated_project_dir, project_options -) - -###################################################################### -# Build, flash and execute the model -# ---------------------------------- -# Next, we build the microTVM project and flash it. Flash step is specific to -# physical microcontrollers and it is skipped if it is simulating a microcontroller -# via the host main.cc or if a Zephyr emulated board is selected as the target. -# Next, we define the labels for the model output and execute the model with a -# sample with expected value of 6 (label: left). -# -project.build() -project.flash() - -labels = [ - "_silence_", - "_unknown_", - "yes", - "no", - "up", - "down", - "left", - "right", - "on", - "off", - "stop", - "go", -] -with tvm.micro.Session(project.transport()) as session: - aot_executor = tvm.runtime.executor.aot_executor.AotModule(session.create_aot_executor()) - sample = np.load(SAMPLE_PATH) - aot_executor.get_input(INPUT_NAME).copyfrom(sample) - aot_executor.run() - result = aot_executor.get_output(0).numpy() - print(f"Label is `{labels[np.argmax(result)]}` with index `{np.argmax(result)}`") diff --git a/gallery/how_to/work_with_microtvm/micro_autotune.py b/gallery/how_to/work_with_microtvm/micro_autotune.py deleted file mode 100644 index e8c032b70e05..000000000000 --- a/gallery/how_to/work_with_microtvm/micro_autotune.py +++ /dev/null @@ -1,294 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -.. _tutorial-micro-autotune: - -6. Model Tuning with microTVM -============================= -**Authors**: -`Andrew Reusch `_, -`Mehrdad Hessar `_ - -This tutorial explains how to autotune a model using the C runtime. -""" - -###################################################################### -# -# .. include:: ../../../../gallery/how_to/work_with_microtvm/install_dependencies.rst -# - - -# You can skip the following section (installing Zephyr) if the following flag is False. -# Installing Zephyr takes ~20 min. -import os - -use_physical_hw = bool(os.getenv("TVM_MICRO_USE_HW")) - -###################################################################### -# -# .. include:: ../../../../gallery/how_to/work_with_microtvm/install_zephyr.rst -# - - -###################################################################### -# Import Python dependencies -# ------------------------------- -# -import json -import numpy as np -import pathlib - -import tvm -from tvm.relay.backend import Runtime -import tvm.micro.testing - -#################### -# Defining the model -#################### -# -# To begin with, define a model in Relay to be executed on-device. Then create an IRModule from relay model and -# fill parameters with random numbers. -# - -data_shape = (1, 3, 10, 10) -weight_shape = (6, 3, 5, 5) - -data = tvm.relay.var("data", tvm.relay.TensorType(data_shape, "float32")) -weight = tvm.relay.var("weight", tvm.relay.TensorType(weight_shape, "float32")) - -y = tvm.relay.nn.conv2d( - data, - weight, - padding=(2, 2), - kernel_size=(5, 5), - kernel_layout="OIHW", - out_dtype="float32", -) -f = tvm.relay.Function([data, weight], y) - -relay_mod = tvm.IRModule.from_expr(f) -relay_mod = tvm.relay.transform.InferType()(relay_mod) - -weight_sample = np.random.rand( - weight_shape[0], weight_shape[1], weight_shape[2], weight_shape[3] -).astype("float32") -params = {"weight": weight_sample} - -####################### -# Defining the target -####################### -# Now we define the TVM target that describes the execution environment. This looks very similar -# to target definitions from other microTVM tutorials. Alongside this we pick the C Runtime to code -# generate our model against. -# -# When running on physical hardware, choose a target and a board that -# describe the hardware. There are multiple hardware targets that could be selected from -# PLATFORM list in this tutorial. You can chose the platform by passing --platform argument when running -# this tutorial. -# - -RUNTIME = Runtime("crt", {"system-lib": True}) -TARGET = tvm.micro.testing.get_target("crt") - -# Compiling for physical hardware -# -------------------------------------------------------------------------- -# When running on physical hardware, choose a TARGET and a BOARD that describe the hardware. The -# STM32L4R5ZI Nucleo target and board is chosen in the example below. 
-if use_physical_hw: - BOARD = os.getenv("TVM_MICRO_BOARD", default="nucleo_l4r5zi") - SERIAL = os.getenv("TVM_MICRO_SERIAL", default=None) - TARGET = tvm.micro.testing.get_target("zephyr", BOARD) - - -######################### -# Extracting tuning tasks -######################### -# Not all operators in the Relay program printed above can be tuned. Some are so trivial that only -# a single implementation is defined; others don't make sense as tuning tasks. Using -# `extract_from_program`, you can produce a list of tunable tasks. -# -# Because task extraction involves running the compiler, we first configure the compiler's -# transformation passes; we'll apply the same configuration later on during autotuning. -# - -pass_context = tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}) -with pass_context: - tasks = tvm.autotvm.task.extract_from_program(relay_mod["main"], {}, TARGET) -assert len(tasks) > 0 - -###################### -# Configuring microTVM -###################### -# Before autotuning, we need to define a module loader and then pass that to -# a `tvm.autotvm.LocalBuilder`. Then we create a `tvm.autotvm.LocalRunner` and use -# both builder and runner to generates multiple measurements for auto tunner. -# -# In this tutorial, we have the option to use x86 host as an example or use different targets -# from Zephyr RTOS. If you choose pass `--platform=host` to this tutorial it will uses x86. You can -# choose other options by choosing from `PLATFORM` list. -# - -module_loader = tvm.micro.AutoTvmModuleLoader( - template_project_dir=pathlib.Path(tvm.micro.get_microtvm_template_projects("crt")), - project_options={"verbose": False}, -) -builder = tvm.autotvm.LocalBuilder( - n_parallel=1, - build_kwargs={"build_option": {"tir.disable_vectorize": True}}, - do_fork=True, - build_func=tvm.micro.autotvm_build_func, - runtime=RUNTIME, -) -runner = tvm.autotvm.LocalRunner(number=1, repeat=1, timeout=100, module_loader=module_loader) - -measure_option = tvm.autotvm.measure_option(builder=builder, runner=runner) - -# Compiling for physical hardware -if use_physical_hw: - module_loader = tvm.micro.AutoTvmModuleLoader( - template_project_dir=pathlib.Path(tvm.micro.get_microtvm_template_projects("zephyr")), - project_options={ - "board": BOARD, - "verbose": False, - "project_type": "host_driven", - "serial_number": SERIAL, - }, - ) - builder = tvm.autotvm.LocalBuilder( - n_parallel=1, - build_kwargs={"build_option": {"tir.disable_vectorize": True}}, - do_fork=False, - build_func=tvm.micro.autotvm_build_func, - runtime=RUNTIME, - ) - runner = tvm.autotvm.LocalRunner(number=1, repeat=1, timeout=100, module_loader=module_loader) - - measure_option = tvm.autotvm.measure_option(builder=builder, runner=runner) - -########################## -# Run Autotuning -########################## -# Now we can run autotuning separately on each extracted task on microTVM device. 
-# - -autotune_log_file = pathlib.Path("microtvm_autotune.log.txt") -if os.path.exists(autotune_log_file): - os.remove(autotune_log_file) - -num_trials = 10 -for task in tasks: - tuner = tvm.autotvm.tuner.GATuner(task) - tuner.tune( - n_trial=num_trials, - measure_option=measure_option, - callbacks=[ - tvm.autotvm.callback.log_to_file(str(autotune_log_file)), - tvm.autotvm.callback.progress_bar(num_trials, si_prefix="M"), - ], - si_prefix="M", - ) - -############################ -# Timing the untuned program -############################ -# For comparison, let's compile and run the graph without imposing any autotuning schedules. TVM -# will select a randomly-tuned implementation for each operator, which should not perform as well as -# the tuned operator. -# - -with pass_context: - lowered = tvm.relay.build(relay_mod, target=TARGET, runtime=RUNTIME, params=params) - -temp_dir = tvm.contrib.utils.tempdir() -project = tvm.micro.generate_project( - str(tvm.micro.get_microtvm_template_projects("crt")), - lowered, - temp_dir / "project", - {"verbose": False}, -) - -# Compiling for physical hardware -if use_physical_hw: - temp_dir = tvm.contrib.utils.tempdir() - project = tvm.micro.generate_project( - str(tvm.micro.get_microtvm_template_projects("zephyr")), - lowered, - temp_dir / "project", - { - "board": BOARD, - "verbose": False, - "project_type": "host_driven", - "serial_number": SERIAL, - "config_main_stack_size": 4096, - }, - ) - -project.build() -project.flash() -with tvm.micro.Session(project.transport()) as session: - debug_module = tvm.micro.create_local_debug_executor( - lowered.get_graph_json(), session.get_system_lib(), session.device - ) - debug_module.set_input(**lowered.get_params()) - print("########## Build without Autotuning ##########") - debug_module.run() - del debug_module - -########################## -# Timing the tuned program -########################## -# Once autotuning completes, you can time execution of the entire program using the Debug Runtime: - -with tvm.autotvm.apply_history_best(str(autotune_log_file)): - with pass_context: - lowered_tuned = tvm.relay.build(relay_mod, target=TARGET, runtime=RUNTIME, params=params) - -temp_dir = tvm.contrib.utils.tempdir() -project = tvm.micro.generate_project( - str(tvm.micro.get_microtvm_template_projects("crt")), - lowered_tuned, - temp_dir / "project", - {"verbose": False}, -) - -# Compiling for physical hardware -if use_physical_hw: - temp_dir = tvm.contrib.utils.tempdir() - project = tvm.micro.generate_project( - str(tvm.micro.get_microtvm_template_projects("zephyr")), - lowered_tuned, - temp_dir / "project", - { - "board": BOARD, - "verbose": False, - "project_type": "host_driven", - "serial_number": SERIAL, - "config_main_stack_size": 4096, - }, - ) - -project.build() -project.flash() -with tvm.micro.Session(project.transport()) as session: - debug_module = tvm.micro.create_local_debug_executor( - lowered_tuned.get_graph_json(), session.get_system_lib(), session.device - ) - debug_module.set_input(**lowered_tuned.get_params()) - print("########## Build with Autotuning ##########") - debug_module.run() - del debug_module diff --git a/gallery/how_to/work_with_microtvm/micro_custom_ide.py b/gallery/how_to/work_with_microtvm/micro_custom_ide.py deleted file mode 100644 index ae8352799c48..000000000000 --- a/gallery/how_to/work_with_microtvm/micro_custom_ide.py +++ /dev/null @@ -1,361 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -.. _tutorial-micro-ide: - -9. Bring microTVM to your own development environment -====================================================== -**Author**: -`Mohamad Katanbaf `_ - -This tutorial describes the steps required to integrate a model compiled with microTVM into a custom development environment. -We use `STM32CubeIDE `_, as the target IDE in this tutorial, but we do not rely on any specific feature of this IDE and integrating microTVM in other IDEs would be similar. -We also use the Visual Wake Word (VWW) model from MLPerf Tiny and the nucleo_l4r5zi board here, but the same steps can be used for any other model or target MCU. -If you want to use another target MCU with the vww model, we recommend a cortex-M4 or cortex-M7 device with ~512 KB and ~256 KB of Flash and RAM respectively. - -Here is a brief overview of the steps that we would take in this tutorial. - -1. We start by importing the model, compiling it using TVM and generating the `Model Library Format `_ (MLF) tar-file that includes the generated code for the model as well as all the required TVM dependencies. -2. We also add two sample images in binary format (one person and one not-person sample) to the .tar file for evaluating the model. -3. Next we use the stmCubeMX to generate the initialization code for the project in stmCube IDE. -4. After that, we include our MLF file and the required CMSIS libraries in the project and build it. -5. Finally, we flash the device and evaluate the model performance on our sample images. - -Let's Begin. -""" - -###################################################################### -# Install microTVM Python dependencies -# ------------------------------------ -# -# TVM does not include a package for Python serial communication, so -# we must install one before using microTVM. We will also need TFLite -# to load models, and Pillow to prepare the sample images. -# -# .. code-block:: bash -# -# %%shell -# pip install pyserial==3.5 tflite==2.1 Pillow==9.0 typing_extensions -# - - -###################################################################### -# Import Python dependencies -# --------------------------- -# -# If you want to run this script locally, check out `TVM Online Documentation `_ for instructions to install TVM. -# - -import os -import numpy as np -import pathlib -import json -from PIL import Image -import tarfile - -import tvm -from tvm import relay -from tvm.relay.backend import Executor, Runtime -from tvm.contrib.download import download_testdata -from tvm.micro import export_model_library_format -from tvm.relay.op.contrib import cmsisnn -from tvm.micro.testing.utils import create_header_file - -###################################################################### -# Import the TFLite model -# ------------------------ -# -# To begin with, download and import a Visual Wake Word TFLite model. 
This model takes in a 96x96x3 RGB image and determines whether a person is present in the image or not. -# This model is originally from `MLPerf Tiny repository `_. -# To test this model, we use two samples from `COCO 2014 Train images `_. -# -MODEL_URL = "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/visual_wake_words/trained_models/vww_96_int8.tflite" -MODEL_NAME = "vww_96_int8.tflite" -MODEL_PATH = download_testdata(MODEL_URL, MODEL_NAME, module="model") - -tflite_model_buf = open(MODEL_PATH, "rb").read() -try: - import tflite - - tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0) -except AttributeError: - import tflite.Model - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0) - -input_shape = (1, 96, 96, 3) -INPUT_NAME = "input_1_int8" -relay_mod, params = relay.frontend.from_tflite( - tflite_model, shape_dict={INPUT_NAME: input_shape}, dtype_dict={INPUT_NAME: "int8"} -) - -###################################################################### -# Generate the Model Library Format file -# ----------------------------------------- -# -# First we define the target, runtime and executor. Then we compile the model for the target device and -# finally we export the generated code and all the required dependencies in a single file. -# - -# We can use TVM native schedules or rely on the CMSIS-NN kernels using TVM Bring-Your-Own-Code (BYOC) capability. -USE_CMSIS_NN = True - -# USMP (Unified Static Memory Planning) performs memory planning of all tensors holistically to achieve best memory utilization -DISABLE_USMP = False - -# Use the C runtime (crt) -RUNTIME = Runtime("crt") - -# We define the target by passing the board name to `tvm.target.target.micro`. -# If your board is not included in the supported models, you can define the target such as: -# TARGET = tvm.target.Target("c -keys=arm_cpu,cpu -mcpu=cortex-m4") -TARGET = tvm.target.target.micro("stm32l4r5zi") - -# Use the AOT executor rather than graph or vm executors. Use unpacked API and C calling style. -EXECUTOR = tvm.relay.backend.Executor( - "aot", {"unpacked-api": True, "interface-api": "c", "workspace-byte-alignment": 8} -) - -# Now, we set the compilation configurations and compile the model for the target: -config = {"tir.disable_vectorize": True} -if USE_CMSIS_NN: - config["relay.ext.cmsisnn.options"] = {"mcpu": TARGET.mcpu} -if DISABLE_USMP: - config["tir.usmp.enable"] = False - -with tvm.transform.PassContext(opt_level=3, config=config): - if USE_CMSIS_NN: - # When we are using CMSIS-NN, TVM searches for patterns in the - # relay graph that it can offload to the CMSIS-NN kernels. - relay_mod = cmsisnn.partition_for_cmsisnn(relay_mod, params, mcpu=TARGET.mcpu) - lowered = tvm.relay.build( - relay_mod, target=TARGET, params=params, runtime=RUNTIME, executor=EXECUTOR - ) -parameter_size = len(tvm.runtime.save_param_dict(lowered.get_params())) -print(f"Model parameter size: {parameter_size}") - -# We need to pick a directory where our file will be saved. -# If running on Google Colab, we'll save everything in ``/root/tutorial`` (aka ``~/tutorial``) -# but you'll probably want to store it elsewhere if running locally. 
- -BUILD_DIR = pathlib.Path("/root/tutorial") -# sphinx_gallery_start_ignore -BUILD_DIR = pathlib.Path(os.getcwd()) / "tutorial" -# sphinx_gallery_end_ignore - -BUILD_DIR.mkdir(exist_ok=True) - -# Now, we export the model into a tar file: -TAR_PATH = pathlib.Path(BUILD_DIR) / "model.tar" -export_model_library_format(lowered, TAR_PATH) - -###################################################################### -# Add sample images to the MLF files -# ----------------------------------- -# Finally, we downlaod two sample images (one person and one not-person), convert them to binary format and store them in two header files. -# - -with tarfile.open(TAR_PATH, mode="a") as tar_file: - SAMPLES_DIR = "samples" - SAMPLE_PERSON_URL = ( - "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/vww_sample_person.jpg" - ) - SAMPLE_NOT_PERSON_URL = "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/vww_sample_not_person.jpg" - - SAMPLE_PERSON_PATH = download_testdata(SAMPLE_PERSON_URL, "person.jpg", module=SAMPLES_DIR) - img = Image.open(SAMPLE_PERSON_PATH) - create_header_file("sample_person", np.asarray(img), SAMPLES_DIR, tar_file) - - SAMPLE_NOT_PERSON_PATH = download_testdata( - SAMPLE_NOT_PERSON_URL, "not_person.jpg", module=SAMPLES_DIR - ) - img = Image.open(SAMPLE_NOT_PERSON_PATH) - create_header_file("sample_not_person", np.asarray(img), SAMPLES_DIR, tar_file) - -###################################################################### -# At this point you have all you need to take the compiled model to your IDE and evaluate it. Inside the MLF file (model.tar), you should find the following file hierearchy: -# -# .. code-block:: -# -# /root -# ├── codegen -# ├── parameters -# ├── runtime -# ├── samples -# ├── src -# ├── templates -# ├── metadata.json -# -# * The codegen folder includes the C code TVM generated for your model. -# * The runtime folder includes all the TVM dependencies that the target needs to compile the generated C code. -# * The samples folder includes the two generated sample files for evaluating the model. -# * The src folder includes the relay module describing the model. -# * The templates folder includes two template files that you might need to edit based on your platform. -# * The metadata.json file includes information about the model, its layers and memory requirement. -# - - -###################################################################### -# Generate the project in your IDE -# ----------------------------------- -# -# The next step is to create a project for our target device. We use STM32CubeIDE, you can download it `here `_. -# We are using version 1.11.0 in this tutorial. Once you install STM32CubeIDE follow these steps to create a project: -# -# #. select File -> New -> STM32Project. The target selection Window appears. -# -# #. Navigate to the "Board Selector" tab, type in the board name "nucleo-l4r5zi" in the "Commercial Part Number" text box. Select the board from the list of boards that appear on the right side of the screen and click "Next". -# -# #. Type in your project name (for example microtvm_vww_demo). We are using the default options. (Target Language: C, Binary Type: Executable, Project Type: STM32Cube). Click "Finish". -# -# #. A text box will appear asking if you want to "Initialize all the peripherals with their default mode?". click "Yes". This will generate the project and open the device configuration tool where you can use the GUI to setup the peripherals. 
By default the USB, USART3 and LPUART1 are enabled, as well as a few GPIOs. -# -# #. We will use LPUART1 to send data to the host pc. From the connectivity section, select the LPUART1 and set the "Baud Rate" to 115200 and the "Word Length" to 8. Save the changes and click "Yes" to regenerate the initialization code. This should regenerate the code and open your main.c file. You can also find main.c from the Project Explorer panel on the left, under microtvm_vww_demo -> Core -> Src. -# -# #. For sanity check, copy the code below and paste it in the "Infinite loop (aka. While (1) ) section of the main function. -# -# * Note: Make sure to write your code inside the sections marked by USER CODE BEGIN <...> and USER CODE END <...>. The code outside these sections get erased if you regenerate the initialization code. -# -# .. code-block:: c -# -# HAL_GPIO_TogglePin(LD2_GPIO_Port, LD2_Pin); -# HAL_UART_Transmit(&hlpuart1, "Hello World.\r\n", 14, 100); -# HAL_Delay(1000); -# -# #. From the menu bar, select Project -> Build (or right click on project name and select Build). This should build the project and generate the .elf file. Select Run -> Run to download the binary on your MCU. If the "Edit Configuration" window opens, just click "OK". -# -# #. Open the terminal console on your host machine. On Mac you can simply use the "screen 115200" command, e.g. "screen tty.usbmodemXXXX 115200". An LED should blink on the board and the string "Hello World." should print out on your terminal console every second. Press "Control-a k" to exit screen. -# - -###################################################################### -# Import the model to the generated project -# ------------------------------------------ -# -# To integrate the compiled model into the generated project, follow these steps: -# -# #. Extract the tar file and include it in the project -# -# * Open the project Properties. (by right clicking on the project name and selecting "Properties" or by selecting Project -> Properties from the menu bar). -# * Select C/C++ General -> Paths and Symbols. Select the Source Location tab. -# * If you extracted the model inside the project folder, click "Add Folder" and select the "model" folder. (You might need to right click on the project name and select "Refresh" before it appears.) -# * If you extracted the model file somewhere else, click on the "Link Folder" button, check the box for "Link to folder in the file system" in the window that appears, click "Browse" and select the model folder. -# -# #. If you used CMSIS-NN in compiling the model, you need to include the CMSIS-NN source files in your project too. -# -# * Download or clone the files from the `CMSIS-NN repository `_, and follow the above steps to include the CMSIS-NN folder in the project. -# -# #. Open the project properties. In C/C++ Build -> Settings: add the following folders to the list of Include Paths for MCU GCC Compiler (and MCU G++ Compiler if you have a C++ project) by clicking on the "+" button, selecting "Workspace" and navigating to each of the following folders: -# -# * model/runtime/include -# * model/codegen/host/include -# * model/samples -# * CMSIS-NN/Include -# -# #. Copy crt_config.h.template from model/templates to the Core/Inc folder, and rename it to crt_config.h. -# -# #. Copy platform.c.template from model/templates to the Core/Src folder, and rename it to platform.c. -# * This file includes functions for managing the memory that you might need to edit based on your platform. 
-# * define "TVM_WORKSPACE_SIZE_BYTES" in platform.c. if you are using USMP, a small value (for example 1024 Bytes) is enough. -# * if you are not using usmp, checkout "workspace_size_bytes" field in metadata.json for an estimate of the required memory. -# -# #. Exclude the following folders from build (right click on the folder name, select Resource Configuration → Exclude from build). Check Debug and Release configurations. -# -# * CMSIS_NN/Tests -# -# #. Download the CMSIS drivers from `CMSIS Version 5 repository `_. -# -# * In your Project directory, delete the Drivers/CMSIS/Include folder (which is an older version of the CMSIS drivers) and copy the CMSIS/Core/Include from the one you downloaded in its place. -# -# #. Edit the main.c file: -# -# * Include following header files: -# -# .. code-block:: c -# -# #include -# #include -# #include -# #include "tvmgen_default.h" -# #include "sample_person.h" -# #include "sample_not_person.h" -# -# * Copy the following code into the main function right before the infinite loop. It sets the input and output to the model. -# -# .. code-block:: c -# -# TVMPlatformInitialize(); -# signed char output[2]; -# struct tvmgen_default_inputs inputs = { -# .input_1_int8 = (void*)&sample_person, -# }; -# struct tvmgen_default_outputs outputs = { -# .Identity_int8 = (void*)&output, -# }; -# char msg[] = "Evaluating VWW model using microTVM:\r\n"; -# HAL_UART_Transmit(&hlpuart1, msg, strlen(msg), 100); -# uint8_t sample = 0; -# uint32_t timer_val; -# char buf[50]; -# uint16_t buf_len; -# -# * Copy the following code inside the infinite loop to run inference on both images and print the result on the console: -# -# .. code-block:: c -# -# if (sample == 0) -# inputs.input_1_int8 = (void*)&sample_person; -# else -# inputs.input_1_int8 = (void*)&sample_not_person; -# -# timer_val = HAL_GetTick(); -# tvmgen_default_run(&inputs, &outputs); -# timer_val = HAL_GetTick() - timer_val; -# if (output[0] > output[1]) -# buf_len = sprintf(buf, "Person not detected, inference time = %lu ms\r\n", timer_val); -# else -# buf_len = sprintf(buf, "Person detected, inference time = %lu ms\r\n", timer_val); -# HAL_UART_Transmit(&hlpuart1, buf, buf_len, 100); -# -# sample++; -# if (sample == 2) -# sample = 0; -# -# -# * Define the TVMLogf function in main, to receive TVM runtime errors on serial console. -# -# .. code-block:: c -# -# void TVMLogf(const char* msg, ...) { -# char buffer[128]; -# int size; -# va_list args; -# va_start(args, msg); -# size = TVMPlatformFormatMessage(buffer, 128, msg, args); -# va_end(args); -# HAL_UART_Transmit(&hlpuart1, buffer, size, 100); -# } -# -# #. In project properties, C/C++ Build -> Settings, MCU GCC Compiler -> Optimization, set the Optimization level to "Optimize more (-O2)" - - -###################################################################### -# Evaluate the model -# ------------------- -# -# Now, select Run -> Run from the menu bar to flash the MCU and run the project. -# You should see the LED blinking and the inference result printing on the console. -# diff --git a/gallery/how_to/work_with_microtvm/micro_ethosu.py b/gallery/how_to/work_with_microtvm/micro_ethosu.py deleted file mode 100644 index f5b61974d4e7..000000000000 --- a/gallery/how_to/work_with_microtvm/micro_ethosu.py +++ /dev/null @@ -1,574 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -.. _tutorial-micro-ethosu: - -7. Running TVM on bare metal Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU with CMSIS-NN -========================================================================================= -**Author**: -`Grant Watson `_ - -This section contains an example of how to use TVM to run a model -on an Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU with CMSIS-NN, using bare metal. -The Cortex(R)-M55 is a small, low-power CPU designed for use in embedded -devices. CMSIS-NN is a collection of kernels optimized for Arm(R) Cortex(R)-M CPUs. -The Ethos(TM)-U55 is a microNPU, specifically designed to accelerate -ML inference in resource-constrained embedded devices. - -In order to run the demo application without having access to a Cortex(R)-M55 -and Ethos(TM)-U55 development board, we will be running our sample application -on a Fixed Virtual Platform (FVP). The FVP based on Arm(R) Corstone(TM)-300 -software, models a hardware system containing a Cortex(R)-M55 and Ethos(TM)-U55. -It provides a programmer's view that is suitable for software development. - -In this tutorial, we will be compiling a MobileNet v1 model and instructing -TVM to offload operators to the Ethos(TM)-U55 where possible. -""" - - -################################################################################ -# Obtaining TVM -# ------------- -# -# To obtain TVM for you platform, please visit https://tlcpack.ai/ and follow the -# instructions. Once TVM has been installed correctly, you should have access to -# ``tvmc`` from the command line. -# -# Typing ``tvmc`` on the command line should display the following: -# -# .. code-block:: text -# -# usage: tvmc [-h] [-v] [--version] {tune,compile,run} ... -# -# TVM compiler driver -# -# optional arguments: -# -h, --help show this help message and exit -# -v, --verbose increase verbosity -# --version print the version and exit -# -# commands: -# {tune,compile,run} -# tune auto-tune a model -# compile compile a model. -# run run a compiled module -# -# TVMC - TVM driver command-line interface -# - -################################################################################ -# Installing additional python dependencies -# ----------------------------------------- -# -# In order to run the demo, you will need some additional python packages. -# These can be installed by using the requirements.txt file below: -# -# .. code-block:: text -# :caption: requirements.txt -# :name: requirements.txt -# -# attrs==21.2.0 -# cloudpickle==2.0.0 -# decorator==5.1.0 -# ethos-u-vela==3.8.0 -# flatbuffers==2.0.7 -# lxml==4.6.3 -# nose==1.3.7 -# numpy==1.19.5 -# Pillow==8.3.2 -# psutil==5.8.0 -# scipy==1.5.4 -# tflite==2.4.0 -# tornado==6.1 -# -# These packages can be installed by running the following from the command line: -# -# .. 
code-block:: bash -# -# pip install -r requirements.txt -# - -################################################################################ -# Obtaining the Model -# ------------------- -# -# For this tutorial, we will be working with MobileNet v1. -# MobileNet v1 is a convolutional neural network designed to classify images, -# that has been optimized for edge devices. The model we will be using has been -# pre-trained to classify images into one of 1001 different categories. -# The network has an input image size of 224x224 so any input images will need -# to be resized to those dimensions before being used. -# -# For this tutorial we will be using the model in Tflite format. -# -# .. code-block:: bash -# -# mkdir -p ./build -# cd build -# wget https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz -# gunzip mobilenet_v1_1.0_224_quant.tgz -# tar xvf mobilenet_v1_1.0_224_quant.tar -# - -###################################################################################### -# Compiling the model for Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU with CMSIS-NN -# ------------------------------------------------------------------------------------ -# -# Once we've downloaded the MobileNet v1 model, the next step is to compile it. -# To accomplish that, we are going to use ``tvmc compile``. The output we get from -# the compilation process is a TAR package of the model compiled to the Model -# Library Format (MLF) for our target platform. We will be able to run that model -# on our target device using the TVM runtime. -# -# .. code-block:: bash -# -# tvmc compile --target=ethos-u,cmsis-nn,c \ -# --target-ethos-u-accelerator_config=ethos-u55-256 \ -# --target-cmsis-nn-mcpu=cortex-m55 \ -# --target-c-mcpu=cortex-m55 \ -# --runtime=crt \ -# --executor=aot \ -# --executor-aot-interface-api=c \ -# --executor-aot-unpacked-api=1 \ -# --pass-config tir.usmp.enable=1 \ -# --pass-config tir.usmp.algorithm=hill_climb \ -# --pass-config tir.disable_storage_rewrite=1 \ -# --pass-config tir.disable_vectorize=1 \ -# ./mobilenet_v1_1.0_224_quant.tflite \ -# --output-format=mlf -# - -################################################################################ -# .. note:: Explanation of tvmc compile arguments: -# -# * ``--target=ethos-u,cmsis-nn,c`` : offload operators to the microNPU where possible, falling back to CMSIS-NN and finally generated C code where an operator is not supported on the microNPU.. -# -# * ``--target-ethos-u-accelerator_config=ethos-u55-256`` : specifies the microNPU configuration -# -# * ``--target-c-mcpu=cortex-m55`` : Cross-compile for the Cortex(R)-M55. -# -# * ``--runtime=crt`` : Generate glue code to allow operators to work with C runtime. -# -# * ``--executor=aot`` : Use Ahead Of Time compiltaion instead of the Graph Executor. -# -# * ``--executor-aot-interface-api=c`` : Generate a C-style interface with structures designed for integrating into C apps at the boundary. -# -# * ``--executor-aot-unpacked-api=1`` : Use the unpacked API internally. -# -# * ``--pass-config tir.usmp.enable=1`` : Enable Unified Static Memory Planning -# -# * ``--pass-config tir.usmp.algorithm=hill_climb`` : Use the hill-climb algorithm for USMP -# -# * ``--pass-config tir.disable_storage_rewrite=1`` : Disable storage rewrite -# -# * ``--pass-config tir.disable_vectorize=1`` : Disable vectorize since there are no standard vectorized types in C. -# -# * ``./mobilenet_v1_1.0_224_quant.tflite`` : The TFLite model that is being compiled. 
-# -# * ``--output-format=mlf`` : Output should be generated in the Model Library Format. -# - -################################################################################ -# .. note:: If you don't want to make use of the microNPU and want to offload -# operators to CMSIS-NN only: -# -# * Use ``--target=cmsis-nn,c`` in place of ``--target=ethos-u,cmsis-nn,c`` -# -# * Remove the microNPU config parameter ``--target-ethos-u-accelerator_config=ethos-u55-256`` -# - -################################################################################ -# Extracting the generated code into the current directory -# -------------------------------------------------------- -# -# .. code-block:: bash -# -# tar xvf module.tar -# - -################################################################################ -# Getting ImageNet labels -# ----------------------- -# -# When running MobileNet v1 on an image, the result is an index in the range 0 to -# 1000. In order to make our application a little more user friendly, instead of -# just displaying the category index, we will display the associated label. We -# will download these image labels into a text file now and use a python script -# to include them in our C application later. -# -# .. code-block:: bash -# -# curl -sS https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/lite/java/demo/app/src/main/assets/labels_mobilenet_quant_v1_224.txt \ -# -o ./labels_mobilenet_quant_v1_224.txt -# - -################################################################################ -# Getting the input image -# ----------------------- -# -# As input for this tutorial, we will use the image of a cat, but you can -# substitute an image of your choosing. -# -# .. image:: https://s3.amazonaws.com/model-server/inputs/kitten.jpg -# :height: 224px -# :width: 224px -# :align: center -# -# We download the image into the build directory and we will use a python script -# in the next step to convert the image into an array of bytes in a C header file. -# -# .. code-block:: bash -# -# curl -sS https://s3.amazonaws.com/model-server/inputs/kitten.jpg -o ./kitten.jpg -# - -################################################################################ -# Pre-processing the image -# ------------------------ -# -# The following script will create 2 C header files in the src directory: -# -# * ``inputs.h`` - The image supplied as an argument to the script will be converted -# to an array of integers for input to our MobileNet v1 model. -# * ``outputs.h`` - An integer array of zeroes will reserve 1001 integer values -# for the output of inference. -# -# .. code-block:: python -# :caption: convert_image.py -# :name: convert_image.py -# -# #!python ./convert_image.py -# import os -# import pathlib -# import re -# import sys -# from PIL import Image -# import numpy as np -# -# -# def create_header_file(name, section, tensor_name, tensor_data, output_path): -# """ -# This function generates a header file containing the data from the numpy array provided. 
-# """ -# file_path = pathlib.Path(f"{output_path}/" + name).resolve() -# # Create header file with npy_data as a C array -# raw_path = file_path.with_suffix(".h").resolve() -# with open(raw_path, "w") as header_file: -# header_file.write( -# "#include \n" -# + f"const size_t {tensor_name}_len = {tensor_data.size};\n" -# + f'uint8_t {tensor_name}[] __attribute__((section("{section}"), aligned(16))) = "' -# ) -# data_hexstr = tensor_data.tobytes().hex() -# for i in range(0, len(data_hexstr), 2): -# header_file.write(f"\\x{data_hexstr[i:i+2]}") -# header_file.write('";\n\n') -# -# -# def create_headers(image_name): -# """ -# This function generates C header files for the input and output arrays required to run inferences -# """ -# img_path = os.path.join("./", f"{image_name}") -# -# # Resize image to 224x224 -# resized_image = Image.open(img_path).resize((224, 224)) -# img_data = np.asarray(resized_image).astype("float32") -# -# # Convert input to NCHW -# img_data = np.transpose(img_data, (2, 0, 1)) -# -# # Create input header file -# input_data = img_data.astype(np.uint8) -# create_header_file("inputs", "ethosu_scratch", "input", input_data, "./include") -# # Create output header file -# output_data = np.zeros([1001], np.uint8) -# create_header_file( -# "outputs", -# "output_data_sec", -# "output", -# output_data, -# "./include", -# ) -# -# -# if __name__ == "__main__": -# create_headers(sys.argv[1]) -# -# Run the script from the command line: -# -# .. code-block:: bash -# -# python convert_image.py ./kitten.jpg - -################################################################################ -# Pre-processing the labels -# ------------------------- -# -# The following script will create a ``labels.h`` header file in the src directory. -# The labels.txt file that we downloaded previously will be turned -# into an array of strings. This array will be used to display the label that -# our image has been classified as. -# -# .. code-block:: python -# :caption: convert_labels.py -# :name: convert_labels.py -# -# #!python ./convert_labels.py -# import os -# import pathlib -# import sys -# -# -# def create_labels_header(labels_file, section, output_path): -# """ -# This function generates a header file containing the ImageNet labels as an array of strings -# """ -# labels_path = pathlib.Path(labels_file).resolve() -# file_path = pathlib.Path(f"{output_path}/labels.h").resolve() -# -# with open(labels_path) as f: -# labels = f.readlines() -# -# with open(file_path, "w") as header_file: -# header_file.write(f'char* labels[] __attribute__((section("{section}"), aligned(16))) = {{') -# -# for _, label in enumerate(labels): -# header_file.write(f'"{label.rstrip()}",') -# -# header_file.write("};\n") -# -# -# if __name__ == "__main__": -# create_labels_header(sys.argv[1], "ethosu_scratch", "./include") -# -# Run the script from the command line: -# -# .. code-block:: bash -# -# python convert_labels.py - -################################################################################ -# Writing the demo application -# ---------------------------- -# -# The following C application will run a single inference of the MobileNet v1 -# model on the image that we downloaded and converted to an array of integers -# previously. Since the model was compiled with a target of "ethos-u ...", -# operators supported by the Ethos(TM)-U55 NPU will be offloaded for acceleration. 
-# Once the application is built and run, our test image should be correctly -# classied as a "tabby" and the result should be displayed on the console. -# This file should be placed in ``./src`` -# -# .. code-block:: c -# :caption: demo.c -# :name: demo.c -# -# #include -# #include -# -# #include "ethosu_mod.h" -# #include "uart_stdout.h" -# -# // Header files generated by convert_image.py and convert_labels.py -# #include "inputs.h" -# #include "labels.h" -# #include "outputs.h" -# -# int abs(int v) { return v * ((v > 0) - (v < 0)); } -# -# int main(int argc, char** argv) { -# UartStdOutInit(); -# printf("Starting Demo\n"); -# EthosuInit(); -# -# printf("Allocating memory\n"); -# StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE); -# -# printf("Running inference\n"); -# struct tvmgen_default_outputs outputs = { -# .output = output, -# }; -# struct tvmgen_default_inputs inputs = { -# .input = input, -# }; -# struct ethosu_driver* driver = ethosu_reserve_driver(); -# struct tvmgen_default_devices devices = { -# .ethos_u = driver, -# }; -# tvmgen_default_run(&inputs, &outputs, &devices); -# ethosu_release_driver(driver); -# -# // Calculate index of max value -# uint8_t max_value = 0; -# int32_t max_index = -1; -# for (unsigned int i = 0; i < output_len; ++i) { -# if (output[i] > max_value) { -# max_value = output[i]; -# max_index = i; -# } -# } -# printf("The image has been classified as '%s'\n", labels[max_index]); -# -# // The FVP will shut down when it receives "EXITTHESIM" on the UART -# printf("EXITTHESIM\n"); -# while (1 == 1) -# ; -# return 0; -# } -# -# -# In addition, you will need these header files from github in your ``./include`` directory: -# -# `include files `_ - -################################################################################ -# .. note:: -# -# If you'd like to use FreeRTOS for task scheduling and queues, a sample application can be found here -# `demo_freertos.c ` - -################################################################################ -# Creating the linker script -# -------------------------- -# -# We need to create a linker script that will be used when we build our application -# in the following section. The linker script tells the linker where everything -# should be placed in memory. The corstone300.ld linker script below should be -# placed in your working directory. -# -# An example linker script for the FVP can be found here -# `corstone300.ld `_ - -################################################################################ -# .. note:: -# -# The code generated by TVM will place the model weights and the Arm(R) -# Ethos(TM)-U55 command stream in a section named ``ethosu_scratch``. -# For a model the size of MobileNet v1, the weights and command stream will not -# fit into the limited SRAM available. For this reason it's important that the -# linker script places the ``ethosu_scratch`` section into DRAM (DDR). - -################################################################################ -# .. note:: -# -# Before building and running the application, you will need to update your -# PATH environment variable to include the path to cmake 3.19.5 and the FVP. 
-# For example if you've installed these in ``/opt/arm`` , then you would do -# the following: -# -# ``export PATH=/opt/arm/FVP_Corstone_SSE-300_Ethos-U55/models/Linux64_GCC-6.4:/opt/arm/cmake/bin:$PATH`` -# - -################################################################################ -# Building the demo application using make -# ---------------------------------------- -# -# We can now build the demo application using make. The Makefile should be placed -# in your working directory before running ``make`` on the command line: -# -# An example Makefile can be found here: -# `Makefile `_ - -################################################################################ -# .. note:: -# -# If you're using FreeRTOS, the Makefile builds it from the specified FREERTOS_PATH: -# ``make FREERTOS_PATH=`` -# - -################################################################################ -# Running the demo application -# ---------------------------- -# -# Finally, we can run our demo appliction on the Fixed Virtual Platform (FVP), -# by using the following command: -# -# .. code-block:: bash -# -# FVP_Corstone_SSE-300_Ethos-U55 -C cpu0.CFGDTCMSZ=15 \ -# -C cpu0.CFGITCMSZ=15 -C mps3_board.uart0.out_file=\"-\" -C mps3_board.uart0.shutdown_tag=\"EXITTHESIM\" \ -# -C mps3_board.visualisation.disable-visualisation=1 -C mps3_board.telnetterminal0.start_telnet=0 \ -# -C mps3_board.telnetterminal1.start_telnet=0 -C mps3_board.telnetterminal2.start_telnet=0 -C mps3_board.telnetterminal5.start_telnet=0 \ -# -C ethosu.extra_args="--fast" \ -# -C ethosu.num_macs=256 ./build/demo -# -# You should see the following output displayed in your console window: -# -# .. code-block:: text -# -# telnetterminal0: Listening for serial connection on port 5000 -# telnetterminal1: Listening for serial connection on port 5001 -# telnetterminal2: Listening for serial connection on port 5002 -# telnetterminal5: Listening for serial connection on port 5003 -# -# Ethos-U rev dedfa618 --- Jan 12 2021 23:03:55 -# (C) COPYRIGHT 2019-2021 Arm Limited -# ALL RIGHTS RESERVED -# -# Starting Demo -# ethosu_init. base_address=0x48102000, fast_memory=0x0, fast_memory_size=0, secure=1, privileged=1 -# ethosu_register_driver: New NPU driver at address 0x20000de8 is registered. -# CMD=0x00000000 -# Soft reset NPU -# Allocating memory -# Running inference -# ethosu_find_and_reserve_driver - Driver 0x20000de8 reserved. -# ethosu_invoke -# CMD=0x00000004 -# QCONFIG=0x00000002 -# REGIONCFG0=0x00000003 -# REGIONCFG1=0x00000003 -# REGIONCFG2=0x00000013 -# REGIONCFG3=0x00000053 -# REGIONCFG4=0x00000153 -# REGIONCFG5=0x00000553 -# REGIONCFG6=0x00001553 -# REGIONCFG7=0x00005553 -# AXI_LIMIT0=0x0f1f0000 -# AXI_LIMIT1=0x0f1f0000 -# AXI_LIMIT2=0x0f1f0000 -# AXI_LIMIT3=0x0f1f0000 -# ethosu_invoke OPTIMIZER_CONFIG -# handle_optimizer_config: -# Optimizer release nbr: 0 patch: 1 -# Optimizer config cmd_stream_version: 0 macs_per_cc: 8 shram_size: 48 custom_dma: 0 -# Optimizer config Ethos-U version: 1.0.6 -# Ethos-U config cmd_stream_version: 0 macs_per_cc: 8 shram_size: 48 custom_dma: 0 -# Ethos-U version: 1.0.6 -# ethosu_invoke NOP -# ethosu_invoke NOP -# ethosu_invoke NOP -# ethosu_invoke COMMAND_STREAM -# handle_command_stream: cmd_stream=0x61025be0, cms_length 1181 -# QBASE=0x0000000061025be0, QSIZE=4724, base_pointer_offset=0x00000000 -# BASEP0=0x0000000061026e60 -# BASEP1=0x0000000060002f10 -# BASEP2=0x0000000060002f10 -# BASEP3=0x0000000061000fb0 -# BASEP4=0x0000000060000fb0 -# CMD=0x000Interrupt. 
status=0xffff0022, qread=4724 -# CMD=0x00000006 -# 00006 -# CMD=0x0000000c -# ethosu_release_driver - Driver 0x20000de8 released -# The image has been classified as 'tabby' -# EXITTHESIM -# Info: /OSCI/SystemC: Simulation stopped by user. -# -# You should see near the end of the output that the image has been correctly -# classified as 'tabby'. diff --git a/gallery/how_to/work_with_microtvm/micro_mlperftiny.py b/gallery/how_to/work_with_microtvm/micro_mlperftiny.py deleted file mode 100644 index 6be61789f8b0..000000000000 --- a/gallery/how_to/work_with_microtvm/micro_mlperftiny.py +++ /dev/null @@ -1,304 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -.. _tutorial-micro-mlperftiny: - -8. Creating Your MLPerfTiny Submission with microTVM -==================================================== -**Authors**: -`Mehrdad Hessar `_ - -This tutorial is showcasing building an MLPerfTiny submission using microTVM. This -tutorial shows the steps to import a TFLite model from MLPerfTiny benchmark models, -compile it with TVM and generate a Zephyr project which can be flashed to a Zephyr -supported board to benchmark the model using EEMBC runner. -""" - -###################################################################### -# -# .. include:: ../../../../gallery/how_to/work_with_microtvm/install_dependencies.rst -# - -import os -import pathlib -import tarfile -import tempfile -import shutil - -###################################################################### -# -# .. include:: ../../../../gallery/how_to/work_with_microtvm/install_zephyr.rst -# - - -###################################################################### -# -# **Note:** Install CMSIS-NN only if you are interested to generate this submission -# using CMSIS-NN code generator. -# - -###################################################################### -# -# .. include:: ../../../../gallery/how_to/work_with_microtvm/install_cmsis.rst -# - -###################################################################### -# Import Python dependencies -# ------------------------------- -# -import tensorflow as tf -import numpy as np - -import tvm -from tvm import relay -from tvm.relay.backend import Executor, Runtime -from tvm.contrib.download import download_testdata -from tvm.micro import export_model_library_format -import tvm.micro.testing -from tvm.micro.testing.utils import ( - create_header_file, - mlf_extract_workspace_size_bytes, -) - -###################################################################### -# Import Visual Wake Word Model -# -------------------------------------------------------------------- -# -# To begin with, download and import the Visual Wake Word (VWW) TFLite model from MLPerfTiny. -# This model is originally from `MLPerf Tiny repository `_. 
-# We also capture metadata information from the TFLite model such as input/output name, -# quantization parameters, etc. which will be used in following steps. -# -# We use indexing for various models to build the submission. The indices are defined as follows: -# To build another model, you need to update the model URL, the short name and index number. -# -# * Keyword Spotting(KWS) 1 -# * Visual Wake Word(VWW) 2 -# * Anomaly Detection(AD) 3 -# * Image Classification(IC) 4 -# -# If you would like to build the submission with CMSIS-NN, modify USE_CMSIS environment variable. -# -# .. code-block:: bash -# -# export USE_CMSIS=1 -# - -MODEL_URL = "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/visual_wake_words/trained_models/vww_96_int8.tflite" -MODEL_PATH = download_testdata(MODEL_URL, "vww_96_int8.tflite", module="model") - -MODEL_SHORT_NAME = "VWW" -MODEL_INDEX = 2 - -USE_CMSIS = os.environ.get("TVM_USE_CMSIS", False) - -tflite_model_buf = open(MODEL_PATH, "rb").read() -try: - import tflite - - tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0) -except AttributeError: - import tflite.Model - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0) - -interpreter = tf.lite.Interpreter(model_path=str(MODEL_PATH)) -interpreter.allocate_tensors() -input_details = interpreter.get_input_details() -output_details = interpreter.get_output_details() - -input_name = input_details[0]["name"] -input_shape = tuple(input_details[0]["shape"]) -input_dtype = np.dtype(input_details[0]["dtype"]).name -output_name = output_details[0]["name"] -output_shape = tuple(output_details[0]["shape"]) -output_dtype = np.dtype(output_details[0]["dtype"]).name - -# We extract quantization information from TFLite model. -# This is required for all models except Anomaly Detection, -# because for other models we send quantized data to interpreter -# from host, however, for AD model we send floating data and quantization -# happens on the microcontroller. -if MODEL_SHORT_NAME != "AD": - quant_output_scale = output_details[0]["quantization_parameters"]["scales"][0] - quant_output_zero_point = output_details[0]["quantization_parameters"]["zero_points"][0] - -relay_mod, params = relay.frontend.from_tflite( - tflite_model, shape_dict={input_name: input_shape}, dtype_dict={input_name: input_dtype} -) - -###################################################################### -# Defining Target, Runtime and Executor -# -------------------------------------------------------------------- -# -# Now we need to define the target, runtime and executor to compile this model. In this tutorial, -# we use Ahead-of-Time (AoT) compilation and we build a standalone project. This is different -# than using AoT with host-driven mode where the target would communicate with host using host-driven -# AoT executor to run inference. -# - -# Use the C runtime (crt) -RUNTIME = Runtime("crt") - -# Use the AoT executor with `unpacked-api=True` and `interface-api=c`. `interface-api=c` forces -# the compiler to generate C type function APIs and `unpacked-api=True` forces the compiler -# to generate minimal unpacked format inputs which reduces the stack memory usage on calling -# inference layers of the model. 
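# For contrast only (illustrative, not used below): an AoT executor constructed
# with no options keeps the default packed, TVMValue-based calling convention.
# The configuration built next opts into the C interface API and unpacked
# arguments instead, which is what the MLPerfTiny Zephyr project expects.
default_executor = Executor("aot")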
-EXECUTOR = Executor( - "aot", - {"unpacked-api": True, "interface-api": "c", "workspace-byte-alignment": 8}, -) - -# Select a Zephyr board -BOARD = os.getenv("TVM_MICRO_BOARD", default="nucleo_l4r5zi") - -# Get the full target description using the BOARD -TARGET = tvm.micro.testing.get_target("zephyr", BOARD) - -###################################################################### -# Compile the model and export model library format -# -------------------------------------------------------------------- -# -# Now, we compile the model for the target. Then, we generate model -# library format for the compiled model. We also need to calculate the -# workspace size that is required for the compiled model. -# -# - -config = {"tir.disable_vectorize": True} -if USE_CMSIS: - from tvm.relay.op.contrib import cmsisnn - - config["relay.ext.cmsisnn.options"] = {"mcpu": TARGET.mcpu} - relay_mod = cmsisnn.partition_for_cmsisnn(relay_mod, params, mcpu=TARGET.mcpu) - -with tvm.transform.PassContext(opt_level=3, config=config): - module = tvm.relay.build( - relay_mod, target=TARGET, params=params, runtime=RUNTIME, executor=EXECUTOR - ) - -temp_dir = tvm.contrib.utils.tempdir() -model_tar_path = temp_dir / "model.tar" -export_model_library_format(module, model_tar_path) -workspace_size = mlf_extract_workspace_size_bytes(model_tar_path) - -###################################################################### -# Generate input/output header files -# -------------------------------------------------------------------- -# -# To create a microTVM standalone project with AoT, we need to generate -# input and output header files. These header files are used to connect -# the input and output API from generated code to the rest of the -# standalone project. For this specific submission, we only need to generate -# output header file since the input API call is handled differently. -# - -extra_tar_dir = tvm.contrib.utils.tempdir() -extra_tar_file = extra_tar_dir / "extra.tar" - -with tarfile.open(extra_tar_file, "w:gz") as tf: - create_header_file( - "output_data", - np.zeros( - shape=output_shape, - dtype=output_dtype, - ), - "include/tvm", - tf, - ) - -###################################################################### -# Create the project, build and prepare the project tar file -# -------------------------------------------------------------------- -# -# Now that we have the compiled model as a model library format, -# we can generate the full project using Zephyr template project. First, -# we prepare the project options, then build the project. Finally, we -# cleanup the temporary files and move the submission project to the -# current working directory which could be downloaded and used on -# your development kit. -# - -input_total_size = 1 -for i in range(len(input_shape)): - input_total_size *= input_shape[i] - -template_project_path = pathlib.Path(tvm.micro.get_microtvm_template_projects("zephyr")) -project_options = { - "extra_files_tar": str(extra_tar_file), - "project_type": "mlperftiny", - "board": BOARD, - "compile_definitions": [ - f"-DWORKSPACE_SIZE={workspace_size + 512}", # Memory workspace size, 512 is a temporary offset - # since the memory calculation is not accurate. - f"-DTARGET_MODEL={MODEL_INDEX}", # Sets the model index for project compilation. - f"-DTH_MODEL_VERSION=EE_MODEL_VERSION_{MODEL_SHORT_NAME}01", # Sets model version. This is required by MLPerfTiny API. - f"-DMAX_DB_INPUT_SIZE={input_total_size}", # Max size of the input data array. 
- ], -} - -if MODEL_SHORT_NAME != "AD": - project_options["compile_definitions"].append(f"-DOUT_QUANT_SCALE={quant_output_scale}") - project_options["compile_definitions"].append(f"-DOUT_QUANT_ZERO={quant_output_zero_point}") - -if USE_CMSIS: - project_options["compile_definitions"].append(f"-DCOMPILE_WITH_CMSISNN=1") - -# Note: You might need to adjust this based on the board that you are using. -project_options["config_main_stack_size"] = 4000 - -if USE_CMSIS: - project_options["cmsis_path"] = os.environ.get("CMSIS_PATH", "/content/cmsis") - -generated_project_dir = temp_dir / "project" - -project = tvm.micro.project.generate_project_from_mlf( - template_project_path, generated_project_dir, model_tar_path, project_options -) -project.build() - -# Cleanup the build directory and extra artifacts -shutil.rmtree(generated_project_dir / "build") -(generated_project_dir / "model.tar").unlink() - -project_tar_path = pathlib.Path(os.getcwd()) / "project.tar" -with tarfile.open(project_tar_path, "w:tar") as tar: - tar.add(generated_project_dir, arcname=os.path.basename("project")) - -print(f"The generated project is located here: {project_tar_path}") - -###################################################################### -# Use this project with your board -# -------------------------------------------------------------------- -# -# Now that we have the generated project, you can use this project locally -# to flash your board and prepare it for EEMBC runner software. -# To do this follow these steps: -# -# .. code-block:: bash -# -# tar -xf project.tar -# cd project -# mkdir build -# cmake .. -# make -j2 -# west flash -# -# Now you can connect your board to EEMBC runner using this -# `instructions `_ -# and benchmark this model on your board. -# diff --git a/gallery/how_to/work_with_microtvm/micro_pytorch.py b/gallery/how_to/work_with_microtvm/micro_pytorch.py deleted file mode 100644 index a0f4ebddee48..000000000000 --- a/gallery/how_to/work_with_microtvm/micro_pytorch.py +++ /dev/null @@ -1,207 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -.. _tutorial-micro-pytorch: - -4. microTVM PyTorch Tutorial -============================ -**Authors**: -`Mehrdad Hessar `_ - -This tutorial is showcasing microTVM host-driven AoT compilation with -a PyTorch model. This tutorial can be executed on a x86 CPU using C runtime (CRT). - -**Note:** This tutorial only runs on x86 CPU using CRT and does not run on Zephyr -since the model would not fit on our current supported Zephyr boards. -""" - -###################################################################### -# -# .. 
include:: ../../../../gallery/how_to/work_with_microtvm/install_dependencies.rst -# - - -import pathlib -import torch -import torchvision -from torchvision import transforms -import numpy as np -from PIL import Image - -import tvm -from tvm import relay -from tvm.contrib.download import download_testdata -from tvm.relay.backend import Executor -import tvm.micro.testing - -################################## -# Load a pre-trained PyTorch model -# -------------------------------- -# -# To begin with, load pre-trained MobileNetV2 from torchvision. Then, -# download a cat image and preprocess it to use as the model input. -# - -model = torchvision.models.quantization.mobilenet_v2(weights="DEFAULT", quantize=True) -model = model.eval() - -input_shape = [1, 3, 224, 224] -input_data = torch.randn(input_shape) -scripted_model = torch.jit.trace(model, input_data).eval() - -img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true" -img_path = download_testdata(img_url, "cat.png", module="data") -img = Image.open(img_path).resize((224, 224)) - -# Preprocess the image and convert to tensor -my_preprocess = transforms.Compose( - [ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] -) -img = my_preprocess(img) -img = np.expand_dims(img, 0) - -input_name = "input0" -shape_list = [(input_name, input_shape)] -relay_mod, params = relay.frontend.from_pytorch(scripted_model, shape_list) - -##################################### -# Define Target, Runtime and Executor -# ----------------------------------- -# -# In this tutorial we use AOT host-driven executor. To compile the model -# for an emulated embedded environment on an x86 machine we use C runtime (CRT) -# and we use `host` micro target. Using this setup, TVM compiles the model -# for C runtime which can run on a x86 CPU machine with the same flow that -# would run on a physical microcontroller. -# CRT Uses the main() from `src/runtime/crt/host/main.cc` -# To use physical hardware, replace `board` with another physical micro target, e.g. `nrf5340dk_nrf5340_cpuapp` -# or `mps2_an521` and change the platform type to Zephyr. -# See more target examples in :ref:`Training Vision Models for microTVM on Arduino ` -# and :ref:`microTVM TFLite Tutorial`. -# - -target = tvm.micro.testing.get_target(platform="crt", board=None) - -# Use the C runtime (crt) and enable static linking by setting system-lib to True -runtime = tvm.relay.backend.Runtime("crt", {"system-lib": True}) - -# Use the AOT executor rather than graph or vm executors. Don't use unpacked API or C calling style. -executor = Executor("aot") - -#################### -# Compile the model -# ------------------ -# -# Now, we compile the model for the target: -# - -with tvm.transform.PassContext( - opt_level=3, - config={"tir.disable_vectorize": True}, -): - module = tvm.relay.build( - relay_mod, target=target, runtime=runtime, executor=executor, params=params - ) - -########################### -# Create a microTVM project -# ------------------------- -# -# Now that we have the compiled model as an IRModule, we need to create a firmware project -# to use the compiled model with microTVM. To do this, we use Project API. 
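# Optional sketch: instead of hard-coding workspace_size_bytes in the project
# options below, you could estimate the requirement from the Model Library
# Format metadata, as the MLPerfTiny tutorial above does. This is illustrative
# only; the 6 MB value used below remains a comfortable upper bound for this
# host-driven example.
import tvm.contrib.utils
from tvm.micro import export_model_library_format
from tvm.micro.testing.utils import mlf_extract_workspace_size_bytes

_mlf_dir = tvm.contrib.utils.tempdir()
_mlf_path = _mlf_dir / "model.tar"
export_model_library_format(module, _mlf_path)
print("Estimated workspace size:", mlf_extract_workspace_size_bytes(_mlf_path), "bytes")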
-# - -template_project_path = pathlib.Path(tvm.micro.get_microtvm_template_projects("crt")) -project_options = {"verbose": False, "workspace_size_bytes": 6 * 1024 * 1024} - -temp_dir = tvm.contrib.utils.tempdir() / "project" -project = tvm.micro.generate_project( - str(template_project_path), - module, - temp_dir, - project_options, -) - -#################################### -# Build, flash and execute the model -# ---------------------------------- -# Next, we build the microTVM project and flash it. Flash step is specific to -# physical microcontroller and it is skipped if it is simulating a microcontroller -# via the host `main.cc`` or if a Zephyr emulated board is selected as the target. -# - -project.build() -project.flash() - -input_data = {input_name: tvm.nd.array(img.astype("float32"))} -with tvm.micro.Session(project.transport()) as session: - aot_executor = tvm.runtime.executor.aot_executor.AotModule(session.create_aot_executor()) - aot_executor.set_input(**input_data) - aot_executor.run() - result = aot_executor.get_output(0).numpy() - -##################### -# Look up synset name -# ------------------- -# Look up prediction top 1 index in 1000 class synset. -# - -synset_url = ( - "https://raw.githubusercontent.com/Cadene/" - "pretrained-models.pytorch/master/data/" - "imagenet_synsets.txt" -) -synset_name = "imagenet_synsets.txt" -synset_path = download_testdata(synset_url, synset_name, module="data") -with open(synset_path) as f: - synsets = f.readlines() - -synsets = [x.strip() for x in synsets] -splits = [line.split(" ") for line in synsets] -key_to_classname = {spl[0]: " ".join(spl[1:]) for spl in splits} - -class_url = ( - "https://raw.githubusercontent.com/Cadene/" - "pretrained-models.pytorch/master/data/" - "imagenet_classes.txt" -) -class_path = download_testdata(class_url, "imagenet_classes.txt", module="data") -with open(class_path) as f: - class_id_to_key = f.readlines() - -class_id_to_key = [x.strip() for x in class_id_to_key] - -# Get top-1 result for TVM -top1_tvm = np.argmax(result) -tvm_class_key = class_id_to_key[top1_tvm] - -# Convert input to PyTorch variable and get PyTorch result for comparison -with torch.no_grad(): - torch_img = torch.from_numpy(img) - output = model(torch_img) - - # Get top-1 result for PyTorch - top1_torch = np.argmax(output.numpy()) - torch_class_key = class_id_to_key[top1_torch] - -print("Relay top-1 id: {}, class name: {}".format(top1_tvm, key_to_classname[tvm_class_key])) -print("Torch top-1 id: {}, class name: {}".format(top1_torch, key_to_classname[torch_class_key])) diff --git a/gallery/how_to/work_with_microtvm/micro_tflite.py b/gallery/how_to/work_with_microtvm/micro_tflite.py deleted file mode 100644 index 67b3e66e3315..000000000000 --- a/gallery/how_to/work_with_microtvm/micro_tflite.py +++ /dev/null @@ -1,240 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -""" -.. _tutorial_micro_tflite: - -2. microTVM TFLite Tutorial -=========================== -**Author**: `Tom Gall `_ - -This tutorial is an introduction to working with microTVM and a TFLite -model with Relay. -""" - -###################################################################### -# -# .. include:: ../../../../gallery/how_to/work_with_microtvm/install_dependencies.rst -# - - -import os - -# By default, this tutorial runs on x86 CPU using TVM's C runtime. If you would like -# to run on real Zephyr hardware, you must export the `TVM_MICRO_USE_HW` environment -# variable. Otherwise (if you are using the C runtime), you can skip installing -# Zephyr. It takes ~20 minutes to install Zephyr. -use_physical_hw = bool(os.getenv("TVM_MICRO_USE_HW")) - -###################################################################### -# -# .. include:: ../../../../gallery/how_to/work_with_microtvm/install_zephyr.rst -# - -###################################################################### -# Import Python dependencies -# ------------------------------- -# -import json -import tarfile -import pathlib -import tempfile -import numpy as np - -import tvm -import tvm.micro -import tvm.micro.testing -from tvm import relay -import tvm.contrib.utils -from tvm.micro import export_model_library_format -from tvm.contrib.download import download_testdata - -model_url = ( - "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/model/sine_model.tflite" -) -model_file = "sine_model.tflite" -model_path = download_testdata(model_url, model_file, module="data") - -tflite_model_buf = open(model_path, "rb").read() - -###################################################################### -# Using the buffer, transform into a tflite model python object -try: - import tflite - - tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0) -except AttributeError: - import tflite.Model - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0) - -###################################################################### -# Print out the version of the model -version = tflite_model.Version() -print("Model Version: " + str(version)) - -###################################################################### -# Parse the python model object to convert it into a relay module -# and weights. -# It is important to note that the input tensor name must match what -# is contained in the model. -# -# If you are unsure what that might be, this can be discovered by using -# the ``visualize.py`` script within the Tensorflow project. -# See `How do I inspect a .tflite file? `_ - -input_tensor = "dense_4_input" -input_shape = (1,) -input_dtype = "float32" - -mod, params = relay.frontend.from_tflite( - tflite_model, shape_dict={input_tensor: input_shape}, dtype_dict={input_tensor: input_dtype} -) - -###################################################################### -# Defining the target -# ------------------- -# -# Now we create a build config for relay, turning off two options and then calling relay.build which -# will result in a C source file for the selected TARGET. When running on a simulated target of the -# same architecture as the host (where this Python script is executed) choose "crt" below for the -# TARGET, the C Runtime as the RUNTIME and a proper board/VM to run it (Zephyr will create the right -# QEMU VM based on BOARD. 
In the example below the x86 arch is selected and a x86 VM is picked up accordingly: -# -RUNTIME = tvm.relay.backend.Runtime("crt", {"system-lib": True}) -TARGET = tvm.micro.testing.get_target("crt") - -# When running on physical hardware, choose a TARGET and a BOARD that describe the hardware. The -# STM32L4R5ZI Nucleo target and board is chosen in the example below. You could change the testing -# board by simply exporting `TVM_MICRO_BOARD` variable with a different Zephyr supported board. - -if use_physical_hw: - BOARD = os.getenv("TVM_MICRO_BOARD", default="nucleo_l4r5zi") - SERIAL = os.getenv("TVM_MICRO_SERIAL", default=None) - TARGET = tvm.micro.testing.get_target("zephyr", BOARD) - -# For some boards, Zephyr runs them emulated by default, using QEMU. For example, below is the -# TARGET and BOARD used to build a microTVM firmware for the mps2-an521 board. -# -# `mps2_an521 = "mps2_an521"` -# `TARGET = tvm.micro.testing.get_target("zephyr", BOARD)` - -###################################################################### -# Now, compile the model for the target. If you do not specify Executor, -# by default it uses GraphExecutor. - -with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - module = relay.build(mod, target=TARGET, runtime=RUNTIME, params=params) - - -###################################################################### -# Inspecting the compilation output -# --------------------------------- -# -# The compilation process has produced some C code implementing the operators in this graph. We -# can inspect it by printing the CSourceModule contents (for the purposes of this tutorial, let's -# just print the first 10 lines): -# - -c_source_module = module.get_lib().imported_modules[0] -assert c_source_module.type_key == "c", "tutorial is broken" - -c_source_code = c_source_module.get_source() -first_few_lines = c_source_code.split("\n")[:10] -assert any( - l.startswith("TVM_DLL int32_t tvmgen_default_") for l in first_few_lines -), f"tutorial is broken: {first_few_lines!r}" -print("\n".join(first_few_lines)) - - -###################################################################### -# Compiling the generated code -# ---------------------------- -# -# Now we need to incorporate the generated C code into a project that allows us to run inference on the -# device. The simplest way to do this is to integrate it yourself, using microTVM's standard output format -# model library format. This is a tarball with a standard layout. - -# Get a temporary path where we can store the tarball (since this is running as a tutorial). - -temp_dir = tvm.contrib.utils.tempdir() -model_tar_path = temp_dir / "model.tar" -export_model_library_format(module, model_tar_path) - -with tarfile.open(model_tar_path, "r:*") as tar_f: - print("\n".join(f" - {m.name}" for m in tar_f.getmembers())) - -# TVM also provides a standard way for embedded platforms to automatically generate a standalone -# project, compile and flash it to a target, and communicate with it using the standard TVM RPC -# protocol. The Model Library Format serves as the model input to this process. When embedded -# platforms provide such an integration, they can be used directly by TVM for both host-driven -# inference and autotuning . This integration is provided by the -# `microTVM Project API` _, -# -# Embedded platforms need to provide a Template Project containing a microTVM API Server (typically, -# this lives in a file ``microtvm_api_server.py`` in the root directory). 
Let's use the example ``host`` -# project in this tutorial, which simulates the device using a POSIX subprocess and pipes: - -template_project_path = pathlib.Path(tvm.micro.get_microtvm_template_projects("crt")) -project_options = {} # You can use options to provide platform-specific options through TVM. - -# For physical hardware, you can try out the Zephyr platform by using a different template project -# and options: - -if use_physical_hw: - template_project_path = pathlib.Path(tvm.micro.get_microtvm_template_projects("zephyr")) - project_options = { - "project_type": "host_driven", - "board": BOARD, - "serial_number": SERIAL, - "config_main_stack_size": 4096, - "zephyr_base": os.getenv("ZEPHYR_BASE", default="/content/zephyrproject/zephyr"), - } - -# Create a temporary directory -temp_dir = tvm.contrib.utils.tempdir() -generated_project_dir = temp_dir / "generated-project" -generated_project = tvm.micro.generate_project( - template_project_path, module, generated_project_dir, project_options -) - -# Build and flash the project -generated_project.build() -generated_project.flash() - - -###################################################################### -# Next, establish a session with the simulated device and run the -# computation. The `with session` line would typically flash an attached -# microcontroller, but in this tutorial, it simply launches a subprocess -# to stand in for an attached microcontroller. - -with tvm.micro.Session(transport_context_manager=generated_project.transport()) as session: - graph_mod = tvm.micro.create_local_graph_executor( - module.get_graph_json(), session.get_system_lib(), session.device - ) - - # Set the model parameters using the lowered parameters produced by `relay.build`. - graph_mod.set_input(**module.get_params()) - - # The model consumes a single float32 value and returns a predicted sine value. To pass the - # input value we construct a tvm.nd.array object with a single contrived number as input. For - # this model values of 0 to 2Pi are acceptable. - graph_mod.set_input(input_tensor, tvm.nd.array(np.array([0.5], dtype="float32"))) - graph_mod.run() - - tvm_output = graph_mod.get_output(0).numpy() - print("result is: " + str(tvm_output)) diff --git a/gallery/how_to/work_with_microtvm/micro_train.py b/gallery/how_to/work_with_microtvm/micro_train.py deleted file mode 100644 index bbb265420cef..000000000000 --- a/gallery/how_to/work_with_microtvm/micro_train.py +++ /dev/null @@ -1,643 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -.. _tutorial-micro-train-arduino: - -5. 
Training Vision Models for microTVM on Arduino -================================================= -**Author**: `Gavin Uberti `_ - -This tutorial shows how MobileNetV1 models can be trained -to fit on embedded devices, and how those models can be -deployed to an Arduino using TVM. -""" - -###################################################################### -# Motivation -# ---------- -# When building IoT devices, we often want them to **see and understand** the world around them. -# This can take many forms, but often a device will want to know if a certain **kind of -# object** is in its field of vision. -# -# For example, a security camera might look for **people**, so it can decide whether to save a video -# to memory. A traffic light might look for **cars**, so it can judge which lights should change -# first. Or a forest camera might look for a **kind of animal**, so it can estimate how large -# the animal population is. -# -# To make these devices affordable, we would like them to need only a low-cost processor like the -# `nRF52840 `_ (costing five dollars each on Mouser) or the `RP2040 `_ (just $1.45 each!). -# -# These devices have very little memory (~250 KB RAM), meaning that no conventional edge AI -# vision model (like MobileNet or EfficientNet) will be able to run. In this tutorial, we will -# show how these models can be modified to work around this requirement. Then, we will use TVM -# to compile and deploy the modified model to an Arduino that uses one of these processors. -# -# Installing the Prerequisites -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# -# This tutorial will use TensorFlow - a widely used machine learning library -# created by Google - to train the model. TensorFlow is a very low-level library, however, so we will use the Keras -# interface to talk to TensorFlow. We will also use TensorFlow Lite to perform quantization on -# our model, as TensorFlow by itself does not support this. -# -# Once we have our generated model, we will use TVM to compile and test it. To avoid having to -# build from source, we'll install ``tlcpack`` - a community build of TVM. Lastly, we'll also -# install ``imagemagick`` and ``curl`` to preprocess data: -# -# .. code-block:: bash -# -# %%shell -# pip install -q tensorflow tflite -# pip install -q tlcpack-nightly -f https://tlcpack.ai/wheels -# apt-get -qq install imagemagick curl -# -# # Install Arduino CLI and library for Nano 33 BLE -# curl -fsSL https://raw.githubusercontent.com/arduino/arduino-cli/master/install.sh | sh -# /content/bin/arduino-cli core update-index -# /content/bin/arduino-cli core install arduino:mbed_nano -# -# Using the GPU -# ^^^^^^^^^^^^^ -# -# This tutorial demonstrates training a neural network, which requires a lot of computing power -# and will go much faster if you have a GPU. If you are viewing this tutorial on Google Colab, you -# can enable a GPU by going to **Runtime->Change runtime type** and selecting "GPU" as the hardware -# accelerator. If you are running locally, you can `follow TensorFlow's guide `_ instead.
-# -# We can test our GPU installation with the following code: - -import tensorflow as tf - -if not tf.test.gpu_device_name(): - print("No GPU was detected!") - print("Model training will take much longer (~30 minutes instead of ~5)") -else: - print("GPU detected - you're good to go.") - -###################################################################### -# Choosing Our Work Dir -# ^^^^^^^^^^^^^^^^^^^^^ -# We need to pick a directory where our image datasets, trained model, and eventual Arduino sketch -# will all live. If running on Google Colab, we'll save everything in ``/root`` (aka ``~``) but you'll -# probably want to store it elsewhere if running locally. Note that this variable only affects Python -# scripts - you'll have to adjust the Bash commands too. - -import os - -FOLDER = "/root" -# sphinx_gallery_start_ignore -import tempfile - -FOLDER = tempfile.mkdtemp() -# sphinx_gallery_end_ignore - -###################################################################### -# Downloading the Data -# -------------------- -# Convolutional neural networks usually learn by looking at many images, along with labels telling -# the network what those images are. To get these images, we'll need a publicly available dataset -# with thousands of images of all sorts of objects and labels of what's in each image. We'll also -# need a bunch of images that **aren't** of cars, as we're trying to distinguish these two classes. -# -# In this tutorial, we'll create a model to detect if an image contains a **car**, but you can use -# whatever category you like! Just change the source URL below to one containing images of another -# type of object. -# -# To get our car images, we'll be downloading the `Stanford Cars dataset `_, -# which contains 16,185 full color images of cars. We'll also need images of random things that -# aren't cars, so we'll use the `COCO 2017 `_ validation set (it's -# smaller, and thus faster to download than the full training set. Training on the full data set -# would yield better results). Note that there are some cars in the COCO 2017 data set, but it's -# a small enough fraction not to matter - just keep in mind that this will drive down our percieved -# accuracy slightly. -# -# We could use the TensorFlow dataloader utilities, but we'll instead do it manually to make sure -# it's easy to change the datasets being used. We'll end up with the following file hierarchy: -# -# .. code-block:: -# -# /root -# ├── images -# │ ├── object -# │ │ ├── 000001.jpg -# │ │ │ ... -# │ │ └── 016185.jpg -# │ ├── object.tgz -# │ ├── random -# │ │ ├── 000000000139.jpg -# │ │ │ ... -# │ │ └── 000000581781.jpg -# │ └── random.zip -# -# We should also note that Stanford cars has 8k images, while the COCO 2017 validation set is 5k -# images - it is not a 50/50 split! If we wanted to, we could weight these classes differently -# during training to correct for this, but training will still work if we ignore it. It should -# take about **2 minutes** to download the Stanford Cars, while COCO 2017 validation will take -# **1 minute**. 
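As an aside on the class imbalance mentioned above: if we did want to correct for it, the simplest lever is a per-class weight passed to ``model.fit`` during fine-tuning. A minimal sketch, assuming the approximate dataset sizes quoted here (the exact counts come from the downloaded folders, and the class indices follow the alphabetical folder order used by ``image_dataset_from_directory``):

.. code-block:: python

    # Rough inverse-frequency weights for the two classes. These counts are
    # approximate; they only need to reflect the ratio between the folders.
    num_random = 5000  # COCO 2017 validation images ("random" folder)
    num_target = 8144  # Stanford Cars training images ("target" folder)
    total = num_random + num_target

    class_weight = {
        0: total / (2 * num_random),  # "random" sorts first alphabetically
        1: total / (2 * num_target),
    }

    # Later, when fine-tuning, this could be passed along as
    # model.fit(train_dataset, validation_data=validation_dataset,
    #           class_weight=class_weight, epochs=3)

Ignoring the imbalance, as this tutorial does, also works; the weights merely rescale the loss so the smaller class is not drowned out.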
- -import os -import shutil -import urllib.request - -# Download datasets -os.makedirs(f"{FOLDER}/downloads") -os.makedirs(f"{FOLDER}/images") -urllib.request.urlretrieve( - "https://data.deepai.org/stanfordcars.zip", f"{FOLDER}/downloads/target.zip" -) -urllib.request.urlretrieve( - "http://images.cocodataset.org/zips/val2017.zip", f"{FOLDER}/downloads/random.zip" -) - -# Extract them and rename their folders -shutil.unpack_archive(f"{FOLDER}/downloads/target.zip", f"{FOLDER}/downloads") -shutil.unpack_archive(f"{FOLDER}/downloads/random.zip", f"{FOLDER}/downloads") -shutil.move(f"{FOLDER}/downloads/cars_train/cars_train", f"{FOLDER}/images/target") -shutil.move(f"{FOLDER}/downloads/val2017", f"{FOLDER}/images/random") - -###################################################################### -# Loading the Data -# ---------------- -# Currently, our data is stored on-disk as JPG files of various sizes. To train with it, we'll have -# to load the images into memory, resize them to be 64x64, and convert them to raw, uncompressed -# data. Keras's ``image_dataset_from_directory`` will take care of most of this, though it loads -# images such that each pixel value is a float from 0 to 255. -# -# We'll also need to load labels, though Keras will help with this. From our subdirectory structure, -# it knows the images in ``/objects`` are one class, and those in ``/random`` another. Setting -# ``label_mode='categorical'`` tells Keras to convert these into **categorical labels** - a 2x1 vector -# that's either ``[1, 0]`` for an object of our target class, or ``[0, 1]`` vector for anything else. -# We'll also set ``shuffle=True`` to randomize the order of our examples. -# -# We will also **batch** the data - grouping samples into clumps to make our training go faster. -# Setting ``batch_size = 32`` is a decent number. -# -# Lastly, in machine learning we generally want our inputs to be small numbers. We'll thus use a -# ``Rescaling`` layer to change our images such that each pixel is a float between ``0.0`` and ``1.0``, -# instead of ``0`` to ``255``. We need to be careful not to rescale our categorical labels though, so -# we'll use a ``lambda`` function. - -IMAGE_SIZE = (64, 64, 3) -unscaled_dataset = tf.keras.utils.image_dataset_from_directory( - f"{FOLDER}/images", - batch_size=32, - shuffle=True, - label_mode="categorical", - image_size=IMAGE_SIZE[0:2], -) -rescale = tf.keras.layers.Rescaling(scale=1.0 / 255) -full_dataset = unscaled_dataset.map(lambda im, lbl: (rescale(im), lbl)) - -###################################################################### -# What's Inside Our Dataset? -# ^^^^^^^^^^^^^^^^^^^^^^^^^^ -# Before giving this data set to our neural network, we ought to give it a quick visual inspection. -# Does the data look properly transformed? Do the labels seem appropriate? And what's our ratio of -# objects to other stuff? 
We can display some examples from our datasets using ``matplotlib``: - -import matplotlib.pyplot as plt - -num_target_class = len(os.listdir(f"{FOLDER}/images/target/")) -num_random_class = len(os.listdir(f"{FOLDER}/images/random/")) -print(f"{FOLDER}/images/target contains {num_target_class} images") -print(f"{FOLDER}/images/random contains {num_random_class} images") - -# Show some samples and their labels -SAMPLES_TO_SHOW = 10 -plt.figure(figsize=(20, 10)) -for i, (image, label) in enumerate(unscaled_dataset.unbatch()): - if i >= SAMPLES_TO_SHOW: - break - ax = plt.subplot(1, SAMPLES_TO_SHOW, i + 1) - plt.imshow(image.numpy().astype("uint8")) - plt.title(list(label.numpy())) - plt.axis("off") - -###################################################################### -# Validating our Accuracy -# ^^^^^^^^^^^^^^^^^^^^^^^ -# While developing our model, we'll often want to check how accurate it is (e.g. to see if it -# improves during training). How do we do this? We could just train it on *all* of the data, and -# then ask it to classify that same data. However, our model could cheat by just memorizing all of -# the samples, which would make it *appear* to have very high accuracy, but perform very badly in -# reality. In practice, this "memorizing" is called **overfitting**. -# -# To prevent this, we will set aside some of the data (we'll use 20%) as a **validation set**. Our -# model will never be trained on validation data - we'll only use it to check our model's accuracy. - -num_batches = len(full_dataset) -train_dataset = full_dataset.take(int(num_batches * 0.8)) -validation_dataset = full_dataset.skip(len(train_dataset)) - -###################################################################### -# Loading the Data -# ---------------- -# In the past decade, `convolutional neural networks `_ have been widely -# adopted for image classification tasks. State-of-the-art models like `EfficientNet V2 `_ are able -# to perform image classification better than even humans! Unfortunately, these models have tens of -# millions of parameters, and thus won't fit on cheap security camera computers. -# -# Our applications generally don't need perfect accuracy - 90% is good enough. We can thus use the -# older and smaller MobileNet V1 architecture. But this *still* won't be small enough - by default, -# MobileNet V1 with 224x224 inputs and alpha 1.0 takes ~50 MB to just **store**. To reduce the size -# of the model, there are three knobs we can turn. First, we can reduce the size of the input images -# from 224x224 to 96x96 or 64x64, and Keras makes it easy to do this. We can also reduce the **alpha** -# of the model, from 1.0 to 0.25, which downscales the width of the network (and the number of -# filters) by a factor of four. And if we were really strapped for space, we could reduce the -# number of **channels** by making our model take grayscale images instead of RGB ones. -# -# In this tutorial, we will use an RGB 64x64 input image and alpha 0.25. This is not quite -# ideal, but it allows the finished model to fit in 192 KB of RAM, while still letting us perform -# transfer learning using the official TensorFlow source models (if we used alpha <0.25 or a -# grayscale input, we wouldn't be able to do this). -# -# What is Transfer Learning? -# ^^^^^^^^^^^^^^^^^^^^^^^^^^ -# Deep learning has `dominated image classification `_ for a long time, -# but training neural networks takes a lot of time. 
When a neural network is trained "from scratch", -# its parameters start out randomly initialized, forcing it to learn very slowly how to tell images -# apart. -# -# With transfer learning, we instead start with a neural network that's **already** good at a -# specific task. In this example, that task is classifying images from `the ImageNet database `_. This -# means the network already has some object detection capabilities, and is likely closer to what you -# want than a random model would be. -# -# This works especially well with image processing neural networks like MobileNet. In practice, it -# turns out the convolutional layers of the model (i.e. the first 90% of the layers) are used for -# identifying low-level features like lines and shapes - only the last few fully connected layers -# are used to determine how those shapes make up the objects the network is trying to detect. -# -# We can take advantage of this by starting training with a MobileNet model that was trained on -# ImageNet, and already knows how to identify those lines and shapes. We can then just remove the -# last few layers from this pretrained model, and add our own final layers. We'll then train this -# conglomerate model for a few epochs on our cars vs non-cars dataset, to adjust the first layers -# and train the last layers from scratch. This process of training an already-partially-trained -# model is called *fine-tuning*. -# -# Source MobileNets for transfer learning have been `pretrained by the TensorFlow folks `_, so we -# can just download the one closest to what we want (the 128x128 input model with 0.25 depth scale). - -os.makedirs(f"{FOLDER}/models") -WEIGHTS_PATH = f"{FOLDER}/models/mobilenet_2_5_128_tf.h5" -urllib.request.urlretrieve( - "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_2_5_128_tf.h5", - WEIGHTS_PATH, -) - -pretrained = tf.keras.applications.MobileNet( - input_shape=IMAGE_SIZE, weights=WEIGHTS_PATH, alpha=0.25 -) - -###################################################################### -# Modifying Our Network -# ^^^^^^^^^^^^^^^^^^^^^ -# As mentioned above, our pretrained model is designed to classify the 1,000 ImageNet categories, -# but we want to convert it to classify cars. Since only the bottom few layers are task-specific, -# we'll **cut off the last five layers** of our original model. In their place we'll build our own -# "tail" to the model by performing reshape, dropout, flatten, and softmax operations. - -model = tf.keras.models.Sequential() - -model.add(tf.keras.layers.InputLayer(input_shape=IMAGE_SIZE)) -model.add(tf.keras.Model(inputs=pretrained.inputs, outputs=pretrained.layers[-5].output)) - -model.add(tf.keras.layers.Reshape((-1,))) -model.add(tf.keras.layers.Dropout(0.1)) -model.add(tf.keras.layers.Flatten()) -model.add(tf.keras.layers.Dense(2, activation="softmax")) - -###################################################################### -# Fine Tuning Our Network -# ^^^^^^^^^^^^^^^^^^^^^^^ -# When training neural networks, we must set a parameter called the **learning rate** that controls -# how fast our network learns. It must be set carefully - too slow, and our network will take -# forever to train; too fast, and our network won't be able to learn some fine details. Generally -# for Adam (the optimizer we're using), ``0.001`` is a pretty good learning rate (and is what's -# recommended in the `original paper `_). However, in this case -# ``0.0005`` seems to work a little better.
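Before compiling and fitting, a quick sanity check of the assembled model can catch mistakes in the layer surgery above. A minimal sketch, assuming the ``model`` object built in the previous cell:

.. code-block:: python

    # The Sequential wrapper should report five top-level layers: the truncated
    # MobileNet, then the Reshape/Dropout/Flatten/Dense tail we just added.
    model.summary()
    print("Top-level layers:", len(model.layers))
    print("Total parameters:", model.count_params())  # roughly 219k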
-# -# We'll also pass the validation set from earlier to ``model.fit``. This will evaluate how good our -# model is each time we train it, and let us track how our model is improving. Once training is -# finished, the model should have a validation accuracy around ``0.98`` (meaning it was right 98% of -# the time on our validation set). - -model.compile( - optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), - loss="categorical_crossentropy", - metrics=["accuracy"], -) -model.fit(train_dataset, validation_data=validation_dataset, epochs=3, verbose=2) - -###################################################################### -# Quantization -# ------------ -# We've done a decent job of reducing our model's size so far - changing the input dimension, -# along with removing the bottom layers reduced the model to just 219k parameters. However, each of -# these parameters is a ``float32`` that takes four bytes, so our model will take up almost one MB! -# -# Additionally, it might be the case that our hardware doesn't have built-in support for floating -# point numbers. While most high-memory Arduinos (like the Nano 33 BLE) do have hardware support, -# some others (like the Arduino Due) do not. On any boards *without* dedicated hardware support, -# floating point multiplication will be extremely slow. -# -# To address both issues we will **quantize** the model - representing the weights as eight bit -# integers. It's more complex than just rounding, though - to get the best performance, TensorFlow -# tracks how each neuron in our model activates, so we can figure out how most accurately simulate -# the neuron's original activations with integer operations. -# -# We will help TensorFlow do this by creating a representative dataset - a subset of the original -# that is used for tracking how those neurons activate. We'll then pass this into a ``TFLiteConverter`` -# (Keras itself does not have quantization support) with an ``Optimize`` flag to tell TFLite to perform -# the conversion. By default, TFLite keeps the inputs and outputs of our model as floats, so we must -# explicitly tell it to avoid this behavior. - - -def representative_dataset(): - for image_batch, label_batch in full_dataset.take(10): - yield [image_batch] - - -converter = tf.lite.TFLiteConverter.from_keras_model(model) -converter.optimizations = [tf.lite.Optimize.DEFAULT] -converter.representative_dataset = representative_dataset -converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] -converter.inference_input_type = tf.uint8 -converter.inference_output_type = tf.uint8 - -quantized_model = converter.convert() - -###################################################################### -# Download the Model if Desired -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# We've now got a finished model that you can use locally or in other tutorials (try autotuning -# this model or viewing it on `https://netron.app/ `_). But before we do -# those things, we'll have to write it to a file (``quantized.tflite``). If you're running this -# tutorial on Google Colab, you'll have to uncomment the last two lines to download the file -# after writing it. 
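Before writing the model out, it is worth confirming that the conversion really produced an integer model of roughly the expected size. A minimal sketch, assuming the ``quantized_model`` bytes returned by ``converter.convert()`` above:

.. code-block:: python

    # Load the quantized flatbuffer into TFLite's Python interpreter and check
    # its input/output signatures - both should be uint8 after the conversion
    # settings above, and the whole model should be a few hundred kilobytes.
    interpreter = tf.lite.Interpreter(model_content=quantized_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]

    print("Model size: %.1f KB" % (len(quantized_model) / 1024))
    print("Input :", input_details["shape"], input_details["dtype"])
    print("Output:", output_details["shape"], output_details["dtype"])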
- -QUANTIZED_MODEL_PATH = f"{FOLDER}/models/quantized.tflite" -with open(QUANTIZED_MODEL_PATH, "wb") as f: - f.write(quantized_model) -# from google.colab import files -# files.download(QUANTIZED_MODEL_PATH) - -###################################################################### -# Compiling With TVM For Arduino -# ------------------------------ -# TensorFlow has a built-in framework for deploying to microcontrollers - `TFLite Micro `_. However, -# it's poorly supported by development boards and does not support autotuning. We will use Apache -# TVM instead. -# -# TVM can be used either with its command line interface (``tvmc``) or with its Python interface. The -# Python interface is fully-featured and more stable, so we'll use it here. -# -# TVM is an optimizing compiler, and optimizations to our model are performed in stages via -# **intermediate representations**. The first of these is `Relay `_ a high-level intermediate -# representation emphasizing portability. The conversion from ``.tflite`` to Relay is done without any -# knowledge of our "end goal" - the fact we intend to run this model on an Arduino. -# -# Choosing an Arduino Board -# ^^^^^^^^^^^^^^^^^^^^^^^^^ -# Next, we'll have to decide exactly which Arduino board to use. The Arduino sketch that we -# ultimately generate should be compatible with any board, but knowing which board we are using in -# advance allows TVM to adjust its compilation strategy to get better performance. -# -# There is one catch - we need enough **memory** (flash and RAM) to be able to run our model. We -# won't ever be able to run a complex vision model like a MobileNet on an Arduino Uno - that board -# only has 2 kB of RAM and 32 kB of flash! Our model has ~200,000 parameters, so there is just no -# way it could fit. -# -# For this tutorial, we will use the Nano 33 BLE, which has 1 MB of flash memory and 256 KB of RAM. -# However, any other Arduino with those specs or better should also work. -# -# Generating our project -# ^^^^^^^^^^^^^^^^^^^^^^ -# Next, we'll compile the model to TVM's MLF (model library format) intermediate representation, -# which consists of C/C++ code and is designed for autotuning. To improve performance, we'll tell -# TVM that we're compiling for the ``nrf52840`` microprocessor (the one the Nano 33 BLE uses). We'll -# also tell it to use the C runtime (abbreviated ``crt``) and to use ahead-of-time memory allocation -# (abbreviated ``aot``, which helps reduce the model's memory footprint). Lastly, we will disable -# vectorization with ``"tir.disable_vectorize": True``, as C has no native vectorized types. -# -# Once we have set these configuration parameters, we will call ``tvm.relay.build`` to compile our -# Relay model into the MLF intermediate representation. From here, we just need to call -# ``tvm.micro.generate_project`` and pass in the Arduino template project to finish compilation. 
- -import shutil -import tvm -import tvm.micro.testing - -# Method to load model is different in TFLite 1 vs 2 -try: # TFLite 2.1 and above - import tflite - - tflite_model = tflite.Model.GetRootAsModel(quantized_model, 0) -except AttributeError: # Fall back to TFLite 1.14 method - import tflite.Model - - tflite_model = tflite.Model.Model.GetRootAsModel(quantized_model, 0) - -# Convert to the Relay intermediate representation -mod, params = tvm.relay.frontend.from_tflite(tflite_model) - -# Set configuration flags to improve performance -target = tvm.micro.testing.get_target("zephyr", "nrf5340dk_nrf5340_cpuapp") -runtime = tvm.relay.backend.Runtime("crt") -executor = tvm.relay.backend.Executor("aot", {"unpacked-api": True}) - -# Convert to the MLF intermediate representation -with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.relay.build(mod, target, runtime=runtime, executor=executor, params=params) - -# Generate an Arduino project from the MLF intermediate representation -shutil.rmtree(f"{FOLDER}/models/project", ignore_errors=True) -arduino_project = tvm.micro.generate_project( - tvm.micro.get_microtvm_template_projects("arduino"), - mod, - f"{FOLDER}/models/project", - { - "board": "nano33ble", - "arduino_cli_cmd": "/content/bin/arduino-cli", - "project_type": "example_project", - }, -) - -###################################################################### -# Testing our Arduino Project -# --------------------------- -# Consider the following two 224x224 images from the author's camera roll - one of a car, one not. -# We will test our Arduino project by loading both of these images and executing the compiled model -# on them. -# -# .. image:: https://raw.githubusercontent.com/tlc-pack/web-data/main/testdata/microTVM/data/model_train_images_combined.png -# :align: center -# :height: 200px -# :width: 600px -# -# Currently, these are 224x224 PNG images we can download from Imgur. Before we can feed in these -# images, we'll need to resize and convert them to raw data, which can be done with ``imagemagick``. -# -# It's also challenging to load raw data onto an Arduino, as only C/CPP files (and similar) are -# compiled. We can work around this by embedding our raw data in a hard-coded C array with the -# built-in utility ``bin2c`` that will output a file like below: -# -# .. code-block:: c -# -# static const unsigned char CAR_IMAGE[] = { -# 0x22,0x23,0x14,0x22, -# ... -# 0x07,0x0e,0x08,0x08 -# }; -# -# We can do both of these things with a few lines of Bash code: -# -# .. code-block:: bash -# -# %%shell -# mkdir -p ~/tests -# curl "https://i.imgur.com/JBbEhxN.png" -o ~/tests/car_224.png -# convert ~/tests/car_224.png -resize 64 ~/tests/car_64.png -# stream ~/tests/car_64.png ~/tests/car.raw -# bin2c -c -st ~/tests/car.raw --name CAR_IMAGE > ~/models/project/car.c -# -# curl "https://i.imgur.com/wkh7Dx2.png" -o ~/tests/catan_224.png -# convert ~/tests/catan_224.png -resize 64 ~/tests/catan_64.png -# stream ~/tests/catan_64.png ~/tests/catan.raw -# bin2c -c -st ~/tests/catan.raw --name CATAN_IMAGE > ~/models/project/catan.c - -###################################################################### -# Writing our Arduino Script -# -------------------------- -# We now need a little bit of Arduino code to read the two binary arrays we just generated, run the -# model on them, and log the output to the serial monitor. This file will replace ``arduino_sketch.ino`` -# as the main file of our sketch. You'll have to copy this code in manually.. -# -# .. 
code-block:: c -# -# %%writefile /root/models/project.ino -# #include "src/model.h" -# #include "car.c" -# #include "catan.c" -# -# void setup() { -# Serial.begin(9600); -# TVMInitialize(); -# } -# -# void loop() { -# uint8_t result_data[2]; -# Serial.println("Car results:"); -# TVMExecute(const_cast(CAR_IMAGE), result_data); -# Serial.print(result_data[0]); Serial.print(", "); -# Serial.print(result_data[1]); Serial.println(); -# -# Serial.println("Other object results:"); -# TVMExecute(const_cast(CATAN_IMAGE), result_data); -# Serial.print(result_data[0]); Serial.print(", "); -# Serial.print(result_data[1]); Serial.println(); -# -# delay(1000); -# } -# -# Compiling Our Code -# ^^^^^^^^^^^^^^^^^^ -# Now that our project has been generated, TVM's job is mostly done! We can still call -# ``arduino_project.build()`` and ``arduino_project.upload()``, but these just use ``arduino-cli``'s -# compile and flash commands underneath. We could also begin autotuning our model, but that's a -# subject for a different tutorial. To finish up, we'll verify no compiler errors are thrown -# by our project: - -shutil.rmtree(f"{FOLDER}/models/project/build", ignore_errors=True) -# sphinx_gallery_start_ignore -from unittest.mock import MagicMock - -arduino_project = MagicMock() -# sphinx_gallery_end_ignore -arduino_project.build() -print("Compilation succeeded!") - -###################################################################### -# Uploading to Our Device -# ----------------------- -# The very last step is uploading our sketch to an Arduino to make sure our code works properly. -# Unfortunately, we can't do that from Google Colab, so we'll have to download our sketch. This is -# simple enough to do - we'll just turn our project into a `.zip` archive, and call `files.download`. -# If you're running on Google Colab, you'll have to uncomment the last two lines to download the file -# after writing it. - -ZIP_FOLDER = f"{FOLDER}/models/project" -shutil.make_archive(ZIP_FOLDER, "zip", ZIP_FOLDER) -# from google.colab import files -# files.download(f"{FOLDER}/models/project.zip") -# sphinx_gallery_start_ignore -# Run a few unit tests to make sure the Python code worked - -# Ensure transfer learn model was correctly assembled -assert len(model.layers) == 5 -assert model.count_params() == 219058 # Only 219,058 of these are trainable - -assert len(quantized_model) >= 250000 # Quantized model will be 250 KB - 350 KB -assert len(quantized_model) <= 350000 # Exact value depends on quantization - -# Assert .tflite and .zip files were written to disk -assert os.path.isfile(f"{FOLDER}/models/quantized.tflite") -assert os.path.isfile(f"{FOLDER}/models/project.zip") - -# Assert MLF file was correctly generated -assert mod.executor.name == "aot" - -# Remove the temporary folder we generated at the beginning -shutil.rmtree(FOLDER) -# sphinx_gallery_end_ignore - - -###################################################################### -# From here, we'll need to open it in the Arduino IDE. You'll have to download the IDE as well as -# the SDK for whichever board you are using. For certain boards like the Sony SPRESENSE, you may -# have to change settings to control how much memory you want the board to use. -# -# Expected Results -# ^^^^^^^^^^^^^^^^ -# If all works as expected, you should see the following output on a Serial monitor: -# -# .. 
code-block:: -# -# Car results: -# 255, 0 -# Other object results: -# 0, 255 -# -# The first number represents the model's confidence that the object **is** a car and ranges from -# 0-255. The second number represents the model's confidence that the object **is not** a car and -# is also 0-255. These results mean the model is very sure that the first image is a car, and the -# second image is not (which is correct). Hence, our model is working! -# -# Summary -# ------- -# In this tutorial, we used transfer learning to quickly train an image recognition model to -# identify cars. We modified its input dimensions and last few layers to make it better at this, -# and to make it faster and smaller. We then quantified the model and compiled it using TVM to -# create an Arduino sketch. Lastly, we tested the model using two static images to prove it works -# as intended. -# -# Next Steps -# ^^^^^^^^^^ -# From here, we could modify the model to read live images from the camera - we have another -# Arduino tutorial for how to do that `on GitHub `_. Alternatively, we could also -# `use TVM's autotuning capabilities `_ to dramatically improve the model's performance. -# diff --git a/gallery/how_to/work_with_microtvm/micro_tvmc.sh b/gallery/how_to/work_with_microtvm/micro_tvmc.sh deleted file mode 100755 index bf9338cf5f7f..000000000000 --- a/gallery/how_to/work_with_microtvm/micro_tvmc.sh +++ /dev/null @@ -1,202 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# bash-ignore -set -euxo pipefail -# bash-ignore - -# bash-comment -# """ -# .. _tutorial-micro-cli-tool: -# -# 1. microTVM CLI Tool -# ==================== -# **Author**: `Mehrdad Hessar `_ -# -# This tutorial explains how to compile a tiny model for a micro device, -# build a program on Zephyr platform to execute this model, flash the program -# and run the model all using `tvmc micro` command. -# You need to install python and Zephyr dependencies before processing with this tutorial. -# """ -# bash-comment - -###################################################################### -# -# .. include:: ../../../../gallery/how_to/work_with_microtvm/install_dependencies.rst -# - -###################################################################### -# -# .. include:: ../../../../gallery/how_to/work_with_microtvm/install_zephyr.rst -# - -# bash-ignore -shopt -s expand_aliases -alias tvmc="python3 -m tvm.driver.tvmc" -# bash-ignore - -############################################################ -# Using TVMC Micro -############################################################ -# -# TVMC is a command-line tool which is installed as a part of TVM Python packages. Accessing this -# package varies based on your machine setup. In many cases, you can use the ``tvmc`` command directly. 
-# Alternatively, if you have TVM as a Python module on your ``$PYTHONPATH``, you can access this -# driver with ``python -m tvm.driver.tvmc`` command. This tutorial will use TVMC command as -# ``tvmc`` for simplicity. -# -# To check if you have TVMC command installed on your machine, you can run: -# -# bash -tvmc --help -# bash -# To compile a model for microtvm we use ``tvmc compile`` subcommand. The output of this command -# is used in next steps with ``tvmc micro`` subcommands. You can check the availability of TVMC Micro using: -# -# bash -tvmc micro --help -# bash -# -# The main tasks that you can perform using ``tvmc micro`` are ``create``, ``build`` and ``flash``. -# To read about specific options under a givern subcommand, use -# ``tvmc micro --help``. We will use each subcommand in this tutorial. -# - -############################################################ -# Obtain a Tiny Model -############################################################ -# -# For this tutorial, we will use Micro Speech model from tflite micro. Micro Speech is a -# Depthwise Convolution Layer model to recognize keywords in speech. -# -# For this tutorial we will be using the model in tflite format. -# -# bash -wget https://github.com/tensorflow/tflite-micro/raw/a56087ffa2703b4d5632f024a8a4c899815c31bb/tensorflow/lite/micro/examples/micro_speech/micro_speech.tflite -# bash - -############################################################ -# Compiling a TFLite model to a Model Library Format -############################################################ -# -# Model Library Format (MLF) is an output format that TVM provides for micro targets. MLF is a tarball -# containing a file for each piece of the TVM compiler output which can be used on micro targets outside -# TVM environment. -# -# Here, we generate a MLF file for ``qemu_x86`` Zephyr board. You can chooses `aot` or `graph` executor type -# to run this tutorial, however, we recommend to use `aot` for microTVM targets since `aot` uses ahead of time -# compilation with static memory allocation. To generate MLF output for the ``micro_speech`` tflite model: -# -# bash -tvmc compile micro_speech.tflite \ - --target='c -keys=cpu -model=host' \ - --runtime=crt \ - --runtime-crt-system-lib 1 \ - --executor='aot' \ - --output model.tar \ - --output-format mlf \ - --pass-config tir.disable_vectorize=1 -# bash -# This will generate a ``model.tar`` file which contains TVM compiler output files. To run this command for -# a different Zephyr device, you need to update ``target``. For instance, for ``nrf5340dk_nrf5340_cpuapp`` board -# the target is ``--target='c -keys=cpu -model=nrf5340dk'``. -# - - -############################################################ -# Create a Zephyr Project Using Model Library Format -############################################################ -# -# To generate a Zephyr project we use TVM Micro subcommand ``create``. We pass the MLF format and the path -# for the project to ``create`` subcommand along with project options. Project options for each -# platform (Zephyr/Arduino) are defined in their Project API server file. To build -# Zephyr project for a different Zephyr board, change ``zephyr_board`` project option. -# To generate Zephyr project, run: -# -# bash -tvmc micro create \ - project \ - model.tar \ - zephyr \ - --project-option project_type=host_driven board=qemu_x86 -# bash -# This will generate a ``Host-Driven`` Zephyr project for ``qemu_x86`` Zephyr board. 
In Host-Driven template project, -# the Graph Executor will run on host and perform the model execution on Zephyr device by issuing commands to the -# device using an RPC mechanism. Read more about `Host-Driven Execution `_. -# -# To get more information about TVMC Micro ``create`` subcommand: -# -# .. code-block:: bash -# -# tvmc micro create --help -# - -############################################################ -# Build and Flash Zephyr Project Using TVMC Micro -############################################################ -# -# Next step is to build the Zephyr project which includes TVM generated code for running the tiny model, Zephyr -# template code to run a model in Host-Driven mode and TVM runtime source/header files. To build the project: -# -# bash -tvmc micro build \ - project \ - zephyr -# bash -# This will build the project in ``project`` directory and generates binary files under ``project/build``. -# -# Next, we flash the Zephyr binary file to Zephyr device. For ``qemu_x86`` Zephyr board this step does not -# actually perform any action since QEMU will be used, however you need this step for physical hardware. -# -# bash -tvmc micro flash \ - project \ - zephyr -# bash - -############################################################ -# Run Tiny Model on Micro Target -############################################################ -# -# After flashing the device, the compiled model and TVM RPC server are programmed on the device. -# The Zephyr board is waiting for host to open a communication channel. MicroTVM devices typicall communicate -# using a serial communication (UART). To run the flashed model on the device using TVMC, we use ``tvmc run`` subcommand -# and pass ``--device micro`` to specify the device type. This command will open a communication channel, set input -# values using ``Graph Executor`` on host and run full model on the device. Then it gets output from the device. -# -# bash -tvmc run \ - --device micro \ - project \ - --fill-mode ones \ - --print-top 4 -# bash - -############################################################ -# Specifically, this command sets the input of the model -# to all ones and shows the four values of the output with their indices. -# -# .. code-block:: bash -# -# # Output: -# # INFO:__main__:b'[100%] [QEMU] CPU: qemu32,+nx,+pae\n' -# # remote: microTVM Zephyr runtime - running -# # INFO:__main__:b'[100%] Built target run\n' -# # [[ 3 2 1 0] -# # [ 113 -120 -121 -128]] -# diff --git a/include/tvm/meta_schedule/mutator.h b/include/tvm/meta_schedule/mutator.h index 1560c00f3907..08a8248dfdbc 100644 --- a/include/tvm/meta_schedule/mutator.h +++ b/include/tvm/meta_schedule/mutator.h @@ -137,8 +137,6 @@ class Mutator : public runtime::ObjectRef { TVM_DLL static Map DefaultCUDATensorCore(); /*! \brief Create default mutators for Hexagon */ TVM_DLL static Map DefaultHexagon(); - /*! \brief Create default mutators for Micro */ - TVM_DLL static Map DefaultMicro(); TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Mutator, ObjectRef, MutatorNode); }; diff --git a/include/tvm/meta_schedule/postproc.h b/include/tvm/meta_schedule/postproc.h index f297ca090482..47b6b80ea43e 100644 --- a/include/tvm/meta_schedule/postproc.h +++ b/include/tvm/meta_schedule/postproc.h @@ -170,8 +170,6 @@ class Postproc : public runtime::ObjectRef { TVM_DLL static Array DefaultCUDATensorCore(); /*! \brief Create default postprocessors for Hexagon */ TVM_DLL static Array DefaultHexagon(); - /*! 
\brief Create default postprocessors for Micro */ - TVM_DLL static Array DefaultMicro(); TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Postproc, ObjectRef, PostprocNode); }; diff --git a/include/tvm/meta_schedule/schedule_rule.h b/include/tvm/meta_schedule/schedule_rule.h index 90aec05187eb..e0dbc7be50cf 100644 --- a/include/tvm/meta_schedule/schedule_rule.h +++ b/include/tvm/meta_schedule/schedule_rule.h @@ -298,8 +298,6 @@ class ScheduleRule : public runtime::ObjectRef { TVM_DLL static Array DefaultCUDATensorCore(); /*! \brief Create default schedule rules for Hexagon */ TVM_DLL static Array DefaultHexagon(); - /*! \brief Create default schedule rules for Micro */ - TVM_DLL static Array DefaultMicro(); /*! \brief Create default schedule rules for ARM CPU (NEON and DOTPROD) */ TVM_DLL static Array DefaultARM(const String& type); diff --git a/include/tvm/runtime/crt/aot_executor.h b/include/tvm/runtime/crt/aot_executor.h deleted file mode 100644 index 693a66c3ae04..000000000000 --- a/include/tvm/runtime/crt/aot_executor.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file aot_executor.h - * \brief AoT Executor - */ -#ifndef TVM_RUNTIME_CRT_AOT_EXECUTOR_H_ -#define TVM_RUNTIME_CRT_AOT_EXECUTOR_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -typedef struct TVMMetadata TVMMetadata; - -typedef struct TVMAotExecutor { - /*! \brief The top-level metadata structure supplied by the generated code */ - const TVMMetadata* metadata; - /*! \brief The code module that contains the compiled model */ - TVMModuleHandle module_handle; - /*! \brief The device type */ - DLDevice device; - /*! \brief List of allocated arguments, input(s), output(s), and pool(s)*/ - TVMNDArray* args; - int64_t num_args; -} TVMAotExecutor; - -/*! - * \brief Allocate a new AotExecutor with TVMPlatformMemoryAllocate and initialize it. - * - * \param module_handle TVM Module that exposes the functions to call. - * \param device Runtime execution device, only supports device type kDLCPU, index 0. - * \param executor Pointer which receives a pointer to the newly-created instance. - * \param module_name TVM Module name prefix, typically "default". - * \return 0 if successful. - */ -int TVMAotExecutor_Create(TVMModuleHandle module_handle, const DLDevice device, - TVMAotExecutor** executor, const char* module_name); - -/*! - * \brief Release the AoT executor created by TVMAotExecutor_Create(). - * - * \param executor Pointer to executor instance, created by TVMAotExecutor_Create(). - * \param device Runtime execution device, only supports device type kDLCPU, index 0. - * \return 0 if successful. - */ -int TVMAotExecutor_Release(TVMAotExecutor* executor, const DLDevice device); - -/*! 
- * \brief Return the number of inputs. - * - * \param executor Pointer to executor instance, created by TVMAotExecutor_Create(). - * \return Number of inputs. - */ -int TVMAotExecutor_GetNumInputs(TVMAotExecutor* executor); - -/*! - * \brief Return the number of outputs. - * - * \param executor Pointer to executor instance, created by TVMAotExecutor_Create(). - * \return Number of outputs. - */ -int TVMAotExecutor_GetNumOutputs(TVMAotExecutor* executor); - -/*! - * \brief Return the input index of the specified input name - * - * \param executor Pointer to executor instance, created by TVMAotExecutor_Create(). - * \param name Input name for retrieving index. - * \return Input index. - */ -int TVMAotExecutor_GetInputIndex(TVMAotExecutor* executor, const char* name); - -/*! - * \brief Return a pointer to name of input with the specified input index - * - * \param executor Pointer to executor instance, created by TVMAotExecutor_Create(). - * \param index Input index for retrieving name. - * \param name Output for retrieving name. - * \return Pointer to input name in `name`. - */ -int TVMAotExecutor_GetInputName(TVMAotExecutor* executor, int index, const char** name); - -/*! - * \brief Run the generated program. - * - * \param executor Pointer to executor instance, created by TVMAotExecutor_Create(). - * \return 0 if successful. - */ -int TVMAotExecutor_Run(TVMAotExecutor* executor); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TVM_RUNTIME_CRT_AOT_EXECUTOR_H_ diff --git a/include/tvm/runtime/crt/aot_executor_module.h b/include/tvm/runtime/crt/aot_executor_module.h deleted file mode 100644 index bd539c9b08c9..000000000000 --- a/include/tvm/runtime/crt/aot_executor_module.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file graph_executor.h - * \brief Tiny AoT executor - */ -#ifndef TVM_RUNTIME_CRT_AOT_EXECUTOR_MODULE_H_ -#define TVM_RUNTIME_CRT_AOT_EXECUTOR_MODULE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/*! - * \brief Register the "tvm.aot_executor.create" constructor PackedFunc. - */ -tvm_crt_error_t TVMAotExecutorModule_Register(); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TVM_RUNTIME_CRT_AOT_EXECUTOR_MODULE_H_ diff --git a/include/tvm/runtime/crt/crt.h b/include/tvm/runtime/crt/crt.h deleted file mode 100644 index 8c9ef4504057..000000000000 --- a/include/tvm/runtime/crt/crt.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file tvm/runtime/crt/crt.h - * \brief Defines core life cycle functions used by CRT. - */ - -#ifndef TVM_RUNTIME_CRT_CRT_H_ -#define TVM_RUNTIME_CRT_CRT_H_ - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/*! - * \brief Initialize various data structures used by the runtime. - * Prior to calling this, any initialization needed to support TVMPlatformMemory* functions should - * be completed. - * \return An error code describing the outcome of initialization. Generally, initialization - * is only expected to fail due to a misconfiguration. - */ -tvm_crt_error_t TVMInitializeRuntime(); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TVM_RUNTIME_CRT_CRT_H_ diff --git a/include/tvm/runtime/crt/error_codes.h b/include/tvm/runtime/crt/error_codes.h deleted file mode 100644 index 2495cad50b48..000000000000 --- a/include/tvm/runtime/crt/error_codes.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file include/tvm/runtime/crt/error_codes.h - * \brief Defines integral error codes returned by the CRT. 
- */ -#ifndef TVM_RUNTIME_CRT_ERROR_CODES_H_ -#define TVM_RUNTIME_CRT_ERROR_CODES_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#define TVM_CRT_ERROR_CATEGORY_Pos 8 -#define TVM_CRT_ERROR_CATEGORY_Msk (0xff << TVM_CRT_ERROR_CATEGORY_Pos) -#define TVM_CRT_ERROR_CODE_Pos 0 -#define TVM_CRT_ERROR_CODE_Msk (0xff << TVM_CRT_ERROR_CODE_Pos) - -#define DEFINE_TVM_CRT_ERROR(category, code) \ - (((category) << TVM_CRT_ERROR_CATEGORY_Pos) | ((code) << TVM_CRT_ERROR_CODE_Pos)) -typedef enum { - kTvmErrorCategoryFunctionRegistry = 1, - kTvmErrorCategoryFraming = 2, - kTvmErrorCategoryWriteStream = 3, - kTvmErrorCategorySession = 4, - kTvmErrorCategoryPlatform = 5, - kTvmErrorCategoryGenerated = 6, - kTvmErrorCategoryExecutor = 7, - kTvmErrorCategoryFunctionCall = 8, - kTvmErrorCategoryTimeEvaluator = 9, -} tvm_crt_error_category_t; - -typedef enum { - kTvmErrorNoError = 0, - - // Function Registry - kTvmErrorFunctionNameNotFound = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionRegistry, 0), - kTvmErrorFunctionIndexInvalid = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionRegistry, 1), - kTvmErrorFunctionRegistryFull = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionRegistry, 2), - kTvmErrorFunctionAlreadyDefined = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionRegistry, 3), - kTvmErrorBufferTooSmall = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionRegistry, 4), - - // Framing - kTvmErrorFramingInvalidState = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFraming, 0), - kTvmErrorFramingShortPacket = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFraming, 1), - kTvmErrorFramingInvalidEscape = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFraming, 2), - kTvmErrorFramingPayloadOverflow = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFraming, 3), - kTvmErrorFramingPayloadIncomplete = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFraming, 4), - - // Write stream - kTvmErrorWriteStreamShortWrite = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryWriteStream, 0), - kTvmErrorWriteStreamLongWrite = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryWriteStream, 1), - - // Session - kTvmErrorSessionInvalidState = DEFINE_TVM_CRT_ERROR(kTvmErrorCategorySession, 0), - kTvmErrorSessionReceiveBufferBusy = DEFINE_TVM_CRT_ERROR(kTvmErrorCategorySession, 1), - kTvmErrorSessionReceiveBufferShortWrite = DEFINE_TVM_CRT_ERROR(kTvmErrorCategorySession, 2), - - // Platform - kTvmErrorPlatformCheckFailure = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 0), - kTvmErrorPlatformMemoryManagerInitialized = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 1), - kTvmErrorPlatformShutdown = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 2), - kTvmErrorPlatformNoMemory = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 3), - kTvmErrorPlatformTimerBadState = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 4), - kTvmErrorPlatformStackAllocBadFree = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 5), - - // Common error codes returned from generated functions. - kTvmErrorGeneratedInvalidStorageId = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGenerated, 0), - - // Graph or AoT executor - kTvmErrorExecutorModuleAlreadyCreated = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryExecutor, 0), - kTvmErrorExecutorModuleBadContext = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryExecutor, 1), - kTvmErrorExecutorModuleNoSuchInput = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryExecutor, 2), - - // Function Calls - common problems encountered calling functions. 
- kTvmErrorFunctionCallNumArguments = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 0), - kTvmErrorFunctionCallWrongArgType = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 1), - kTvmErrorFunctionCallNotImplemented = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 2), - kTvmErrorFunctionCallInvalidArg = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 3), - - // Time Evaluator - times functions for use with debug runtime. - kTvmErrorTimeEvaluatorBadHandle = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryTimeEvaluator, 0), - - // System errors are always negative integers; this mask indicates presence of a system error. - // Cast tvm_crt_error_t to a signed integer to interpret the negative error code. - kTvmErrorSystemErrorMask = (1 << (sizeof(int) * 8 - 1)), -} tvm_crt_error_t; - -#ifdef __cplusplus -} -#endif - -#endif // TVM_RUNTIME_CRT_ERROR_CODES_H_ diff --git a/include/tvm/runtime/crt/func_registry.h b/include/tvm/runtime/crt/func_registry.h deleted file mode 100644 index 50737f871798..000000000000 --- a/include/tvm/runtime/crt/func_registry.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file include/tvm/runtime/crt/func_registry.h - * \brief Defines generic string-based function lookup structs - */ -#ifndef TVM_RUNTIME_CRT_FUNC_REGISTRY_H_ -#define TVM_RUNTIME_CRT_FUNC_REGISTRY_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -typedef uint16_t tvm_function_index_t; - -typedef uint16_t tvm_module_index_t; - -/*! - * \brief A data structure that facilitates function lookup by C-string name. - */ -typedef struct TVMFuncRegistry { - /*! \brief Names of registered functions, concatenated together and separated by \0. - * An additional \0 is present at the end of the concatenated blob to mark the end. - * - * Byte 0 and 1 are the number of functions in `funcs`. - */ - const char* names; - - /*! \brief Function pointers, in the same order as their names in `names`. */ - const TVMBackendPackedCFunc* funcs; -} TVMFuncRegistry; - -/*! - * \brief Get the of the number of functions from registry. - * - * \param reg TVMFunctionRegistry instance that contains the function. - * \return The number of functions from registry. - */ -uint16_t TVMFuncRegistry_GetNumFuncs(const TVMFuncRegistry* reg); - -/*! - * \brief Set the number of functions to registry. - * - * \param reg TVMFunctionRegistry instance that contains the function. - * \param num_funcs The number of functions - * \return 0 when successful. - */ -int TVMFuncRegistry_SetNumFuncs(const TVMFuncRegistry* reg, const uint16_t num_funcs); - -/*! - * \brief Get the address of 0th function from registry. - * - * \param reg TVMFunctionRegistry instance that contains the function. 
- * \return the address of 0th function from registry - */ -const char* TVMFuncRegistry_Get0thFunctionName(const TVMFuncRegistry* reg); - -/*! - * \brief Get packed function from registry by name. - * - * \param reg TVMFunctionRegistry instance that contains the function. -, * \param name The function name - * \param function_index Pointer to receive the 0-based index of the function in the registry, if it - * was found. Unmodified otherwise. - * \return kTvmErrorNoError when successful. kTvmErrorFunctionNameNotFound when no function matched -`name`. - */ -tvm_crt_error_t TVMFuncRegistry_Lookup(const TVMFuncRegistry* reg, const char* name, - tvm_function_index_t* function_index); - -/*! - * \brief Fetch TVMBackendPackedCFunc given a function index - * - * \param reg TVMFunctionRegistry instance that contains the function. - * \param index Index of the function. - * \param out_func Pointer which receives the function pointer at `index`, if a valid - * index was given. Unmodified otherwise. - * \return kTvmErrorNoError when successful. kTvmErrorFunctionIndexInvalid when index was out of - * range. - */ -tvm_crt_error_t TVMFuncRegistry_GetByIndex(const TVMFuncRegistry* reg, tvm_function_index_t index, - TVMBackendPackedCFunc* out_func); - -/*! - * \brief A TVMFuncRegistry that supports adding and changing the functions. - */ -typedef struct TVMMutableFuncRegistry { - TVMFuncRegistry registry; - - /*! \brief maximum number of functions in this registry. */ - size_t max_functions; -} TVMMutableFuncRegistry; - -// Defined to work around compiler limitations. -#define TVM_AVERAGE_FUNCTION_NAME_STRLEN_BYTES 10 - -/*! - * \brief Size of an average function name in a TVMMutableFuncRegistry, in bytes. - * - * This is just an assumption made by the runtime for ease of use. - */ -static const size_t kTvmAverageFunctionNameStrlenBytes = TVM_AVERAGE_FUNCTION_NAME_STRLEN_BYTES; - -/*! - * \brief Size of an average entry in a TVMMutableFuncRegistry, in bytes. - * - * Assumes a constant average function name length. - */ -static const size_t kTvmAverageFuncEntrySizeBytes = - TVM_AVERAGE_FUNCTION_NAME_STRLEN_BYTES + 1 + sizeof(void*); - -/*! - * \brief Create a new mutable function registry from a block of memory. - * - * \param reg TVMMutableFuncRegistry to create. - * \param buffer Backing memory available for this function registry. - * \param buffer_size_bytes Number of bytes available in buffer. - * \return kTvmErrorNoError when successful. kTvmErrorBufferTooSmall when buffer_size_bytes is so - * small that a single function cannot be registered. - */ -tvm_crt_error_t TVMMutableFuncRegistry_Create(TVMMutableFuncRegistry* reg, uint8_t* buffer, - size_t buffer_size_bytes); - -/*! - * \brief Add or set a function in the registry. - * - * \param reg The mutable function registry to affect. - * \param name Name of the function. - * \param func The function pointer. - * \param override non-zero if an existing entry should be overridden. - * \return kTvmErrorNoError when successful. kTvmErrorRegistryFull when `reg` already contains - * `max_functions` entries. kTvmErrorFunctionAlreadyDefined when a function named `name` is - * already present in the registry, and `override` == 0. 
- */ -tvm_crt_error_t TVMMutableFuncRegistry_Set(TVMMutableFuncRegistry* reg, const char* name, - TVMBackendPackedCFunc func, int override); - -#ifdef __cplusplus -} -#endif - -#endif // TVM_RUNTIME_CRT_FUNC_REGISTRY_H_ diff --git a/include/tvm/runtime/crt/graph_executor.h b/include/tvm/runtime/crt/graph_executor.h deleted file mode 100644 index 1353d8e06e6b..000000000000 --- a/include/tvm/runtime/crt/graph_executor.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file graph_executor.h - * \brief Tiny graph executor that can run graph containing only tvm PackedFunc. - */ -#ifndef TVM_RUNTIME_CRT_GRAPH_EXECUTOR_H_ -#define TVM_RUNTIME_CRT_GRAPH_EXECUTOR_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -struct TVMModule; - -/*! \brief operator attributes about tvm op */ -typedef struct TVMOpParam { - char func_name[TVM_CRT_MAX_STRLEN_FUNCTION_NAME]; - uint32_t num_inputs; - uint32_t num_outputs; - uint32_t flatten_data; -} TVMOpParam; - -// Graph attribute -typedef struct TVMGraphExecutorGraphAttr { - uint32_t storage_num_not_alloctaed; - uint32_t* storage_id; - uint32_t* device_index; - char* dltype; // "int8", "int16", "float32" - uint32_t dltype_count; - int64_t* shape; - uint32_t* ndim; - uint32_t shape_count; -} TVMGraphExecutorGraphAttr; - -typedef struct TVMGraphExecutor TVMGraphExecutor; - -// public functions -/*! - * \brief Allocate a new GraphExecutor with TVMPlatformMemoryAllocate and initialize it. - * - * \param sym_json JSON-encoded graph. - * \param module_handle TVM Module that exposes the functions to call. - * \param devices runtime execution device. - * \param executor Pointer which receives a pointer to the newly-created instance. - * \return 0 if successful. - */ -int TVMGraphExecutor_Create(const char* sym_json, TVMModuleHandle module_handle, - const DLDevice* devices, TVMGraphExecutor** executor); - -int TVMGraphExecutor_GetInputIndex(TVMGraphExecutor* executor, const char* name); - -/*! - * \brief get number of input tensors allocated. - * \return integer number of tensors available to use. - */ -int TVMGraphExecutor_GetNumInputs(); - -/*! - * \brief set input to the graph based on name. - * \param executor The graph executor. - * \param name The name of the input. - * \param data_in The input data. - */ -void TVMGraphExecutor_SetInput(TVMGraphExecutor* executor, const char* name, DLTensor* data_in); - -/*! - * \brief get number of output tensors allocated. - * \return integer number of output tensors allocated. - */ -int TVMGraphExecutor_GetNumOutputs(); - -/*! - * \brief Return NDArray for given output index. - * \param executor The graph executor. - * \param index The output index. 
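/* A minimal host-driven inference sketch using the executor API documented in this header.
 * The graph JSON, parameter blob, module handle, tensor setup, and the input name "data"
 * are all assumed to come from the application and the generated model; error handling is
 * abbreviated to keep the sketch short. */
#include <stdint.h>
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/crt/graph_executor.h>

int RunGraphOnce(const char* graph_json, TVMModuleHandle module_handle, const char* param_blob,
                 uint32_t param_size, DLTensor* input, DLTensor* output) {
  DLDevice dev = {kDLCPU, 0};
  TVMGraphExecutor* executor = NULL;
  if (TVMGraphExecutor_Create(graph_json, module_handle, &dev, &executor) != 0) return -1;
  if (TVMGraphExecutor_LoadParams(executor, param_blob, param_size) != 0) return -1;
  TVMGraphExecutor_SetInput(executor, "data", input); /* input name is model-specific */
  TVMGraphExecutor_Run(executor);
  if (TVMGraphExecutor_GetOutput(executor, 0, output) != 0) return -1;
  return TVMGraphExecutor_Release(&executor);
}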
- * \param out The DLTensor corresponding to given output node index. - * \return The result of this function execution. - */ -int TVMGraphExecutor_GetOutput(TVMGraphExecutor* executor, const int32_t index, DLTensor* out); - -/*! - * \brief Load parameters from parameter blob. - * \param executor The graph executor. - * \param param_blob A binary blob of parameter. - * \param param_size The parameter size. - * \return The result of this function execution. - */ -int TVMGraphExecutor_LoadParams(TVMGraphExecutor* executor, const char* param_blob, - const uint32_t param_size); - -/*! - * \brief Execute the graph. - * \param executor The graph executor. - */ -void TVMGraphExecutor_Run(TVMGraphExecutor* executor); - -/*! - * \brief Release memory associated with the graph executor. - * \param executor Pointer to graph executor. - * \return 0 if successful - */ -int TVMGraphExecutor_Release(TVMGraphExecutor** executor); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TVM_RUNTIME_CRT_GRAPH_EXECUTOR_H_ diff --git a/include/tvm/runtime/crt/graph_executor_module.h b/include/tvm/runtime/crt/graph_executor_module.h deleted file mode 100644 index 5eb3994835a8..000000000000 --- a/include/tvm/runtime/crt/graph_executor_module.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file graph_executor_module.h - * \brief Tiny graph executor that can run graph containing only tvm PackedFunc. - */ -#ifndef TVM_RUNTIME_CRT_GRAPH_EXECUTOR_MODULE_H_ -#define TVM_RUNTIME_CRT_GRAPH_EXECUTOR_MODULE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/*! - * \brief Register the "tvm.graph_executor.create" constructor PackedFunc. - */ -tvm_crt_error_t TVMGraphExecutorModule_Register(); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TVM_RUNTIME_CRT_GRAPH_EXECUTOR_MODULE_H_ diff --git a/include/tvm/runtime/crt/logging.h b/include/tvm/runtime/crt/logging.h deleted file mode 100644 index 6cedf1b6df2b..000000000000 --- a/include/tvm/runtime/crt/logging.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file runtime/crt/logging.h - * \brief A replacement of the dmlc logging system that avoids - * the usage of GLOG and C++ headers - */ - -#ifndef TVM_RUNTIME_CRT_LOGGING_H_ -#define TVM_RUNTIME_CRT_LOGGING_H_ - -#include - -#define TVM_CRT_LOG_LEVEL_DEBUG 3 -#define TVM_CRT_LOG_LEVEL_INFO 2 -#define TVM_CRT_LOG_LEVEL_WARN 1 -#define TVM_CRT_LOG_LEVEL_ERROR 0 - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(_MSC_VER) -void TVMLogf(const char* fmt, ...); -#else -void __attribute__((format(printf, 1, 2))) TVMLogf(const char* fmt, ...); -#endif - -#define LOG(level, x, ...) \ - if (TVM_CRT_LOG_LEVEL >= level) { \ - TVMLogf(x, ##__VA_ARGS__); \ - } - -#define LOG_ERROR(x, ...) LOG(TVM_CRT_LOG_LEVEL_ERROR, x, ##__VA_ARGS__) -#define LOG_WARN(x, ...) LOG(TVM_CRT_LOG_LEVEL_WARN, x, ##__VA_ARGS__) -#define LOG_INFO(x, ...) LOG(TVM_CRT_LOG_LEVEL_INFO, x, ##__VA_ARGS__) -#define LOG_DEBUG(x, ...) LOG(TVM_CRT_LOG_LEVEL_DEBUG, x, ##__VA_ARGS__) - -#ifndef CHECK -#define CHECK(x) \ - do { \ - if (!(x)) { \ - LOG_ERROR(__FILE__ ":%d: Check failed: %s\n", __LINE__, #x); \ - TVMPlatformAbort(kTvmErrorPlatformCheckFailure); \ - } \ - } while (0) -#endif - -#ifndef CHECK_BINARY_OP -#define CHECK_BINARY_OP(op, x, y, fmt, ...) \ - do { \ - if (!(x op y)) { \ - LOG_ERROR(__FILE__ ":%d: Check failed: %s %s %s: " fmt "\n", __LINE__, #x, #op, #y, \ - ##__VA_ARGS__); \ - TVMPlatformAbort(kTvmErrorPlatformCheckFailure); \ - } \ - } while (0) -#endif - -#ifndef CHECK_LT -#define CHECK_LT(x, y, fmt, ...) CHECK_BINARY_OP(<, x, y, fmt, ##__VA_ARGS__) -#endif - -#ifndef CHECK_GT -#define CHECK_GT(x, y, fmt, ...) CHECK_BINARY_OP(>, x, y, fmt, ##__VA_ARGS__) -#endif - -#ifndef CHECK_LE -#define CHECK_LE(x, y, fmt, ...) CHECK_BINARY_OP(<=, x, y, fmt, ##__VA_ARGS__) -#endif - -#ifndef CHECK_GE -#define CHECK_GE(x, y, fmt, ...) CHECK_BINARY_OP(>=, x, y, fmt, ##__VA_ARGS__) -#endif - -#ifndef CHECK_EQ -#define CHECK_EQ(x, y, fmt, ...) CHECK_BINARY_OP(==, x, y, fmt, ##__VA_ARGS__) -#endif - -#ifndef CHECK_NE -#define CHECK_NE(x, y, fmt, ...) CHECK_BINARY_OP(!=, x, y, fmt, ##__VA_ARGS__) -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TVM_RUNTIME_CRT_LOGGING_H_ diff --git a/include/tvm/runtime/crt/microtvm_rpc_server.h b/include/tvm/runtime/crt/microtvm_rpc_server.h deleted file mode 100644 index 19b3d759308a..000000000000 --- a/include/tvm/runtime/crt/microtvm_rpc_server.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \file microtvm_rpc_server.h - * \brief MicroTVM RPC Server - */ - -#ifndef TVM_RUNTIME_CRT_MICROTVM_RPC_SERVER_H_ -#define TVM_RUNTIME_CRT_MICROTVM_RPC_SERVER_H_ - -#include -#include -#include - -#include "../../../../src/support/ssize.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/*! \brief TVM RPC channel write function. - * - * Tries to write `num_bytes` from `data` to the underlying channel. - * \param context The context. - * \param data Pointer to data to write. - * \param num_bytes Number of bytes avaiable in data. - * \return The number of bytes written. - */ -typedef ssize_t (*microtvm_rpc_channel_write_t)(void* context, const uint8_t* data, - size_t num_bytes); - -/*! \brief Opaque pointer type to TVM RPC Server. */ -typedef void* microtvm_rpc_server_t; - -/*! \brief Initialize the TVM RPC Server. - * - * Call this on device startup before calling anyother microtvm_rpc_server_ functions. - * - * \param write_func A callback function invoked by the TVM RPC Server to write data back to the - * host. Internally, the TVM RPC Server will block until all data in a reply - * packet has been written. - * \param write_func_ctx An opaque pointer passed to write_func when it is called. - * \return A pointer to the TVM RPC Server. The pointer is allocated in the same memory space as - * the TVM workspace. - */ -microtvm_rpc_server_t MicroTVMRpcServerInit(microtvm_rpc_channel_write_t write_func, - void* write_func_ctx); - -/*! \brief Do any tasks suitable for the main thread, and maybe process new incoming data. - * - * \param server The TVM RPC Server pointer. - * \param new_data If not nullptr, a pointer to a buffer pointer, which should point at new input - * data to process. On return, updated to point past data that has been consumed. - * \param new_data_size_bytes Points to the number of valid bytes in `new_data`. On return, - * updated to the number of unprocessed bytes remaining in `new_data` (usually 0). - * \return An error code indicating the outcome of the server main loop iteration. - */ -tvm_crt_error_t MicroTVMRpcServerLoop(microtvm_rpc_server_t server, uint8_t** new_data, - size_t* new_data_size_bytes); - -#ifdef __cplusplus -} -#endif - -#endif // TVM_RUNTIME_CRT_MICROTVM_RPC_SERVER_H_ diff --git a/include/tvm/runtime/crt/module.h b/include/tvm/runtime/crt/module.h deleted file mode 100644 index 252766755f3b..000000000000 --- a/include/tvm/runtime/crt/module.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file include/tvm/runtime/crt/module.h - * \brief Runtime container of the functions - */ -#ifndef TVM_RUNTIME_CRT_MODULE_H_ -#define TVM_RUNTIME_CRT_MODULE_H_ - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/*! 
- * \brief Module container of TVM. - */ -typedef struct TVMModule { - /*! \brief The function registry associated with this module. */ - const TVMFuncRegistry* registry; -} TVMModule; - -/*! - * \brief Create a new module handle from the given TVMModule instance. - * \param mod The module instance to register. - * \param out_handle Pointer to receive the newly-minted handle for this module. - * \return 0 on success, non-zero on error. - */ -int TVMModCreateFromCModule(const TVMModule* mod, TVMModuleHandle* out_handle); - -/*! \brief Entry point for the system lib module. */ -const TVMModule* TVMSystemLibEntryPoint(void); - -#ifdef __cplusplus -} -#endif -#endif // TVM_RUNTIME_CRT_MODULE_H_ diff --git a/include/tvm/runtime/crt/packed_func.h b/include/tvm/runtime/crt/packed_func.h deleted file mode 100644 index 83d961baf203..000000000000 --- a/include/tvm/runtime/crt/packed_func.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file tvm/runtime/crt/packed_func.h - * \brief Type-erased function used across TVM API. 
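/* A short sketch tying the pieces together: code generated as a system library provides
 * TVMSystemLibEntryPoint(), and registering it yields the TVMModuleHandle that, for
 * example, TVMGraphExecutor_Create() expects. Shown under the assumption that such a
 * generated module is linked into the firmware image. */
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/crt/module.h>

int RegisterSystemLib(TVMModuleHandle* out_handle) {
  /* The returned TVMModule carries the function registry of the generated operators. */
  return TVMModCreateFromCModule(TVMSystemLibEntryPoint(), out_handle);
}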
- */ -#ifndef TVM_RUNTIME_CRT_PACKED_FUNC_H_ -#define TVM_RUNTIME_CRT_PACKED_FUNC_H_ - -#include -#include -#include -#include -#include -#include - -#include "crt_config.h" - -DLDataType String2DLDataType(const char* s); - -typedef struct TVMArgs { - TVMValue values[TVM_CRT_MAX_ARGS]; - int tcodes[TVM_CRT_MAX_ARGS]; /* Data type should be identical to type_codes in TVMPackedCFunc */ - uint32_t values_count; -} TVMArgs; - -TVMArgs TVMArgs_Create(TVMValue* values, uint32_t* tcodes, uint32_t values_count); - -typedef struct TVMPackedFunc { - char name[200]; - TVMFunctionHandle fexec; - TVMArgs args; - TVMArgs ret_value; - int (*Call)(struct TVMPackedFunc* pf); - void (*SetArgs)(struct TVMPackedFunc* pf, const struct TVMArgs* args); -} TVMPackedFunc; - -int TVMPackedFunc_InitGlobalFunc(TVMPackedFunc* pf, const char* name, const TVMArgs* args); -int TVMPackedFunc_InitModuleFunc(TVMPackedFunc* pf, TVMModuleHandle module, const char* name, - const TVMArgs* args); - -int TVMPackedFunc_Call(TVMPackedFunc* pf); - -void TVMPackedFunc_SetArgs(TVMPackedFunc* pf, const TVMArgs* args); - -inline TVMModuleHandle TVMArgs_AsModuleHandle(const TVMArgs* args, size_t index) { - if (index >= args->values_count) { - TVMPlatformAbort((tvm_crt_error_t)-1); - } - - if (args->tcodes[index] != kTVMModuleHandle) { - TVMPlatformAbort((tvm_crt_error_t)-1); - } - - return args->values[index].v_handle; -} - -extern TVMPackedFunc* g_fexecs; -extern uint32_t g_fexecs_count; - -#endif // TVM_RUNTIME_CRT_PACKED_FUNC_H_ diff --git a/include/tvm/runtime/crt/page_allocator.h b/include/tvm/runtime/crt/page_allocator.h deleted file mode 100644 index 7a5de169c72e..000000000000 --- a/include/tvm/runtime/crt/page_allocator.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file tvm/runtime/crt/page_allocator.h - * \brief An implementation of a dynamic memory allocator for microcontrollers. - */ - -#ifndef TVM_RUNTIME_CRT_PAGE_ALLOCATOR_H_ -#define TVM_RUNTIME_CRT_PAGE_ALLOCATOR_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -extern int vleak_size; - -typedef struct MemoryManagerInterface MemoryManagerInterface; - -struct MemoryManagerInterface { - /*! - * \brief Allocate a chunk of memory. - * \param interface Pointer to this structure. - * \param num_bytes Number of bytes requested. - * \param dev Execution device that will be used with the allocated memory. Must be {kDLCPU, 0}. - * \param out_ptr A pointer to which is written a pointer to the newly-allocated memory. - * \return kTvmErrorNoError if successful; a descriptive error code otherwise. - */ - tvm_crt_error_t (*Allocate)(MemoryManagerInterface* interface, size_t num_bytes, DLDevice dev, - void** out_ptr); - - /*! 
- * \brief Free a chunk of previously-used memory. - * - * \param interface Pointer to this structure. - * \param ptr A pointer returned from TVMPlatformMemoryAllocate which should be free'd. - * \param dev Execution device passed to TVMPlatformMemoryAllocate. Fixed to {kDLCPU, 0}. - * \return kTvmErrorNoError if successful; a descriptive error code otherwise. - */ - tvm_crt_error_t (*Free)(MemoryManagerInterface* interface, void* ptr, DLDevice dev); - - /*! \brief Used in testing; the number of allocated objects. */ - int vleak_size; -}; - -/*! - * Exposed for testing. - * - * \param manager Pointer, initialized with the new MemoryManager. - * \param memory_pool Pointer to the global memory pool used by the CRT. - * \param memory_pool_size_bytes Size of `memory_pool`, in bytes. - * \param page_size_bytes_log2 log2 of the page size, in bytes. - * \return kTvmErrorNoError on success. - */ -tvm_crt_error_t PageMemoryManagerCreate(MemoryManagerInterface** manager, uint8_t* memory_pool, - size_t memory_pool_size_bytes, size_t page_size_bytes_log2); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TVM_RUNTIME_CRT_PAGE_ALLOCATOR_H_ diff --git a/include/tvm/runtime/crt/platform.h b/include/tvm/runtime/crt/platform.h deleted file mode 100644 index 85121fd0f520..000000000000 --- a/include/tvm/runtime/crt/platform.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file tvm/runtime/crt/platform.h - * \brief The virtual memory manager for micro-controllers - */ - -#ifndef TVM_RUNTIME_CRT_PLATFORM_H_ -#define TVM_RUNTIME_CRT_PLATFORM_H_ - -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/*! \brief Called when an internal error occurs and execution cannot continue. - * - * The platform should ideally restart or hang at this point. - * - * \param code An error code. - */ -#if defined(_MSC_VER) -__declspec(noreturn) void TVMPlatformAbort(tvm_crt_error_t code); -#else -void __attribute__((noreturn)) TVMPlatformAbort(tvm_crt_error_t code); -#endif - -/*! \brief Called by the microTVM RPC server to implement TVMLogf. - * - * Not required to be implemented when the RPC server is not linked into the binary. This - * function's signature matches that of vsnprintf, so trivial implementations can just call - * vsnprintf. - * - * \param out_buf A char buffer where the formatted string should be written. - * \param out_buf_size_bytes Number of bytes available for writing in out_buf. - * \param fmt The printf-style formatstring. - * \param args extra arguments to be formatted. - * \return number of bytes written. - */ -size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, const char* fmt, - va_list args); - -/*! 
- * \brief Allocate memory for use by TVM. - * - * When this function returns something other than kTvmErrorNoError, *out_ptr should not be modified - * and the caller is not obligated to call TVMPlatformMemoryFree in order to avoid a memory leak. - * - * \param num_bytes Number of bytes requested. - * \param dev Execution device that will be used with the allocated memory. Fixed to {kDLCPU, 0}. - * \param out_ptr A pointer to which is written a pointer to the newly-allocated memory. - * \return kTvmErrorNoError if successful; a descriptive error code otherwise. - */ -tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr); - -/*! - * \brief Free memory used by TVM. - * - * \param ptr A pointer returned from TVMPlatformMemoryAllocate which should be free'd. - * \param dev Execution device passed to TVMPlatformMemoryAllocate. Fixed to {kDLCPU, 0}. - * \return kTvmErrorNoError if successful; a descriptive error code otherwise. - */ -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev); - -/*! \brief Start a device timer. - * - * The device timer used must not be running. - * - * \return kTvmErrorNoError if successful; a descriptive error code otherwise. - */ -tvm_crt_error_t TVMPlatformTimerStart(); - -/*! \brief Stop the running device timer and get the elapsed time (in microseconds). - * - * The device timer used must be running. - * - * \param elapsed_time_seconds Pointer to write elapsed time into. - * - * \return kTvmErrorNoError if successful; a descriptive error code otherwise. - */ -tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds); - -/*! \brief Platform-specific before measurement call. - * - * A function which is called before calling TVMFuncCall in the TimeEvaluator. - * Can be used, for example, to initialize reset global state which may affect the results of - * measurement. - * - * \return kTvmErrorNoError if successful; a descriptive error code otherwise. - */ -tvm_crt_error_t TVMPlatformBeforeMeasurement(); - -/*! \brief Platform-specific after measurement call. - * - * A function which is called after calling TVMFuncCall in the TimeEvaluator. - * It is the counterpart of the TVMPlatformBeforeMeasurement function. - * - * \return kTvmErrorNoError if successful; a descriptive error code otherwise. - */ -tvm_crt_error_t TVMPlatformAfterMeasurement(); - -/*! \brief Fill a buffer with random data. - * - * Cryptographically-secure random data is NOT required. This function is intended for use - * cases such as filling autotuning input tensors and choosing the nonce used for microTVM RPC. - * - * This function does not need to be implemented for inference tasks. It is used only by - * AutoTVM and the RPC server. When not implemented, an internal weak-linked stub is provided. - * - * Please take care that across successive resets, this function returns different sequences of - * values. If e.g. the random number generator is seeded with the same value, it may make it - * difficult for a host to detect device resets during autotuning or host-driven inference. - * - * \param buffer Pointer to the 0th byte to write with random data. `num_bytes` of random data - * should be written here. - * \param num_bytes Number of bytes to write. - * \return kTvmErrorNoError if successful; a descriptive error code otherwise. - */ -tvm_crt_error_t TVMPlatformGenerateRandom(uint8_t* buffer, size_t num_bytes); - -/*! \brief Initialize TVM inference. - * - * Placeholder function for TVM inference initializations on a specific platform. 
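/* A minimal, hosted-libc sketch of the three most commonly required hooks above. A real
 * microTVM port would replace malloc/free/exit with platform facilities and also provide
 * the timer (and, for autotuning/RPC, the random) hooks. kTvmErrorPlatformNoMemory is
 * assumed to be the out-of-memory code defined earlier in error_codes.h. */
#include <stdlib.h>
#include <tvm/runtime/crt/error_codes.h>
#include <tvm/runtime/crt/platform.h>

void TVMPlatformAbort(tvm_crt_error_t code) {
  exit((int)code); /* a bare-metal port would typically reset or spin here */
}

tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) {
  (void)dev; /* always {kDLCPU, 0} per the contract above */
  *out_ptr = malloc(num_bytes);
  return (*out_ptr != NULL) ? kTvmErrorNoError : kTvmErrorPlatformNoMemory;
}

tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) {
  (void)dev;
  free(ptr);
  return kTvmErrorNoError;
}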
- * A common use of this function is setting up workspace memory for TVM inference. - * - * \return kTvmErrorNoError if successful. - */ -tvm_crt_error_t TVMPlatformInitialize(); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TVM_RUNTIME_CRT_PLATFORM_H_ diff --git a/include/tvm/runtime/crt/rpc_common/frame_buffer.h b/include/tvm/runtime/crt/rpc_common/frame_buffer.h deleted file mode 100644 index 0d264e313a1d..000000000000 --- a/include/tvm/runtime/crt/rpc_common/frame_buffer.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file tvm/runtime/crt/rpc_common/frame_buffer.h - * \brief Defines a buffer for use by the RPC framing layer. - */ - -#ifndef TVM_RUNTIME_CRT_RPC_COMMON_FRAME_BUFFER_H_ -#define TVM_RUNTIME_CRT_RPC_COMMON_FRAME_BUFFER_H_ - -#include -#include - -namespace tvm { -namespace runtime { -namespace micro_rpc { - -class FrameBuffer { - public: - FrameBuffer(uint8_t* data, size_t data_size_bytes) - : data_{data}, capacity_{data_size_bytes}, num_valid_bytes_{0}, read_cursor_{0} {} - - size_t Write(const uint8_t* data, size_t data_size_bytes); - - size_t Read(uint8_t* data, size_t data_size_bytes); - - size_t Peek(uint8_t* data, size_t data_size_bytes); - - void Clear(); - - size_t ReadAvailable() const { return num_valid_bytes_ - read_cursor_; } - - size_t Size() const { return num_valid_bytes_; } - - private: - /*! \brief pointer to data buffer. */ - uint8_t* data_; - - /*! \brief The total number of bytes available in data_. Always a power of 2. */ - size_t capacity_; - - /*! \brief index into data_ of the next potentially-available byte in the buffer. - * The byte is available when tail_ != data_ + capacity_. - */ - size_t num_valid_bytes_; - - /*! \brief Read cursor position. */ - size_t read_cursor_; -}; - -} // namespace micro_rpc -} // namespace runtime -} // namespace tvm - -#endif // TVM_RUNTIME_CRT_RPC_COMMON_FRAME_BUFFER_H_ diff --git a/include/tvm/runtime/crt/rpc_common/framing.h b/include/tvm/runtime/crt/rpc_common/framing.h deleted file mode 100644 index 33f37a0af03f..000000000000 --- a/include/tvm/runtime/crt/rpc_common/framing.h +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
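/* A sketch of a device main loop wiring platform hooks like the ones above to the RPC
 * server API from microtvm_rpc_server.h. uart_write()/uart_read() are hypothetical
 * board-support routines, and the 128-byte receive chunk is an arbitrary choice. */
#include <stddef.h>
#include <stdint.h>
#include <tvm/runtime/crt/error_codes.h>
#include <tvm/runtime/crt/microtvm_rpc_server.h>

extern ssize_t uart_write(void* context, const uint8_t* data, size_t num_bytes); /* hypothetical */
extern size_t uart_read(uint8_t* buf, size_t capacity);                          /* hypothetical */

int main(void) {
  microtvm_rpc_server_t server = MicroTVMRpcServerInit(uart_write, NULL);
  uint8_t rx[128];
  for (;;) {
    size_t remaining = uart_read(rx, sizeof(rx));
    uint8_t* cursor = rx;
    /* The server advances `cursor` and decrements `remaining` as it consumes input. */
    while (remaining > 0 &&
           MicroTVMRpcServerLoop(server, &cursor, &remaining) == kTvmErrorNoError) {
    }
  }
  return 0;
}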
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file framing.h - * \brief Framing for RPC. - */ - -#ifndef TVM_RUNTIME_CRT_RPC_COMMON_FRAMING_H_ -#define TVM_RUNTIME_CRT_RPC_COMMON_FRAMING_H_ - -#include -#include -#include -#include - -namespace tvm { -namespace runtime { -namespace micro_rpc { - -uint16_t crc16_compute(const uint8_t* data, size_t data_size_bytes, uint16_t* previous_crc); - -enum class Escape : uint8_t { kEscapeStart = 0xff, kEscapeNop = 0xfe, kPacketStart = 0xfd }; - -class PacketFieldSizeBytes { - public: - static constexpr const size_t kPayloadLength = sizeof(uint32_t); - static constexpr const size_t kCrc = sizeof(uint16_t); -}; - -class Unframer { - public: - explicit Unframer(WriteStream* stream) - : stream_{stream}, - state_{State::kFindPacketStart}, - saw_escape_start_{false}, - num_buffer_bytes_valid_{0} {} - - /*! - * \brief Push data into unframer and try to decode one packet. - * - * This function will return when exactly one packet has been decoded. It may not consume all of - * `data` in this case, and valid bytes may remain at the end of data. - * - * \param data The new data to unframe and send downstream. - * \param data_size_bytes The number of valid bytes in data. - * \param bytes_consumed Pointer written with the number of bytes consumed from data. - * \return - * - kTvmErrorNoError when successful -- continue writing data. - * - kTvmErrorFramingInvalidState when the Unframer was in or enters an invalid state - * (probably indicates memory corruption). - * - kTvmErrorFramingShortPacket when a new packet started before the current one ended. - * - kTvmErrorFramingInvalidEscape when an invalid escape sequence was seen - */ - tvm_crt_error_t Write(const uint8_t* data, size_t data_size_bytes, size_t* bytes_consumed); - - /*! \brief Reset unframer to initial state. */ - void Reset(); - - /*! \brief Return an underestimate of the number of bytes needed from the wire. */ - size_t BytesNeeded(); - - private: - tvm_crt_error_t FindPacketStart(); - tvm_crt_error_t FindPacketLength(); - tvm_crt_error_t FindPacketCrc(); - tvm_crt_error_t FindCrcEnd(); - - bool IsBufferFull(size_t buffer_full_bytes) { - return num_buffer_bytes_valid_ >= buffer_full_bytes; - } - - /*! \brief Consume input into buffer_ until buffer_ has buffer_full_bytes. */ - tvm_crt_error_t AddToBuffer(size_t buffer_full_bytes, bool update_crc); - - void ClearBuffer(); - - /*! \brief Unescape and consume input bytes, storing into buffer. - * - * \param buffer A buffer to fill with consumed, unescaped bytes. - * \param buffer_size_bytes Size of buffer, in bytes. - * \param bytes_filled A pointer to an accumulator to which is added the number of bytes written - * to `buffer`. - * \param update_crc true when the CRC should be updated with the escaped bytes. - * \return - * - kTvmErrorNoError if successful - * - kTvmErrorFramingShortPacket if a start-of-packet escape code was encountered. If so, - * *bytes_filled indicates the number of bytes before the Escape::kEscapeStart byte. - * - kTvmErrorFramingInvalidEscape if an invalid escape sequence was seen. 
- * - kTvmErrorWriteStreamShortWrite if the WriteStream passed to constructor's Write() - * function returns 0. - * - kTvmErrorWriteStreamShortWrite if the WriteStream passed to constructor's Write() - * function returns an invalid positive number. - * - Any negative value (i.e. with bits in kTvmErrorSystemErrorMask set) returned by the - * WriteStream's Write() function. - */ - tvm_crt_error_t ConsumeInput(uint8_t* buffer, size_t buffer_size_bytes, size_t* bytes_filled, - bool update_crc); - - WriteStream* stream_; - - enum class State : uint8_t { - kFindPacketStart = 0, - kFindPacketLength = 1, - kFindPacketCrc = 2, - kFindCrcEnd = 3, - }; - State state_; - - const uint8_t* input_; - size_t input_size_bytes_; - - bool saw_escape_start_; - - /*! \brief unframe buffer, sized to the longest framing field. */ - uint8_t buffer_[128]; - - /*! \brief number of bytes in buffer that are currently valid. */ - size_t num_buffer_bytes_valid_; - - /*! \brief number of payload bytes left to receive before the CRC begins. */ - size_t num_payload_bytes_remaining_; - - /*! \brief Running CRC value. */ - uint16_t crc_; -}; - -class Framer { - public: - typedef ssize_t (*WriteFunc)(const uint8_t* data, size_t data_size_bytes); - - explicit Framer(WriteStream* stream) - : stream_{stream}, state_{State::kReset}, num_payload_bytes_remaining_{0} {} - - /*! \brief Frame and write a full packet. - * \param payload The entire packet payload. - * \param payload_size_bytes Number of bytes in the packet. - * \return - * - kTvmErrorNoError when no error occurs - * - kTvmErrorWriteStreamShortWrite if the WriteStream passed to constructor's Write() - * function returns 0. - * - kTvmErrorWriteStreamShortWrite if the WriteStream passed to constructor's Write() - * function returns an invalid positive number. - * - Any negative value (i.e. with bits in kTvmErrorSystemErrorMask set) returned by the - * WriteStream's Write() function. - */ - tvm_crt_error_t Write(const uint8_t* payload, size_t payload_size_bytes); - - /*! \brief Start framing and writing a new packet to the wire. - * - * When transmitting payloads that are too large to be buffered, call this function first to send - * the packet header and length fields. - * - * \param payload_size_bytes Number of payload bytes included as part of this packet. - * \return - * - kTvmErrorNoError when no error occurs - * - kTvmErrorWriteStreamShortWrite if the WriteStream passed to constructor's Write() - * function returns 0. - * - kTvmErrorWriteStreamShortWrite if the WriteStream passed to constructor's Write() - * function returns an invalid positive number. - * - Any negative value (i.e. with bits in kTvmErrorSystemErrorMask set) returned by the - * WriteStream's Write() function. - */ - tvm_crt_error_t StartPacket(size_t payload_size_bytes); - - /*! \brief Write payload data to the wire. - * - * When transmitting payloads that are too large to be buffered, call this function after calling - * StartPacket to escape and transmit framed payloads. This function can be called multiple times - * for a single packet. - * - * \param payload_chunk A piece of the packet payload. - * \param payload_chunk_size_bytes Number of valid bytes in payload_chunk. - * \return - * - kTvmErrorNoError when no error occurs - * - kTvmErrorFramingInvalidState when StartPacket() has not been called. - * - kTvmErrorFramingPayloadOverflow when more bytes were requested to be written than were - * declared in the payload_size_bytes parameter given to StartPacket(). 
- * - kTvmErrorWriteStreamShortWrite if the WriteStream passed to constructor's Write() - * function returns 0. - * - kTvmErrorWriteStreamShortWrite if the WriteStream passed to constructor's Write() - * function returns an invalid positive number. - * - Any negative value (i.e. with bits in kTvmErrorSystemErrorMask set) returned by the - * WriteStream's Write() function. - */ - tvm_crt_error_t WritePayloadChunk(const uint8_t* payload_chunk, size_t payload_chunk_size_bytes); - - /* \brief Finish writing one packet by sending the CRC. - * - * When transmitting paylaods that are too large to be buffered, call this function after sending - * the entire payload using WritePayloadChunk. - * - * \return - * - kTvmErrorNoError when no error occurs - * - kTvmErrorFramingInvalidState when StartPacket() has not been called. - * - kTvmErrorFramingPayloadIncomplete when less bytes were written using WritePayloadChunk() - * than were declared in the payload_size_bytes parameter given to StartPacket(). - * - kTvmErrorWriteStreamShortWrite if the WriteStream passed to constructor's Write() - * function returns 0. - * - kTvmErrorWriteStreamShortWrite if the WriteStream passed to constructor's Write() - * function returns an invalid positive number. - * - Any negative value (i.e. with bits in kTvmErrorSystemErrorMask set) returned by the - * WriteStream's Write() function. - */ - tvm_crt_error_t FinishPacket(); - - /* \brief Reset state of the Framer. */ - void Reset(); - - private: - /*! \brief Maximum size of stack-based buffer. */ - static constexpr const size_t kMaxStackBufferSizeBytes = 128; - - enum class State : uint8_t { - /*! \brief State entered at construction time or after write error, before first packet sent. */ - kReset = 0, - - /*! \brief State entered after a packet has successfully finished transmitting. */ - kIdle = 1, - - /*! \brief State entered when a packet payload or CRC needs to be transmitted. */ - kTransmitPacketPayload = 2, - }; - - /*! - * \brief Escape data and write the result to wire, and update crc_. - * - * \param data Unescaped data to write. - * \param data_size_bytes Number of valid bytes in data. - * \param escape true if escaping should be applied. - * \param update_crc true if escaping should be applied. - * \return kTvmErrorNoError on success, negative value on error. - */ - tvm_crt_error_t WriteAndCrc(const uint8_t* data, size_t data_size_bytes, bool escape, - bool update_crc); - - /*! \brief Called to write framed data to the transport. */ - WriteStream* stream_; - - /*! \brief State fo the Framer. */ - State state_; - - /*! \brief When state_ == kTransmitPacketPayload, number of payload bytes left to transmit. */ - size_t num_payload_bytes_remaining_; - - /*! \brief Running CRC value. */ - uint16_t crc_; -}; - -} // namespace micro_rpc -} // namespace runtime -} // namespace tvm - -#endif // TVM_RUNTIME_CRT_RPC_COMMON_FRAMING_H_ diff --git a/include/tvm/runtime/crt/rpc_common/session.h b/include/tvm/runtime/crt/rpc_common/session.h deleted file mode 100644 index 9bea4b05e7eb..000000000000 --- a/include/tvm/runtime/crt/rpc_common/session.h +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file session.h - * \brief RPC Session - */ - -#ifndef TVM_RUNTIME_CRT_RPC_COMMON_SESSION_H_ -#define TVM_RUNTIME_CRT_RPC_COMMON_SESSION_H_ - -#include -#include -#include -#include -#include - -namespace tvm { -namespace runtime { -namespace micro_rpc { - -enum class MessageType : uint8_t { - kStartSessionInit = 0x00, - kStartSessionReply = 0x01, - kTerminateSession = 0x02, - kLog = 0x03, - kNormal = 0x10, -}; - -#if defined(_MSC_VER) - -#pragma pack(push, 1) -typedef struct SessionHeader { - uint16_t session_id; - MessageType message_type; -} SessionHeader; -#pragma pack(pop) - -#else - -typedef struct SessionHeader { - uint16_t session_id; - MessageType message_type; -} __attribute__((packed)) SessionHeader; - -#endif - -/*! - * \brief CRT communication session management class. - * Assumes the following properties provided by the underlying transport: - * - in-order delivery. - * - reliable delivery. - * - * Specifically, designed for use with UARTs. Will probably work over semihosting, USB, and TCP; - * will probably not work reliably enough over UDP. - */ -class Session { - public: - /*! \brief Callback invoked when a full message is received. - * - * This function is called in the following situations: - * - When a new session is established (this typically indicates the remote end reset). - * In this case, buf is NULL. - * - When a log message or normal traffic is received. In this case, buf points to a - * valid buffer containing the message content. - * - * \param context The value of `message_received_func_context` passed to the constructor. - * \param message_type The type of session message received. Currently, this is always - * either kNormal or kLog. - * \param buf When message_type is not kStartSessionMessage, a FrameBuffer whose read cursor is - * at the first byte of the message payload. Otherwise, NULL. - */ - typedef void (*MessageReceivedFunc)(void* context, MessageType message_type, FrameBuffer* buf); - - /*! \brief An invalid nonce value that typically indicates an unknown nonce. */ - static constexpr const uint8_t kInvalidNonce = 0; - - Session(Framer* framer, FrameBuffer* receive_buffer, MessageReceivedFunc message_received_func, - void* message_received_func_context) - : local_nonce_{kInvalidNonce}, - session_id_{0}, - state_{State::kReset}, - receiver_{this}, - framer_{framer}, - receive_buffer_{receive_buffer}, - receive_buffer_has_complete_message_{false}, - message_received_func_{message_received_func}, - message_received_func_context_{message_received_func_context} { - // Session can be used for system startup logging, before the RPC server is instantiated. In - // this case, allow receive_buffer_ to be nullptr. The instantiator agrees not to use - // Receiver(). - if (receive_buffer_ != nullptr) { - receive_buffer_->Clear(); - } - } - - /*! - * \brief Send a session terminate message, usually done at startup to interrupt a hanging remote. - * \param initial_session_nonce Initial nonce that should be used on the first session start - * message. Callers should ensure this is different across device resets. 
- * \return kTvmErrorNoError on success, or an error code otherwise. - */ - tvm_crt_error_t Initialize(uint8_t initial_session_nonce); - - /*! - * \brief Terminate any previously-established session. - * \return kTvmErrorNoError on success, or an error code otherwise. - */ - tvm_crt_error_t TerminateSession(); - - /*! - * \brief Start a new session regardless of state. Sends kStartSessionMessage. - * - * Generally speaking, this function should be called once per device reset by exactly one side - * in the system. No traffic can flow until this function is called. - * - * \return kTvmErrorNoError on success, or an error code otherwise. - */ - tvm_crt_error_t StartSession(); - - /*! - * \brief Obtain a WriteStream implementation for use by the framing layer. - * \return A WriteStream to which received data should be written. Owned by this class. - */ - WriteStream* Receiver() { return &receiver_; } - - /*! - * \brief Send a full message including header, payload, and CRC footer. - * \param message_type One of MessageType; distinguishes the type of traffic at the session layer. - * \param message_data The data contained in the message. - * \param message_size_bytes The number of valid bytes in message_data. - * \return kTvmErrorNoError on success, or an error code otherwise. - */ - tvm_crt_error_t SendMessage(MessageType message_type, const uint8_t* message_data, - size_t message_size_bytes); - - /*! - * \brief Send the framing and session layer headers. - * - * This function allows messages to be sent in pieces. - * - * \param message_type One of MessageType; distinguishes the type of traffic at the session layer. - * \param message_size_bytes The size of the message body, in bytes. Excludes the framing and - * session layer headers. \return 0 on success, negative error code on failure. - * \return kTvmErrorNoError on success, or an error code otherwise. - */ - tvm_crt_error_t StartMessage(MessageType message_type, size_t message_size_bytes); - - /*! - * \brief Send a part of the message body. - * - * This function allows messages to be sent in pieces. - * - * \param chunk_data The data contained in this message body chunk. - * \param chunk_size_bytes The number of valid bytes in chunk_data. - * \return kTvmErrorNoError on success, or an error code otherwise. - */ - tvm_crt_error_t SendBodyChunk(const uint8_t* chunk_data, size_t chunk_size_bytes); - - /*! - * \brief Finish sending the message by sending the framing layer footer. - * \return kTvmErrorNoError on success, or an error code otherwise. - */ - tvm_crt_error_t FinishMessage(); - - /*! \brief Returns true if the session is in the established state. */ - bool IsEstablished() const { return state_ == State::kSessionEstablished; } - - /*! - * \brief Clear the receive buffer and prepare to receive next message. - * - * Call this function after MessageReceivedFunc is invoked. Any SessionReceiver::Write() calls - * made will return errors until this function is called to prevent them from corrupting the - * valid message in the receive buffer. - */ - void ClearReceiveBuffer(); - - /*! \brief A version number used to check compatibility of the remote session implementation. 
*/ - static const constexpr uint8_t kVersion = 0x01; - - private: - class SessionReceiver : public WriteStream { - public: - explicit SessionReceiver(Session* session) : session_{session} {} - virtual ~SessionReceiver() {} - - ssize_t Write(const uint8_t* data, size_t data_size_bytes) override; - void PacketDone(bool is_valid) override; - - private: - void operator delete(void*) noexcept {} // NOLINT(readability/casting) - Session* session_; - }; - - enum class State : uint8_t { - kReset = 0, - kNoSessionEstablished = 1, - kStartSessionSent = 2, - kSessionEstablished = 3, - }; - - void RegenerateNonce(); - - tvm_crt_error_t SendInternal(MessageType message_type, const uint8_t* message_data, - size_t message_size_bytes); - - void SendSessionStartReply(const SessionHeader& header); - - void ProcessStartSessionInit(const SessionHeader& header); - - void ProcessStartSessionReply(const SessionHeader& header); - - void OnSessionEstablishedMessage(); - - void OnSessionTerminatedMessage(); - - void SetSessionId(uint8_t initiator_nonce, uint8_t responder_nonce) { - session_id_ = initiator_nonce | (((uint16_t)responder_nonce) << 8); - } - - uint8_t InitiatorNonce(uint16_t session_id) { return session_id & 0xff; } - - uint8_t ResponderNonce(uint16_t session_id) { return (session_id >> 8) & 0xff; } - - uint8_t local_nonce_; - uint16_t session_id_; - State state_; - SessionReceiver receiver_; - Framer* framer_; - FrameBuffer* receive_buffer_; - bool receive_buffer_has_complete_message_; - MessageReceivedFunc message_received_func_; - void* message_received_func_context_; -}; - -} // namespace micro_rpc -} // namespace runtime -} // namespace tvm - -#endif // TVM_RUNTIME_CRT_RPC_COMMON_SESSION_H_ diff --git a/include/tvm/runtime/crt/rpc_common/write_stream.h b/include/tvm/runtime/crt/rpc_common/write_stream.h deleted file mode 100644 index f72ba021def6..000000000000 --- a/include/tvm/runtime/crt/rpc_common/write_stream.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file framing.h - * \brief Framing for RPC. 
- */ - -#ifndef TVM_RUNTIME_CRT_RPC_COMMON_WRITE_STREAM_H_ -#define TVM_RUNTIME_CRT_RPC_COMMON_WRITE_STREAM_H_ - -#include -#include -#include -#include - -#include "../../../../../src/support/ssize.h" - -namespace tvm { -namespace runtime { -namespace micro_rpc { - -class WriteStream { - public: - virtual ~WriteStream(); - virtual ssize_t Write(const uint8_t* data, size_t data_size_bytes) = 0; - virtual void PacketDone(bool is_valid) = 0; - - tvm_crt_error_t WriteAll(uint8_t* data, size_t data_size_bytes, size_t* bytes_consumed); -}; - -} // namespace micro_rpc -} // namespace runtime -} // namespace tvm - -#endif // TVM_RUNTIME_CRT_RPC_COMMON_WRITE_STREAM_H_ diff --git a/include/tvm/runtime/crt/stack_allocator.h b/include/tvm/runtime/crt/stack_allocator.h deleted file mode 100644 index 4184dff7510e..000000000000 --- a/include/tvm/runtime/crt/stack_allocator.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// LINT_C_FILE -#ifndef TVM_RUNTIME_CRT_STACK_ALLOCATOR_H_ -#define TVM_RUNTIME_CRT_STACK_ALLOCATOR_H_ -#include -#include - -#include "crt_config.h" -#include "error_codes.h" - -#define STACK_ALLOCATOR_TAG 0xabcd1234 -#define STACK_ALLOCATOR_TAG_SIZE_BYTES 4 - -/*! Memory alignment for allocator */ - -#ifndef TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES -#define TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES 16 -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - uint8_t* next_alloc; // Pointer to the next block of TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES - uint8_t* workspace; // Pointer to start of the workspace - size_t workspace_size; // Total number of bytes in the workspace -} tvm_workspace_t; - -/*! - * \brief Initialize the stack-based memory manager - * - * \param tvm_runtime_workspace The tvm_workspace_t struct containing state - * \param g_aot_memory The memory buffer used to allocate within - * \param workspace_size The total size of the workspace buffer workspace - */ -tvm_crt_error_t StackMemoryManager_Init(tvm_workspace_t* tvm_runtime_workspace, - uint8_t* g_aot_memory, size_t workspace_size); - -/*! - * \brief The intended user-facing function to allocate within the buffer. It wraps - * StackMemoryManager_Allocate_Body enable and disable the LIFO check that is useful for debugging - * the AoT codegen. - * - * \param tvm_runtime_workspace The tvm_workspace_t struct containing state - * \param nbytes The number of bytes required for the allocation - * \param current_alloc The pointer-to-pointer to be populated with the allocated address - */ -tvm_crt_error_t StackMemoryManager_Allocate(tvm_workspace_t* tvm_runtime_workspace, int32_t nbytes, - void** current_alloc); - -/*! 
- * \brief The internal function that accepts allocate inputs and an extra byte to say to enable the - * LIFO check that is useful in debugging for debugging the AoT codegen. - * - * \param tvm_runtime_workspace The tvm_workspace_t struct containing state - * \param nbytes The number of bytes required for the allocation - * \param current_alloc The pointer-to-pointer to be populated with the allocated address - * \param do_lifo_check This being non-zero indicates to perform a check LIFO pattern Allocs/Frees - */ -tvm_crt_error_t StackMemoryManager_Allocate_Body(tvm_workspace_t* tvm_runtime_workspace, - int32_t nbytes, void** current_alloc, - uint8_t do_lifo_check); - -/*! - * \brief The intended user-facing function to free the tensor within the buffer. It wraps - * StackMemoryManager_Free_Body enable and disable the stack allocator - * - * \param tvm_runtime_workspace The tvm_workspace_t struct containing state - * \param ptr The base pointer of the tensor to be free'd - */ -tvm_crt_error_t StackMemoryManager_Free(tvm_workspace_t* tvm_runtime_workspace, void* ptr); - -/*! - * \brief The internal function that accepts free inputs and an extra byte to say to enable the LIFO - * check that is useful in debugging for debugging the AoT codegen. - * - * \param tvm_runtime_workspace The tvm_workspace_t struct containing state - * \param ptr The base pointer of tensor to be free'd within the workspace buffer - * \param do_lifo_check This being non-zero indicates to perform a check LIFO pattern Allocs/Frees - */ -tvm_crt_error_t StackMemoryManager_Free_Body(tvm_workspace_t* tvm_runtime_workspace, void* ptr, - uint8_t do_lifo_check); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TVM_RUNTIME_CRT_STACK_ALLOCATOR_H_ diff --git a/include/tvm/runtime/micro/standalone/microtvm_runtime.h b/include/tvm/runtime/micro/standalone/microtvm_runtime.h deleted file mode 100644 index 827d91f62076..000000000000 --- a/include/tvm/runtime/micro/standalone/microtvm_runtime.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
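/* A minimal sketch of the stack-based workspace documented above. The 16 KiB size is an
 * arbitrary application choice, and crt_config.h is assumed to be on the include path as
 * stack_allocator.h requires. Allocations must be released in LIFO order. */
#include <stdint.h>
#include <tvm/runtime/crt/stack_allocator.h>

#define WORKSPACE_SIZE (16 * 1024)
static uint8_t g_aot_memory[WORKSPACE_SIZE]
    __attribute__((aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)));
static tvm_workspace_t app_workspace;

tvm_crt_error_t WorkspaceDemo(void) {
  tvm_crt_error_t err = StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE);
  if (err != kTvmErrorNoError) return err;

  void* scratch = NULL;
  err = StackMemoryManager_Allocate(&app_workspace, 256, &scratch);
  if (err != kTvmErrorNoError) return err;

  /* Frees must mirror allocations in reverse order; the _Body variants can additionally
   * verify the LIFO discipline at runtime for debugging. */
  return StackMemoryManager_Free(&app_workspace, scratch);
}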
- */ - -#ifndef TVM_RUNTIME_MICRO_STANDALONE_MICROTVM_RUNTIME_H_ -#define TVM_RUNTIME_MICRO_STANDALONE_MICROTVM_RUNTIME_H_ - -#include -#include - -#define TVM_MICRO_RUNTIME_API_API extern "C" __attribute__((visibility("default"))) - -TVM_MICRO_RUNTIME_API_API void* MicroTVMRuntimeCreate(const char* json, size_t json_len, - void* module); - -TVM_MICRO_RUNTIME_API_API void MicroTVMRuntimeDestroy(void* handle); - -TVM_MICRO_RUNTIME_API_API void MicroTVMRuntimeSetInput(void* handle, int index, void* tensor); - -TVM_MICRO_RUNTIME_API_API void MicroTVMRuntimeRun(void* handle); - -TVM_MICRO_RUNTIME_API_API void MicroTVMRuntimeGetOutput(void* handle, int index, void* tensor); - -TVM_MICRO_RUNTIME_API_API void* MicroTVMRuntimeDSOModuleCreate(const char* so, size_t so_len); - -TVM_MICRO_RUNTIME_API_API void MicroTVMRuntimeDSOModuleDestroy(void* module); - -#undef TVM_MICRO_RUNTIME_API_API - -#endif // TVM_RUNTIME_MICRO_STANDALONE_MICROTVM_RUNTIME_H_ diff --git a/python/setup.py b/python/setup.py index 594ab5fc8def..55f29c651af4 100644 --- a/python/setup.py +++ b/python/setup.py @@ -61,20 +61,6 @@ def get_lib_path(): if "3rdparty" in name: libs.append(name) - # Add standalone_crt, if present - for name in lib_path: - candidate_path = os.path.join(os.path.dirname(name), "standalone_crt") - if os.path.isdir(candidate_path): - libs.append(candidate_path) - break - - # Add microTVM template projects - for name in lib_path: - candidate_path = os.path.join(os.path.dirname(name), "microtvm_template_projects") - if os.path.isdir(candidate_path): - libs.append(candidate_path) - break - # Add tvmc configuration json files for name in lib_path: candidate_path = os.path.abspath(os.path.join(os.path.dirname(name), "..", "configs")) diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py index 7aefe78155a3..1d04772e9e3e 100644 --- a/python/tvm/__init__.py +++ b/python/tvm/__init__.py @@ -81,9 +81,6 @@ from . import relay from . import relax -if not _RUNTIME_ONLY and support.libinfo().get("USE_MICRO", "OFF") == "ON": - from . 
import micro - # NOTE: This file should be python2 compatible so we can # raise proper error message when user run the package using # an older version of the python diff --git a/python/tvm/autotvm/measure/measure_methods.py b/python/tvm/autotvm/measure/measure_methods.py index c1ea34cefe30..6a8d0f5e3c5c 100644 --- a/python/tvm/autotvm/measure/measure_methods.py +++ b/python/tvm/autotvm/measure/measure_methods.py @@ -551,15 +551,7 @@ def __call__(self, measure_input, tmp_dir, **kwargs): ) # TODO(tvm-team) consider linline _build_func_common func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs) - if self.build_func.output_format == ".model-library-format": - # Late import to preserve autoTVM with USE_MICRO OFF - try: - from tvm import micro # pylint: disable=import-outside-toplevel - except ImportError: - raise ImportError("Requires USE_MICRO") - micro.export_model_library_format(func, filename) - else: - func.export_library(filename, fcompile=self.build_func) + func.export_library(filename, fcompile=self.build_func) except Exception as e: # pylint: disable=broad-except tb = traceback.format_exc() return BuildResult(None, None, (tb, e), time.time() - tic) diff --git a/python/tvm/contrib/ethosu/__init__.py b/python/tvm/contrib/ethosu/__init__.py deleted file mode 100644 index 0ac5badae572..000000000000 --- a/python/tvm/contrib/ethosu/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Namespace for Arm(R) Ethos(TM)-U NPU contrib functionality""" diff --git a/python/tvm/contrib/ethosu/cascader/__init__.py b/python/tvm/contrib/ethosu/cascader/__init__.py deleted file mode 100644 index 1d608c04ff6e..000000000000 --- a/python/tvm/contrib/ethosu/cascader/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""The NPU cascader. - -This component performs inter-operator scheduling to optimize -for both performance and memory usage on Arm(R) Ethos(TM)-U NPUs. 
-""" -from .stripe_config import StripeConfig -from .block_config import BlockConfig -from .propagator import Propagator -from .graph import ( - PerformanceInfo, - Tensor, - Part, - TESubgraph, - CascaderGraph, - BufferMode, - register_matcher, - create_cascader_graph, -) -from .parts import InlinePart, EthosuPart -from .device_config import EthosuDeviceConfig -from .tensor_config import TensorConfigState, MemoryRegion, TensorConfig -from .plan import Plan -from .scheduler import apply_proposal, cascade, extract_memory_info -from .logging import Logging -from .cascader_options import CascaderOptions diff --git a/python/tvm/contrib/ethosu/cascader/_ffi_api.py b/python/tvm/contrib/ethosu/cascader/_ffi_api.py deleted file mode 100644 index 9f098ad3df74..000000000000 --- a/python/tvm/contrib/ethosu/cascader/_ffi_api.py +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""FFI APIs for the NPU cascader.""" -import tvm._ffi - - -tvm._ffi._init_api("contrib.ethosu.cascader", __name__) diff --git a/python/tvm/contrib/ethosu/cascader/block_config.py b/python/tvm/contrib/ethosu/cascader/block_config.py deleted file mode 100644 index b90de753f679..000000000000 --- a/python/tvm/contrib/ethosu/cascader/block_config.py +++ /dev/null @@ -1,71 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -"""Block config to hold an output block shape and a corresponding input block shape""" -from typing import List -import tvm._ffi - -from tvm.runtime import Object - -from . 
import _ffi_api - - -@tvm._ffi.register_object("contrib.ethosu.cascader.BlockConfig") -class BlockConfig(Object): - """BlockConfig class""" - - def __init__( - self, - input_shape: List[int], - output_shape: List[int], - compute_cycles: int, - output_cycles: int, - ): - self.__init_handle_by_constructor__( - _ffi_api.BlockConfig, input_shape, output_shape, compute_cycles, output_cycles - ) - - @property - def input_shape(self) -> List[int]: - return list(self._input_shape) - - @property - def output_shape(self) -> List[int]: - return list(self._output_shape) - - @property - def compute_cycles(self) -> int: - return int(self._compute_cycles) - - @property - def output_cycles(self) -> int: - return int(self._output_cycles) - - def __ge__(self, other: "BlockConfig"): - if len(self.output_shape) != len(other.output_shape): - return False - - return all(a >= b for a, b in zip(self.output_shape, other.output_shape)) - - def __lt__(self, other: "BlockConfig"): - if len(self.output_shape) != len(other.output_shape): - return False - - return other >= self - - def __repr__(self) -> str: - return f"BlockConfig(output_shape={self.output_shape})" diff --git a/python/tvm/contrib/ethosu/cascader/cascader_options.py b/python/tvm/contrib/ethosu/cascader/cascader_options.py deleted file mode 100644 index 9d5562c44b39..000000000000 --- a/python/tvm/contrib/ethosu/cascader/cascader_options.py +++ /dev/null @@ -1,90 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -"""Object to hold options for the NPU cascader""" -import tvm._ffi - -from tvm.runtime import Object - -from . import _ffi_api -from .tensor_config import MemoryRegion - - -@tvm._ffi.register_object("contrib.ethosu.cascader.CascaderOptions") -class CascaderOptions(Object): - """ - A class to hold configuration options for the cascader. - - Attributes - ---------- - cascade_region : MemoryRegion - The MemoryRegion to place cascading buffers into. - max_proposals : int - The maximum number of Proposals to generate. - stripe_factors : int - How many striping factors to try per axis. - max_plan_size : int - The maximum number of Parts in a Plan. - max_open_plans : int - The maximum number of open Plans to keep after culling. - max_closed_plans : int - The maxmum number of closed Plans to keep after culling. - always_copy_size : int - The maximum size of a Tensor that will always be copied into the cascade region. - disable_pareto_plans : bool - Disable pareto culling for Plans. - disable_pareto_proposals : bool - Disable pareto culling for Proposals. - enable_multi_dimensional_striping : bool - Enable striping in multiple dimensions simultaneously. - disable_block_culling : bool - Disable culling of block configs. 
- enable_striping : bool - A boolean option to enable striping - - """ - - def __init__( - self, - cascade_region: MemoryRegion, - max_proposals: int, - stripe_factors: int, - max_plan_size: int, - max_open_plans: int, - max_closed_plans: int, - always_copy_size: int, - disable_pareto_plans: bool = False, - disable_pareto_proposals: bool = False, - enable_multi_dimensional_striping: bool = False, - disable_block_culling: bool = True, - enable_striping: bool = False, - ): - self.__init_handle_by_constructor__( - _ffi_api.CascaderOptions, - cascade_region, - max_proposals, - stripe_factors, - max_plan_size, - max_open_plans, - max_closed_plans, - always_copy_size, - disable_pareto_plans, - disable_pareto_proposals, - enable_multi_dimensional_striping, - disable_block_culling, - enable_striping, - ) diff --git a/python/tvm/contrib/ethosu/cascader/device_config.py b/python/tvm/contrib/ethosu/cascader/device_config.py deleted file mode 100644 index 7c38c4ac4971..000000000000 --- a/python/tvm/contrib/ethosu/cascader/device_config.py +++ /dev/null @@ -1,895 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -# pylint: disable=too-many-nested-blocks -"""Device config class to hold information about the target hardware""" -from typing import Tuple, List, Dict, Optional -from functools import reduce - -import math -import numpy as np - -import tvm -from . import BlockConfig -from . import StripeConfig -from . 
import Propagator - - -def _round_up(a: int, b: int) -> int: - """Round up to a multiple of b""" - return ((a + b - 1) // b) * b - - -def _round_up_div(a: int, b: int) -> int: - """Divide by b and round up to a multiple of b""" - return (a + b - 1) // b - - -class _Shape: - """Helper class for dealing with Tensor shapes of different layouts""" - - def __init__(self, shape: List[int], layout="NHWC"): - if layout == "NHCWB16": - self.height = int(shape[1]) - self.width = int(shape[3]) - self.depth = int(shape[2]) * int(shape[4]) - else: - # identity layout is NHWC but the shape is not always 4 - length = len(shape) - if length == 4: - self.height = int(shape[1]) - self.width = int(shape[2]) - self.depth = int(shape[3]) - elif length == 3: - self.height = int(shape[0]) - self.width = int(shape[1]) - self.depth = int(shape[2]) - elif length == 2: - self.height = int(shape[0]) - self.width = int(shape[1]) - self.depth = 1 - elif length == 1: - self.height = int(shape[0]) - self.width = 1 - self.depth = 1 - - def round_up(self, other: "_Shape"): - self.height = _round_up(self.height, other.height) - self.width = _round_up(self.width, other.width) - self.depth = _round_up(self.depth, other.depth) - - def area(self) -> int: - return self.height * self.width - - def as_list(self): - return [1, self.height, self.width, self.depth] - - -class EthosuDeviceConfig: - """Arm(R) Ethos(TM)-U NPU config class""" - - def __init__(self, device: str, disable_block_bulling: bool = False): - self._device = device - self._subkernel_limits = (8, 8) - self._output_cycles = (1, 2, 3, 4, 6) - self._split_depth = 16 - self._max_block_shape = _Shape([1, 32, 64, 128]) - self._bank_size_bytes = 1024 - self._disable_block_culling = disable_block_bulling - if self._device == "ethos-u55-256": - self._micro_block = _Shape([1, 2, 2, 8]) - self._input_micro_block = _Shape([1, 2, 2, 8]) - self._delay_cycles = (2, 2) - self._activation_cycles = (0.25, 1) - self._output_units = 8 - - self._total_banks = 48 - self._reserved_banks = 4 - self._input_granularity = {1: 8, 2: 8, 4: 16} - self._accumulator_granularity = {4: 16, 5: 20} - self._lut_reserved = True - elif self._device == "ethos-u55-128": - self._micro_block = _Shape([1, 1, 2, 8]) - self._input_micro_block = _Shape([1, 1, 2, 8]) - self._delay_cycles = (2, 3) - self._activation_cycles = (0.5, 1) - self._output_units = 4 - - self._total_banks = 24 - self._reserved_banks = 4 - self._input_granularity = {1: 4, 2: 4, 4: 8} - self._accumulator_granularity = {4: 8, 5: 12} - self._lut_reserved = True - elif self._device == "ethos-u55-64": - self._micro_block = _Shape([1, 1, 1, 8]) - self._input_micro_block = _Shape([1, 1, 1, 8]) - self._delay_cycles = (2, 3) - self._activation_cycles = (1, 1) - self._output_units = 2 - - self._total_banks = 16 - self._reserved_banks = 2 - self._input_granularity = {1: 2, 2: 2, 4: 4} - self._accumulator_granularity = {4: 4, 5: 8} - self._lut_reserved = False - elif self._device == "ethos-u55-32": - self._micro_block = _Shape([1, 1, 1, 4]) - self._input_micro_block = _Shape([1, 1, 1, 8]) - self._delay_cycles = (3, 7) - self._activation_cycles = (1, 2) - self._output_units = 1 - - self._total_banks = 16 - self._reserved_banks = 2 - self._input_granularity = {1: 2, 2: 2, 4: 4} - self._accumulator_granularity = {4: 4, 5: 4} - self._lut_reserved = False - - def _get_output_cycles( - self, op_type: str, op_str: str, ifm_dtype: str, ofm_dtype: str, activation: str - ) -> float: - """Estimate cycles per output element for an NPU operator - - Parameters - 
---------- - op_type : str - The NPU primitive operator - "ethosu_pooling" - op_str : str - The type of NPU operator. - "MAX" - ifm_dtype: str - Datatype of the Input Feature Map tensor (IFM) - ofm_dtype: str - Datatype of the Output Feature Map tensor (OFM) - activation : str - The activation function to use. - "NONE" - no activation function. - "CLIP" - clip the output between clip_min and clip_max. - "TANH" - tanh activation function. - "SIGMOID" - sigmoid activation function. - "LUT" - use a look-up table to perform the activation function. - - Returns - ------- - float - The cycles per output element - """ - cycles = 0 - bw_limit = 0 - if op_type == "ethosu_pooling" and op_str == "MAX": - cycles = self._output_cycles[0] - elif op_type in ("ethosu_pooling", "ethosu_conv2d", "ethosu_depthwise_conv2d"): - cycles = self._output_cycles[1] if ifm_dtype == "int8" else self._output_cycles[2] - elif op_type == "ethosu_binary_elementwise": - # Binary Bandwidth Limitations - if ifm_dtype == "int8": - bw_limit = 0.125 if ofm_dtype == "int8" else 0.75 - elif ifm_dtype == "int16": - bw_limit = 0.75 if ofm_dtype == "int16" else 1 - else: - bw_limit = 1.5 - - if op_str in ("MIN", "MAX"): - cycles = self._output_cycles[1] - elif op_str == "MUL": - cycles = self._output_cycles[2] - if op_str in ("ADD", "SUB"): - if ofm_dtype == "int32": - cycles = ( - self._output_cycles[2] if ifm_dtype == "int32" else self._output_cycles[3] - ) - else: - cycles = self._output_cycles[4] - - elif op_type == "ethosu_unary_elementwise": - # Unary Bandwidth Limitations - if ifm_dtype == "int16": - bw_limit = 0.25 - elif ifm_dtype == "int32": - bw_limit = 1 - - if op_str == "CLZ": - cycles = self._output_cycles[1] - elif op_str in ("SHL", "SHR"): - cycles = self._output_cycles[2] - elif op_str in ("LRELU", "ABS"): - cycles = self._output_cycles[1] - if ifm_dtype == "int16": - bw_limit = 0.5 - - act_cycles = 0 - if activation == "CLIP": - act_cycles = self._activation_cycles[0] - elif activation in ("LUT", "TANH", "SIGMOID"): - act_cycles = self._activation_cycles[1] - - return max((cycles / self._output_units), act_cycles, bw_limit) - - def _get_delay_cycles(self, op_type: str, ifm_dtype: str) -> int: - """Get the number of delay cycles during a bubble - - Parameters - ---------- - op_type : str - The NPU primitive operator - "ethosu_pooling" - op_str : str - The type of NPU operator. - "MAX" - ifm_dtype: str - Datatype of the Input Feature Map tensor (IFM) - - Returns - ---------- - int - The amount of delay cycles - """ - if op_type in ("ethosu_conv2d", "ethosu_depthwise2d", "ethosu_pooling"): - if ifm_dtype == "int16": - return self._delay_cycles[1] - - return self._delay_cycles[0] - - return 0 - - def _get_weight_decoder_cycles(self, op_type: str) -> int: - """Get cycle estimate for weight decoding - - Parameters - ---------- - op_type: str - The NPU primitive operator - "ethosu_pooling" - - Returns - ---------- - int - Estimated cycles for weight decoding - """ - if op_type in ("ethosu_conv2d", "ethosu_depthwise2d"): - return 32 * self._micro_block.depth // 8 - - return 0 - - def get_output_quantum(self, ofm_layout: str) -> Tuple[int]: - """Get the atomic output volume - - Parameters - ---------- - ofm_layout : str - The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16". 
- - Returns - ---------- - Tuple[int] - The atomic output volume formatted to the ofm_layout parameter - """ - if ofm_layout == "NHCWB16": - return [ - 1, - self._micro_block.height, - 1, - self._micro_block.width, - self._micro_block.depth, - ] - - return self._micro_block.as_list() - - def _align(self, x: int, n: int) -> int: - return int(math.ceil(x / n) * n) - - def _get_input_size( - self, output_size: int, kernel_stride: int, border: int, upscaling_factor: int - ) -> int: - return int(math.ceil(((output_size - 1) * kernel_stride + border)) / upscaling_factor) - - def _get_dilated_kernel_size(self, kernel_size: int, dilation: int) -> int: - return (kernel_size - 1) * dilation + 1 - - def _get_input_block( - self, - output_block: _Shape, - input_shape: _Shape, - dtype: str, - op_type: str, - partkernel: bool, - stride_h: int, - stride_w: int, - dilated_kernel_h: int, - dilated_kernel_w: int, - upscaling_factor: int, - ) -> _Shape: - height = self._get_input_size( - output_block.height, - stride_h, - min(dilated_kernel_h, self._subkernel_limits[0]), - upscaling_factor, - ) - width = self._get_input_size( - output_block.width, - stride_w, - min(dilated_kernel_w, self._subkernel_limits[1]), - upscaling_factor, - ) - - if op_type == "ethosu_conv2d": - if dtype == "int8": - if partkernel: - depth = self._align(min(32, input_shape.depth), 8) - else: - depth = self._align(min(16, input_shape.depth), 8) - elif dtype == "int16": - depth = self._align(min(16, input_shape.depth), 4) - else: - depth = self._align(min(8, input_shape.depth), 2) - else: - depth = output_block.depth - - return _Shape( - [ - 1, - self._align(height, self._micro_block.height), - self._align(width, self._micro_block.width), - depth, - ] - ) - - def get_kernel_steps( - self, - op_type: str, - dilated_kernel_h: int, - dilated_kernel_w: int, - ifm_dtype: str, - partkernel: bool = False, - ) -> List[int]: - """Calculate the total number of subkernels and their sizes - - Parameters - ---------- - op_type : str - The NPU primitive operator - "ethosu_pooling" - dilated_kernel_h: int - Height of dilated kernel - dilated_kernel_w: int - Width of dilated kernel - ifm_dtype: str - Datatype of the Input Feature Map tensor (IFM) - partkernel: bool - Flag showing whether part-kernel first traversal is used - - Returns - ---------- - List[int] - List where each entry contains the amount of elements in one of the subkernels - """ - if op_type == "ethosu_binary_elementwise": - return [1] - - subkernels = self._get_subkernels(dilated_kernel_h, dilated_kernel_w) - - # Determine the number of kernel steps per subkernel - kernel_steps = [] - for y, x in subkernels: - subkernel_elements = x * y - if op_type == "ethosu_conv2d" and partkernel: - # Part-kernel-first traversal conv2d - divisor = 4 if ifm_dtype == "int8" else 2 - kernel_steps.append(int(_round_up_div(subkernel_elements, divisor))) - elif op_type == "ethosu_depthwise_conv2d": - kernel_steps.append(int(_round_up_div(subkernel_elements, 4))) - else: - # Depth-first traversal conv2d or pooling - kernel_steps.append(int(subkernel_elements)) - - return kernel_steps - - def _get_subkernels(self, dilated_kernel_h: int, dilated_kernel_w: int): - num_subkernels_y = _round_up_div(dilated_kernel_h, self._subkernel_limits[0]) - num_subkernels_x = _round_up_div(dilated_kernel_w, self._subkernel_limits[1]) - subkernels_y = [ - min((dilated_kernel_h - i * self._subkernel_limits[0]), self._subkernel_limits[0]) - for i in range(num_subkernels_y) - ] - subkernels_x = [ - min((dilated_kernel_w - i * 
self._subkernel_limits[1]), self._subkernel_limits[1]) - for i in range(num_subkernels_x) - ] - - subkernels = [] - for y in subkernels_y: - for x in subkernels_x: - subkernels.append((y, x)) - - return subkernels - - def _get_accumulator_width(self, op_type: str, ifm_dtype: str): - if ifm_dtype == "int16" and op_type != "ethosu_pooling": - return 5 - - return 4 - - def is_partkernel( - self, op_type: str, ifm_channels: int, ifm_dtype: str, kernel_elements: int - ) -> bool: - """Determine which block traversal strategy has better DPU utilization - - Parameters - ---------- - op_type: str - The NPU primitive operator - "ethosu_pooling" - ifm_channels: int - Number of input channels - ifm_dtype: str - Datatype of the Input Feature Map tensor (IFM) - kernel_elements: int - Total number of elements in the kernel - - Returns - ---------- - bool - True if partkernel first has best DPU utilization - """ - if op_type != "ethosu_conv2d": - return False - - depth_first_utilization = ifm_channels / _round_up( - ifm_channels, 32 if ifm_dtype == "int8" else 16 - ) - part_kernel_first_utilization = (ifm_channels / _round_up(ifm_channels, 8)) * ( - kernel_elements / _round_up(kernel_elements, 4 if ifm_dtype == "int8" else 2) - ) - - return part_kernel_first_utilization > depth_first_utilization or ifm_channels <= 8 - - def _get_input_banks(self, input_block_shape, input_bytewidth): - input_bytes = input_block_shape.area() * self._align( - input_block_shape.depth * input_bytewidth, 8 - ) - input_banks = _round_up_div(input_bytes, self._bank_size_bytes) * 2 - input_banks = _round_up(input_banks, self._input_granularity[input_bytewidth]) - - return input_banks - - def _get_accumulator_banks(self, output_block_shape, acc_bytewidth): - acc_depth = _round_up(output_block_shape.depth, 8) - acc_bytes = output_block_shape.area() * self._align(acc_depth, 8) * acc_bytewidth - acc_banks = _round_up_div(acc_bytes, self._bank_size_bytes) * 2 - acc_banks = _round_up(acc_banks, self._accumulator_granularity[acc_bytewidth]) - - return acc_banks - - @staticmethod - def _create_layout_block(nhwc_block_config, layout): - """A helper function to convert to brick layout""" - if layout == "NHCWB16": - return [ - nhwc_block_config[0], - nhwc_block_config[1], - 1 + ((nhwc_block_config[3] - 1) // 16), - nhwc_block_config[2], - 16, - ] - # else it could only be NHWC - return nhwc_block_config - - def get_elementwise_block_config( - self, - ifm_propagator: Propagator, - ifm2_propagator: Optional[Propagator], - op_attrs: Dict, - ofm_shape: List[int], - output_layout: str, - input_layout: str, - input2_layout: Optional[str], - ifm_dtype: str, - ofm_dtype: str, - ) -> List[BlockConfig]: - """Get a suitable block config for an elementwise operator - - Parameters - ---------- - ifm_propagator: Propagator, - The propagator containing the data dependencies between input and output - ifm2_propagator: Propagator, - The propagator containing the data dependencies between input2 and output - op_attrs: Dict, - Dictionary containing operator attributes - ofm_shape: List[int], - Shape of the output tensor - output_layout: str, - The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16". - input_layout: str, - The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16". - input2_layout: str, - The layout of the Input2 Feature Map tensor. Can be "NHWC" or "NHCWB16". 
- ifm_dtype: str, - Datatype of the Input Feature Map tensor (IFM) - ofm_dtype: str, - Datatype of the Output Feature Map tensor (OFM) - - Returns - ---------- - List[BlockConfig] - List containing a single suitable block config - """ - block_config = [] - output_shape = [int(a) for a in ofm_shape] - - op_type = op_attrs.get("op") - op_str = op_attrs.get("op_str") - activation = op_attrs.get("activation", "NONE") - - input_bytewidth = 1 if ifm_dtype == "int8" else 2 if ifm_dtype == "int16" else 4 - banks_available = self._total_banks - self._reserved_banks - if activation == "LUT" and not self._lut_reserved: - banks_available -= 2 - - # Handle user-forced block config - options = tvm.transform.PassContext.current().config.get("relay.ext.ethos-u.options", None) - if options and options.dev_force_block_config: - block_config = [int(v) for v in options.dev_force_block_config.split("x")] - assert len(block_config) == 3 - if output_layout == "NHWC": - block_shape = [output_shape[0], block_config[0], block_config[1], block_config[2]] - else: - block_shape = [ - output_shape[0], - block_config[0], - 1 + ((block_config[2] - 1) // 16), - block_config[1], - 16, - ] - output_cycles = self._get_output_cycles( - op_type, op_str, ifm_dtype, ofm_dtype, activation - ) - output_cycles *= reduce(lambda a, b: a * b, block_shape, 1) - output_cycles = int(math.ceil(output_cycles)) - return [BlockConfig(block_shape, block_shape, 0, output_cycles)] - - # Split the block in half until it fits into SHRAM - max_height, max_width, max_depth = self._max_block_shape.as_list()[1:] - if output_layout == "NHCWB16": - output_height = output_shape[1] - output_width = output_shape[3] - output_channels = output_shape[2] * 16 - else: - output_height = output_shape[1] - output_width = output_shape[2] - output_channels = output_shape[3] - - output_nhwc_block = [ - 1, - _round_up(min(output_height, max_height), self._micro_block.height), - _round_up(min(output_width, max_width), self._micro_block.width), - _round_up(min(output_channels, max_depth), self._micro_block.depth), - ] - output_block = self._create_layout_block(output_nhwc_block, output_layout) - split_order = (a for a in [1, 2, 3]) - split_axis = next(split_order) - - offset = [0] * len(output_block) - stripes = [1] * len(output_block) - order = [1, 2, 4, 3, 0] if output_layout == "NHCWB16" else [1, 2, 3, 4] - while True: - # Create stripe config for output block - output_stripe_config = StripeConfig( - output_block, output_block, output_block, order, stripes, offset - ) - - # Propagate the output to obtain the two input blocks - input_block = _Shape(ifm_propagator.propagate(output_stripe_config).shape, input_layout) - if ifm2_propagator: - input2_block = _Shape( - ifm2_propagator.propagate(output_stripe_config).shape, input2_layout - ) - else: - # Unary elementwise - input2_block = input_block - - input_block.round_up(self._input_micro_block) - input2_block.round_up(self._input_micro_block) - - # Banks required for input block - input_banks = self._get_input_banks(input_block, input_bytewidth) - # Banks required for input2 block - input2_banks = self._get_input_banks(input2_block, input_bytewidth) - - # Check whether or not both IFMs fit into SHRAM - if (input_banks + input2_banks) <= banks_available: - output_cycles = self._get_output_cycles( - op_type, op_str, ifm_dtype, ofm_dtype, activation - ) - output_cycles *= reduce(lambda a, b: a * b, output_block, 1) - output_cycles = int(math.ceil(output_cycles)) - block_config.append( - BlockConfig(input_block.as_list(), 
output_block, 0, output_cycles) - ) - break - - if output_nhwc_block[split_axis] == self._micro_block.as_list()[split_axis]: - split_axis = next(split_order) - - output_nhwc_block[split_axis] = _round_up( - _round_up_div(output_nhwc_block[split_axis], 2), - self._micro_block.as_list()[split_axis], - ) - output_block = self._create_layout_block(output_nhwc_block, output_layout) - - return block_config - - def _get_subkernel_propagator( - self, op_attrs, ifm_propagator, input_layout, output_layout, depth - ): - op_type = op_attrs.get("op") - stride_h = int(op_attrs.get("stride_h", 1)) - stride_w = int(op_attrs.get("stride_w", 1)) - transform = ifm_propagator.transform - - if op_type != "ethosu_identity": - if input_layout == "NHCWB16": - transform[1][-1] = min(transform[1][-1], self._subkernel_limits[0] - stride_h) - transform[3][-1] = min(transform[3][-1], self._subkernel_limits[1] - stride_w) - else: - transform[1][-1] = min(transform[1][-1], self._subkernel_limits[0] - stride_h) - transform[2][-1] = min(transform[2][-1], self._subkernel_limits[1] - stride_w) - - if op_type in ("ethosu_pooling", "ethosu_depthwise_conv2d"): - if output_layout == "NHCWB16" and input_layout == "NHWC": - transform[3][-1] = depth - elif output_layout == "NHCWB16" and input_layout == "NHCWB16": - transform[2][-1] = 1 + ((depth - 1) // 16) - - return Propagator(transform, ifm_propagator.offset) - - def get_valid_block_configs( - self, - ifm_propagator: Propagator, - op_attrs: Dict, - ofm_shape: List[int], - ofm_channels: int, - ifm_channels: int, - output_layout: str, - input_layout: str, - ifm_dtype: str, - ofm_dtype: str, - kernel_h: int = 1, - kernel_w: int = 1, - ) -> List[BlockConfig]: - """Get all of the valid block configs - - Parameters - ---------- - ifm_propagator: Propagator, - The propagator containing the data dependencies between input and output - op_attrs: Dict, - Dictionary containing operator attributes - ofm_shape: List[int], - Shape of the output tensor - ofm_channels: int, - Number of output channels - ifm_channels: int, - Number of input channels - output_layout: str, - The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16". - input_layout: str, - The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16". 
- ifm_dtype: str, - Datatype of the Input Feature Map tensor (IFM) - ofm_dtype: str, - Datatype of the Output Feature Map tensor (OFM) - kernel_h: int, - Height of kernel - kernel_h: int - Width of kernel - - Returns - ---------- - List[BlockConfig] - List containing all of the valid block configs - """ - valid_block_configs = [] - - op_type = op_attrs.get("op") - op_str = op_attrs.get("op_str") - activation = op_attrs.get("activation", "NONE") - upscaling_factor = 1 if op_attrs.get("upscale", "NONE") == "NONE" else 2 - - if output_layout == "NHCWB16": - output_shape = _Shape([1, ofm_shape[1], ofm_shape[3], ofm_channels]) - else: - output_shape = _Shape(ofm_shape) - - # Define search space - max_height = min(output_shape.height, self._max_block_shape.height) - min_height = max(self._micro_block.height, upscaling_factor) - - max_width = min(output_shape.width, self._max_block_shape.width) - min_width = max(self._micro_block.width, upscaling_factor) - - max_depth = min(ofm_channels, self._max_block_shape.depth) - min_depth = max(self._micro_block.depth, upscaling_factor) - - heights = range(min_height, max_height + min_height, min_height) - widths = range(min_width, max_width + min_width, min_width) - depths = range(min_depth, max_depth + min_depth, min_depth) - - # Handle user-forced block config - options = tvm.transform.PassContext.current().config.get("relay.ext.ethos-u.options", None) - forced = False - if options and options.dev_force_block_config: - block_config = [int(v) for v in options.dev_force_block_config.split("x")] - assert len(block_config) == 3 - heights = [block_config[0]] - widths = [block_config[1]] - depths = [block_config[2]] - forced = True - - input_bytewidth = 1 if ifm_dtype == "int8" else 2 - acc_bytewidth = self._get_accumulator_width(op_type, ifm_dtype) - banks_available = self._total_banks - self._reserved_banks - if activation == "LUT" and not self._lut_reserved: - banks_available -= 2 - - # Input block depth has additional limitations for operators that require full input depth - input_block_depth = 0 - partkernel = self.is_partkernel(op_type, ifm_channels, ifm_dtype, kernel_h * kernel_w) - if op_type == "ethosu_conv2d": - if partkernel: - input_block_depth = min(ifm_channels, 16) - else: - input_block_depth = min(ifm_channels, 32) - - for depth in reversed(depths): - if (depth < output_shape.depth) and (depth % self._split_depth != 0) and not forced: - # Block depth has to be less than full depth or a multiple of the split depth - continue - - subkernel_propagator = self._get_subkernel_propagator( - op_attrs, ifm_propagator, input_layout, output_layout, depth - ) - - for width in reversed(widths): - for height in reversed(heights): - if output_layout == "NHCWB16": - output_block = ( - 1, - height, - 1 + ((depth - 1) // 16), - width, - 16, - ) - order = [1, 2, 4, 3, 0] - else: - output_block = (1, height, width, depth) - order = [1, 2, 3, 4] - - offset = [0] * len(output_block) - stripes = [1] * len(output_block) - block_stripe_config = StripeConfig( - output_block, - output_block, - output_block, - order, - stripes, - offset, - ) - - # Propagate output block - input_block = subkernel_propagator.propagate(block_stripe_config) - - input_block_shape = _Shape(input_block.shape, input_layout) - input_block_shape.round_up(self._input_micro_block) - - output_block_shape = _Shape(output_block, output_layout) - - if op_type == "ethosu_conv2d": - input_block_shape.depth = input_block_depth - - # Banks required for input block - input_banks = 
self._get_input_banks(input_block_shape, input_bytewidth) - # Banks required for accumulation - acc_banks = self._get_accumulator_banks(output_block_shape, acc_bytewidth) - - if (input_banks + acc_banks) <= banks_available: - output_cycles = self._get_output_cycles( - op_type, op_str, ifm_dtype, ofm_dtype, activation - ) - output_cycles *= np.prod(output_block).tolist() - output_cycles = int(math.ceil(output_cycles)) - compute_cycles = self._estimate_compute_cycles_per_block( - op_type, - output_block_shape, - input_block_shape, - kernel_h, - kernel_w, - ifm_channels, - "int8", - partkernel, - ) - block_config = BlockConfig( - input_block_shape.as_list(), output_block, compute_cycles, output_cycles - ) - - if self._disable_block_culling: - # Block culling disabled - add all block configs that fit - valid_block_configs.append(block_config) - else: - # Add block config only if it's not dominated by an existing block. - # A block config is dominated by another if its output_shape is greater - # or equal in every dimension and strictly greater in at least one - # dimension. - dominated = False - for valid_block in valid_block_configs: - if block_config < valid_block: - dominated = True - break - - if not dominated: - valid_block_configs.append(block_config) - - # Every consecutive block in the innermost loop will be dominated by - # this one so break - break - - return valid_block_configs - - def _estimate_compute_cycles_per_block( - self, - op_type: str, - block_shape: _Shape, - input_block_shape: _Shape, - kernel_h: int, - kernel_w: int, - input_channels: int, - ifm_dtype: str, - partkernel: bool = False, - ) -> Tuple[int, int]: - # Calculate the amount of micro blocks per block, per axis - num_quantum_x = _round_up_div(block_shape.width, self._micro_block.width) - num_quantum_y = _round_up_div(block_shape.height, self._micro_block.height) - num_quantum_z = _round_up_div(block_shape.depth, self._micro_block.depth) - num_quantum_xy = num_quantum_x * num_quantum_y - - kernel_steps = self.get_kernel_steps(op_type, kernel_h, kernel_w, ifm_dtype, partkernel) - - wd_cycles = self._get_weight_decoder_cycles(op_type) - delay_cycles = self._get_delay_cycles(op_type, ifm_dtype) - cycle_quantum = 4 - - compute_cycles = 0 - for subkernel_steps in kernel_steps: - subkernel_cycles = 1 if op_type == "ethosu_pooling" else subkernel_steps - compute_cycles += ( - max(wd_cycles, cycle_quantum * num_quantum_xy) * subkernel_cycles * num_quantum_z - ) - - if num_quantum_xy == 1: - if num_quantum_z == 1: - compute_cycles += delay_cycles * subkernel_steps - elif subkernel_steps > 1: - compute_cycles += delay_cycles * (subkernel_steps - 1) * num_quantum_z - - if partkernel: - compute_cycles *= _round_up_div(input_block_shape.depth, 8) - - if op_type == "ethosu_conv2d": - compute_cycles *= _round_up_div(input_channels, input_block_shape.depth) - - return compute_cycles diff --git a/python/tvm/contrib/ethosu/cascader/graph.py b/python/tvm/contrib/ethosu/cascader/graph.py deleted file mode 100644 index ca0d8fef9e16..000000000000 --- a/python/tvm/contrib/ethosu/cascader/graph.py +++ /dev/null @@ -1,268 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Graph objects to define compute graphs for the NPU cascader.""" -from typing import List, Dict -from enum import IntEnum -from collections import namedtuple -import numpy as np - -import tvm._ffi -from tvm import te -from tvm.runtime import Object - -from .stripe_config import StripeConfig -from .device_config import EthosuDeviceConfig -from . import _ffi_api - - -# A global store to register matching functions -REGISTERED_MATCHERS = [] - - -TESubgraph = namedtuple("TESubgraph", ["input_tensors", "output_tensor"]) - - -class BufferMode(IntEnum): - RECOMPUTE = 0 - ROLLING = 1 - - -@tvm._ffi.register_object("contrib.ethosu.cascader.PerformanceInfo") -class PerformanceInfo(Object): - """PerformanceInfo class""" - - @property - def compute_cycles(self): - return self._compute_cycles - - @property - def read_bytes(self): - return list(self._read_bytes) - - @property - def write_bytes(self): - return self._write_bytes - - @property - def block_config(self): - return self._block_config - - -@tvm._ffi.register_object("contrib.ethosu.cascader.Tensor") -class Tensor(Object): - """Tensor class""" - - def __init__(self, shape, dtype, is_constant=False, compression_ratio=1): - self.__init_handle_by_constructor__( - _ffi_api.Tensor, shape, dtype, is_constant, compression_ratio - ) - - def add_producer(self, part): - _ffi_api.TensorAddProducer(self, part) - - def add_consumer(self, part): - _ffi_api.TensorAddConsumer(self, part) - - @property - def producers(self): - return list(self._producers) - - @property - def consumers(self): - return list(self._consumers) - - @property - def shape(self): - return list(self._shape) - - @property - def dtype(self): - return self._dtype - - @property - def is_constant(self): - return self._is_constant - - @property - def compression_ratio(self): - return self._compression_ratio - - @property - def size(self): - return self._size - - -class Part(Object): - """Part base class""" - - def set_input(self, index: int, tensor: Tensor): - _ffi_api.PartSetInput(self, index, tensor) - - def set_output(self, tensor: Tensor): - _ffi_api.PartSetOutput(self, tensor) - - def calculate_input_stripe_configs( - self, output_stripe_config: StripeConfig - ) -> List[StripeConfig]: - return list(_ffi_api.PartCalculateInputStripeConfigs(self, output_stripe_config)) - - def get_stripe_align_hint(self) -> List[int]: - return list(_ffi_api.PartGetStripeAlignHint(self)) - - def get_performance_info( - self, stripe_config: StripeConfig, buffer_mode: BufferMode - ) -> PerformanceInfo: - return _ffi_api.PartGetPerformanceInfo(self, stripe_config, buffer_mode) - - @property - def input_tensors(self): - return list(self._input_tensors) - - @property - def output_tensor(self): - return self._output_tensor - - @property - def propagators(self): - return list(self._propagators) - - @property - def in_line(self): - return self._in_line - - @property - def subgraph(self): - return TESubgraph(list(self._te_input_tensors), self._te_output_tensor) - - -@tvm._ffi.register_object("contrib.ethosu.cascader.CascaderGraph") -class CascaderGraph(Object): - """A class to describe a graph of Parts and 
Tensors used by the cascader. - - This class describes a graph consisting of two object types: Tensors and Parts. - It defines a topological ordering on the graph such that each Part and Tensor has a - position in the ordering. This ordering is used by the Plan and Proposal generation - algorithms. It is also the ordering the Parts are expected to be executed in. - - In addition to defining an ordering, the Parts and Tensors are also all given unique - IDs which they can be referred to by.""" - - def __init__(self, input_tensors: List[Tensor], output_tensors: List[Tensor]): - self.__init_handle_by_constructor__(_ffi_api.CascaderGraph, input_tensors, output_tensors) - - def get_part_id(self, part: Part) -> int: - return _ffi_api.CascaderGraphGetPartID(self, part) - - def get_tensor_id(self, tensor: Tensor) -> int: - return _ffi_api.CascaderGraphGetTensorID(self, tensor) - - @property - def input_tensors(self): - return list(self._input_tensors) - - @property - def output_tensors(self): - return list(self._output_tensors) - - @property - def tensor_order(self): - return list(self._tensor_order) - - @property - def part_order(self): - return list(self._part_order) - - -def register_matcher(matcher): - """Register a match function to the frontend. - - A match function takes a te.Tensor and checks whether it matches - a known operator/operator sequence. If it does, it returns a Part - which models the behaviour of that operator sequence. Otherwise, - it returns None. - """ - REGISTERED_MATCHERS.append(matcher) - return matcher - - -def create_cascader_graph( - te_graph: TESubgraph, const_dict: Dict[int, np.ndarray], device_config: EthosuDeviceConfig -) -> CascaderGraph: - """Create a CascaderGraph from a Tensor Expression graph and constant dictionary. - - Parameters - ---------- - te_graph : TESubgraph - The Tensor Expression graph. - const_dict : Dict[int, np.ndarray] - The constant dictionary. - device_config : EthosuDeviceConfig - Target device configuration. - - Returns - ------- - CascaderGraph - The CascaderGraph. - """ - tensor_map = {} - - def _visit_tensor(tensor): - if tensor not in tensor_map: - is_const = False - # Logic to determine if the tensor is constant - if tensor in list(te_graph.inputs): - i = list(te_graph.inputs).index(tensor) - if i in const_dict: - is_const = True - - # TODO(@mbaret): Calculate the compression ratio - plan_tensor = Tensor( - tensor.shape, - tensor.dtype, - is_constant=is_const, - ) - tensor_map[tensor] = plan_tensor - if isinstance(tensor.op, te.PlaceholderOp) or tensor in te_graph.inputs: - return - - input_tensors = [] - # Check whether any of the registered matchers match the current tensor - for matcher in REGISTERED_MATCHERS: - part = matcher(tensor, device_config) - if part: - input_tensors = part.subgraph.input_tensors - break - - assert part is not None, f"The tensor {tensor} doesn't match any part." 
- part.set_output(plan_tensor) - plan_tensor.add_producer(part) - for i, input_tensor in enumerate(input_tensors): - _visit_tensor(input_tensor) - part.set_input(i, tensor_map[input_tensor]) - tensor_map[input_tensor].add_consumer(part) - - for output in te_graph.outputs: - _visit_tensor(output) - - input_tensors = [] - for t in te_graph.inputs: - # This is needed because sometimes there are orphaned constants - if t in tensor_map: - input_tensors.append(tensor_map[t]) - - output_tensors = [tensor_map[t] for t in te_graph.outputs] - return CascaderGraph(input_tensors, output_tensors) diff --git a/python/tvm/contrib/ethosu/cascader/logging.py b/python/tvm/contrib/ethosu/cascader/logging.py deleted file mode 100644 index 0b163eb147e7..000000000000 --- a/python/tvm/contrib/ethosu/cascader/logging.py +++ /dev/null @@ -1,70 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""A class to hold logging information about the cascader""" -from typing import Tuple -import datetime -import json -import os -import math - - -class Logging: - """Cascader logging class""" - - def __init__(self): - self.min_memory_usage = 0 - self.max_memory_usage = 0 - self.min_cycles = 0 - self.max_cycles = 0 - - self.selected_proposal_idx = -1 - self.proposals = {} - self.cascader_runtime = 0 - - def add_proposal(self, idx: int, memory_usage: int, cycles: int): - self.proposals[idx] = {"memory_usage": memory_usage, "cycles": cycles} - - def get_extreme_points(self) -> Tuple[int, int, int, int]: - min_cycles, min_mem_usage = math.inf, math.inf - max_cycles, max_mem_usage = 0, 0 - for proposal in self.proposals.values(): - min_mem_usage = min(proposal["memory_usage"], min_mem_usage) - max_mem_usage = max(proposal["memory_usage"], max_mem_usage) - min_cycles = min(proposal["cycles"], min_cycles) - max_cycles = max(proposal["cycles"], max_cycles) - - return min_mem_usage, max_mem_usage, min_cycles, max_cycles - - def dump_json(self): - min_mem_usage, max_mem_usage, min_cycles, max_cycles = self.get_extreme_points() - with open(os.getcwd() + "/cascader_log.json", "w") as json_file: - print( - json.dumps( - { - "date": f"{datetime.datetime.now()}", - "cascader_runtime": self.cascader_runtime, - "min_cycles": min_cycles, - "max_cycles": max_cycles, - "min_memory_usage": min_mem_usage, - "max_memory_usage": max_mem_usage, - "selected_proposal": self.selected_proposal_idx, - "proposals": self.proposals, - }, - indent=2, - ), - file=json_file, - ) diff --git a/python/tvm/contrib/ethosu/cascader/pareto.py b/python/tvm/contrib/ethosu/cascader/pareto.py deleted file mode 100644 index 545778934c2c..000000000000 --- a/python/tvm/contrib/ethosu/cascader/pareto.py +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Pareto optimisation functions for the NPU cascader.""" -from typing import List - -from tvm import Object - -from . import _ffi_api -from .plan import Plan - - -def _get_pareto_frontier(costs: List[List[float]]) -> List[bool]: - for i, cost in enumerate(costs): - for j, value in enumerate(cost): - costs[i][j] = float(value) - - return [bool(v) for v in _ffi_api.GetParetoFrontier(costs)] - - -def _thin_vector(vec: List[Object], max_size: int) -> List[Object]: - return list(_ffi_api.ThinVector(vec, max_size)) - - -def _pareto_cull_plans( - plans: List[Plan], max_plans: int, disable_pareto_metric: bool -) -> List[Plan]: - return list(_ffi_api.ParetoCullPlans(plans, max_plans, disable_pareto_metric)) diff --git a/python/tvm/contrib/ethosu/cascader/parts.py b/python/tvm/contrib/ethosu/cascader/parts.py deleted file mode 100644 index 12588799a66a..000000000000 --- a/python/tvm/contrib/ethosu/cascader/parts.py +++ /dev/null @@ -1,73 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Parts used by the NPU cascader.""" -from typing import List -import tvm._ffi - -from .propagator import Propagator -from .graph import Part, TESubgraph -from .block_config import BlockConfig -from .stripe_config import StripeConfig -from . import _ffi_api - - -@tvm._ffi.register_object("contrib.ethosu.cascader.InlinePart") -class InlinePart(Part): - """InlinePart class""" - - def __init__( - self, - te_subgraph: TESubgraph, - propagators: List[Propagator], - ): - self.__init_handle_by_constructor__( - _ffi_api.InlinePart, - te_subgraph.input_tensors, - te_subgraph.output_tensor, - propagators, - ) - - -@tvm._ffi.register_object("contrib.ethosu.cascader.EthosuPart") -class EthosuPart(Part): - """A class to describe a Part to be executed on an Arm(R) Ethos(TM)-U NPU. 
- - EthosuParts must be provided with an output quantum and the cycles taken to - compute an output quantum which depend on the operator the NPU is computing.""" - - def __init__( - self, - te_subgraph: TESubgraph, - propagators: List[Propagator], - output_quantum: List[int], - subkernels: int, - valid_block_configs: List[BlockConfig], - weight_tensor_idx: int = -1, - ): - self.__init_handle_by_constructor__( - _ffi_api.EthosuPart, - te_subgraph.input_tensors, - te_subgraph.output_tensor, - propagators, - output_quantum, - subkernels, - valid_block_configs, - weight_tensor_idx, - ) - - def get_block_config(self, stripe_config: StripeConfig) -> BlockConfig: - return _ffi_api.EthosuPartGetBlockConfig(self, stripe_config) diff --git a/python/tvm/contrib/ethosu/cascader/plan.py b/python/tvm/contrib/ethosu/cascader/plan.py deleted file mode 100644 index f960911ca133..000000000000 --- a/python/tvm/contrib/ethosu/cascader/plan.py +++ /dev/null @@ -1,167 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Plan class to hold subgraph scheduling information.""" -from typing import Dict, FrozenSet -import tvm._ffi - -from tvm.runtime import Object - -from . import _ffi_api -from .graph import Tensor, Part -from .tensor_config import TensorConfig, MemoryRegion - - -@tvm._ffi.register_object("contrib.ethosu.cascader.Plan") -class Plan(Object): - """ - A class which describes how to schedule a subgraph of Parts together. - - A Plan takes the form of a subgraph of connected Parts (recorded in part_group) with - TensorConfigs for all of the required Tensors (recorded in tensor_configs). This information - can be used to produce a Tensor Expression schedule with inter-operator scheduling. A Plan is - necessarily single-output such that all non-output Parts are 'computed_at'ed the scope of the - output Part. This is what achieves the technique referred to as 'cascading'. A Plan also has - an interior memory region which specifies the region of memory into which all the Plans - intermediate buffers should be allocated. - - Additionally, a Plan contains some other information used during the Plan generation and - selection algorithms. Both the memory and cycles required to run the Plan are accounted for so - that Plans can be ranked and Pareto-culled on these metrics. Furthermore, the TensorConfigs - which are 'open' is recorded indicating that these are valid points to merge with another Plan. - A Plan can only be turned into a schedule if it has no 'open' TensorConfigs - at which point - the Plan is said to be 'closed'. - - Attributes - ---------- - tensor_configs : Dict[Tensor, TensorConfig] - The TensorConfigs specified by the Plan. 
- open_configs : FrozenSet[TensorConfig] - The TensorConfigs which are 'open' meaning they are a Plan input/output but have - 'interior' state. - output_config : TensorConfig - The TensorConfig of the Plan's output tensor. - part_group : FrozenSet[Part] - The Parts which are covered by the Plan. - interior_region : MemoryRegion - The MemoryRegion in which to store 'interior' Plan buffers. - memory_usage : int - The interior memory used by the Plan in bytes. - cycles : int - The cycles taken to execute the Plan. - - """ - - def __init__( - self, - tensor_configs: Dict[Tensor, TensorConfig], - open_configs: FrozenSet[TensorConfig], - output_config: TensorConfig, - part_group: FrozenSet[Part], - interior_region: MemoryRegion, - memory_usage: int, - cycles: int, - ): - self.__init_handle_by_constructor__( - _ffi_api.Plan, - list(tensor_configs.values()), - list(open_configs), - output_config, - list(part_group), - interior_region, - memory_usage, - cycles, - ) - - def merge(self, other): - """ - Merge two Plans with share an 'open' TensorConfig. - - The current Plan is referred to as the 'upper Plan' and the other Plan as the 'lower - Plan'. The 'open' output config of the upper Plan must be an 'open' input config of the - lower Plan. The Tensor referenced by these configs is the Tensor on which the two Plans - will be merged. The merge process does the following: - - The tensor config maps will be merged with TensorConfigs from the upper Plan taking - priority. The open configs will be merged with the TensorConfigs that are being merged - having been removed. The output config will be that of the lower Plan. The part groups - will be merged. The interior region is necessarily the same for both the upper and lower - Plan. The cycles and memory usage will be summed. - - Parameters - ---------- - other : Plan - The Plan to merge with. - - Return - ------ - Plan - The merged Plan. - - """ - return _ffi_api.PlanMerge(self, other) - - @property - def tensor_configs(self): - """The TensorConfigs specified by the Plan.""" - tensor_configs = {} - for config in self._tensor_configs: - tensor_configs[config.tensor] = config - return tensor_configs - - @property - def open_configs(self): - """ - The TensorConfigs which are 'open' meaning they are a Plan input/output but have - 'interior' state. 
- """ - return frozenset(self._open_configs) - - @property - def output_config(self): - """The TensorConfig of the Plan's output tensor.""" - return self._output_config - - @property - def part_group(self): - """The Parts which are covered by the Plan.""" - return frozenset(self._part_group) - - @property - def interior_region(self): - """The MemoryRegion in which to store 'interior' Plan buffers.""" - return self._interior_region - - @property - def memory_usage(self): - """The interior memory used by the Plan in bytes.""" - return self._memory_usage - - @property - def cycles(self): - """The cycles taken to execute the Plan.""" - return self._cycles - - def __repr__(self): - return ( - f"Plan(tensor_configs={self.tensor_configs}, " - f"open_configs={self.open_configs}, " - f"output_config={self.output_config}, " - f"part_group={self.part_group}, " - f"interior_region={self.interior_region.name}, " - f"memory_usage={self.memory_usage}, " - f"cycles={self.cycles}, " - ) diff --git a/python/tvm/contrib/ethosu/cascader/plan_generator.py b/python/tvm/contrib/ethosu/cascader/plan_generator.py deleted file mode 100644 index ed29ff4b5919..000000000000 --- a/python/tvm/contrib/ethosu/cascader/plan_generator.py +++ /dev/null @@ -1,77 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Algorithms to generate Plans for a CascaderGraph.""" -from typing import List, Dict, Tuple - -from tvm.contrib.ethosu.cascader.tensor_config import MemoryRegion, TensorConfig - -from . import _ffi_api -from .cascader_options import CascaderOptions -from .plan import Plan -from .stripe_config import StripeConfig -from .graph import CascaderGraph, Part, Tensor - - -def _generate_output_stripe_configs( - part: Part, stripe_factors: int, enable_striping: bool, multi_dimensional: bool -) -> List[StripeConfig]: - return list( - _ffi_api.GenerateOutputStripeConfigs( - part, stripe_factors, enable_striping, multi_dimensional - ) - ) - - -def _generate_single_plans( - part: Part, - output_stripe_configs: List[StripeConfig], - home_map: Dict[Tensor, List[MemoryRegion]], - cascade_region: MemoryRegion, -) -> List[Plan]: - return list(_ffi_api.GenerateSinglePlans(part, output_stripe_configs, home_map, cascade_region)) - - -def _generate_graph_plans( - graph: CascaderGraph, - home_map: Dict[Tensor, List[MemoryRegion]], - options: CascaderOptions, -): - return _ffi_api.GenerateGraphPlans( - graph, - home_map, - options, - ) - - -def get_copy_cycles_hint(tensor_config: TensorConfig) -> Tuple[int, int]: - """ - Returns a hint estimating the number of cycles for the copy - specified by tensor_config. - - Parameters - ---------- - tensor_config : TensorConfig - The tensor configuration to estimate. - - Returns - ------- - mem2mem_cycles : int - Total estimated cycles. 
- initial_mem2mem_cycles : int - Estimated cycles for the first block. - """ - return _ffi_api.GetCopyCyclesHint(tensor_config) diff --git a/python/tvm/contrib/ethosu/cascader/propagator.py b/python/tvm/contrib/ethosu/cascader/propagator.py deleted file mode 100644 index 636c265923cc..000000000000 --- a/python/tvm/contrib/ethosu/cascader/propagator.py +++ /dev/null @@ -1,57 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Propagator class.""" -# pylint: disable=invalid-name -import tvm._ffi - -from tvm.runtime import Object - -from . import _ffi_api - - -@tvm._ffi.register_object("contrib.ethosu.cascader.Propagator") -class Propagator(Object): - """Propagator class""" - - def __init__(self, transform, offset): - float_transform = list([list(float(v) for v in row) for row in transform]) - self.__init_handle_by_constructor__(_ffi_api.Propagator, float_transform, offset) - - def propagate(self, stripe_config): - return _ffi_api.PropagatorPropagate(self, stripe_config) - - @property - def transform(self): - """Get the transform matrix""" - new_matrix = [] - for row in self._transform: - new_row = [] - for v in row: - new_row.append(v.value) - - new_matrix.append(new_row) - - return new_matrix - - @property - def offset(self): - """Get the offset matrix""" - new_vec = [] - for v in self._offset: - new_vec.append(v.value) - - return new_vec diff --git a/python/tvm/contrib/ethosu/cascader/proposal.py b/python/tvm/contrib/ethosu/cascader/proposal.py deleted file mode 100644 index 13184108120e..000000000000 --- a/python/tvm/contrib/ethosu/cascader/proposal.py +++ /dev/null @@ -1,106 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Proposal class to hold graph scheduling information.""" -from typing import Dict, FrozenSet, List -import tvm._ffi -from tvm.contrib.ethosu.cascader.plan import Plan - -from tvm.runtime import Object - -from . 
import _ffi_api -from .graph import Tensor, Part, CascaderGraph -from .tensor_config import TensorConfig, MemoryRegion - - -@tvm._ffi.register_object("contrib.ethosu.cascader.Proposal") -class Proposal(Object): - """A class which describes how to schedule a CascaderGraph as a series of disjoint Plans. - - Attributes - ---------- - graph : CascaderGraph - The CascaderGraph to which the Proposal applies. - part_group : FrozenSet[Part] - The Parts which are covered by the Proposal. - plans : List[Plan] - The Plans used in the Proposal. - input_tensor_configs : Dict[Tensor, TensorConfig] - The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan. - cascade_region : MemoryRegion - The MemoryRegion where cascading buffers should be homed. - memory_usage : int - The memory required to execute the Proposal in the cascading MemoryRegion. - cycles : int - The estimated cycles taken to execute the Proposal. - - """ - - def __init__( - self, - graph: CascaderGraph, - part_group: FrozenSet[Part], - plans: List[Plan], - input_tensor_configs: Dict[Tensor, TensorConfig], - cascade_region: MemoryRegion, - memory_usage: Dict[MemoryRegion, int], - cycles: int, - ): - self.__init_handle_by_constructor__( - _ffi_api.Proposal, - graph, - list(part_group), - plans, - input_tensor_configs, - cascade_region, - memory_usage, - cycles, - ) - - @property - def graph(self) -> CascaderGraph: - """The CascaderGraph to which the Proposal applies.""" - return self._graph - - @property - def part_group(self) -> FrozenSet[Part]: - """The Parts which are covered by the Proposal.""" - return frozenset(self._part_group) - - @property - def plans(self) -> List[Plan]: - """The Plans used in the Proposal.""" - return list(self._plans) - - @property - def input_tensor_configs(self) -> Dict[Tensor, TensorConfig]: - """The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan.""" - return dict(self._input_tensor_configs) - - @property - def cascade_region(self) -> MemoryRegion: - """The MemoryRegion where cascading buffers should be homed.""" - return self._cascade_region - - @property - def memory_usage(self) -> int: - """The memory required to execute the Proposal in the cascading MemoryRegion.""" - return int(self._memory_usage) - - @property - def cycles(self) -> int: - """The estimated cycles taken to execute the Proposal.""" - return int(self._cycles) diff --git a/python/tvm/contrib/ethosu/cascader/proposal_generator.py b/python/tvm/contrib/ethosu/cascader/proposal_generator.py deleted file mode 100644 index d79021a20539..000000000000 --- a/python/tvm/contrib/ethosu/cascader/proposal_generator.py +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
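For context on the Proposal class removed above: a Proposal exposes the two metrics (memory_usage and cycles) that the selection step ranks on. The sketch below is illustrative only; proposals and sram_size are assumed to exist, and the logic is a simplified variant of the selection later performed by choose_proposal() in scheduler.py, not a reproduction of it.

    def pick_proposal(proposals, sram_size):
        # Keep only Proposals whose cascading working memory fits the budget,
        # then take the one with the fewest estimated cycles.
        fitting = [p for p in proposals if p.memory_usage < sram_size]
        if not fitting:
            return proposals[0]
        return min(fitting, key=lambda p: p.cycles)
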
-"""Algorithms to generate Proposals for a Graph.""" -from typing import List, Dict, FrozenSet - -from . import _ffi_api -from .cascader_options import CascaderOptions -from .plan import Plan -from .proposal import Proposal -from .graph import CascaderGraph, Part - - -def generate_proposals( - graph: CascaderGraph, - home_map: Dict[FrozenSet[Part], List[Plan]], - options: CascaderOptions, -) -> List[Proposal]: - """Generate Pareto optimal Proposals for a CascaderGraph. - - This algorithm takes a top-down dynamic programming approach to determining how - to optimally combine Plans into Proposals. - - Parameters - ---------- - graph : CascaderGraph - The CascaderGraph to generate Proposals for. - home_map : Dict[FrozenSet[Part], List[Plan]] - The Tensor homing map defining valid memory homes for Tensors. - options : CascaderOptions - The configuration options with which to run the generator. - - Returns - ------ - List[Proposal] - A list of Pareto optimal Proposals. - - """ - return list( - _ffi_api.GenerateProposals( - graph, - home_map, - options, - ) - ) diff --git a/python/tvm/contrib/ethosu/cascader/scheduler.py b/python/tvm/contrib/ethosu/cascader/scheduler.py deleted file mode 100644 index 5ebc95d7ef88..000000000000 --- a/python/tvm/contrib/ethosu/cascader/scheduler.py +++ /dev/null @@ -1,335 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -"""Scheduler for cascader which converts Proposals into Schedules.""" -from typing import Tuple, List, Dict, DefaultDict -from collections import defaultdict -import time -import numpy as np - -import tvm -from tvm import te -from tvm import tir -from tvm import PoolInfo -from .cascader_options import CascaderOptions -from .graph import CascaderGraph, Part, Tensor, TESubgraph -from .parts import EthosuPart -from .tensor_config import MemoryRegion -from .proposal import Proposal -from .proposal_generator import generate_proposals -from .plan_generator import get_copy_cycles_hint -from .graph import create_cascader_graph -from .device_config import EthosuDeviceConfig -from .logging import Logging - - -def tile_nd( - sch: te.Schedule, tensor: te.Tensor, tile: Tuple[int, ...] -) -> Tuple[List[tir.IterVar], List[tir.IterVar]]: - """Scheduling utility to perform N-dimensional tiling. - - Parameters - ---------- - sch : te.Schedule - The schedule to apply the tiling to. - tensor : te.Tensor - The tensor to apply the tiling to. - tile : Tuple[int, ...] - The N-dimensional tile size - - Returns - ------- - outer_indices : List[tir.IterVar] - The outer iteration variables. - inner_indices : List[tir.IterVar] - The inner iteration variables. 
- - """ - outer_indices = [] - inner_indices = [] - for i, size in enumerate(tile): - outer, inner = sch[tensor].split(tensor.op.axis[i], size) - outer_indices.append(outer) - inner_indices.append(inner) - - sch[tensor].reorder(*outer_indices, *inner_indices) - return outer_indices, inner_indices - - -def stripe_part( - part: Part, stripe_shape: Tuple[int, ...], sch: te.Schedule -) -> Tuple[te.Stage, tir.IterVar]: - """Apply a striping schedule to the TE subgraph represented by a Part.""" - te_subgraph = part.subgraph - te_output_tensor = te_subgraph.output_tensor - outer_indices, _ = tile_nd(sch, te_output_tensor, stripe_shape) - g = sch.create_group( - outputs=te_output_tensor.op.input_tensors, - inputs=te_subgraph.input_tensors, - include_inputs=False, - ) - g.compute_at(sch[te_output_tensor], outer_indices[-1]) - for axis in outer_indices: - sch[te_output_tensor].unroll(axis) - - return sch[te_output_tensor], outer_indices[-1] - - -def cascade_part( - part: Part, stripe_stage: te.Stage, stripe_axis: tir.IterVar, sch: te.Schedule -) -> None: - """Schedule a Part into a cascade indicated by a stripe Stage.""" - te_subgraph = part.subgraph - g = sch.create_group( - outputs=te_subgraph.output_tensor, inputs=te_subgraph.input_tensors, include_inputs=False - ) - g.compute_at(stripe_stage, stripe_axis) - - -def update_readers(part: Part, readers: DefaultDict[te.Tensor, List[te.Tensor]]) -> None: - """ - Update a dictionary which stores the te.Tensors that need to be read in - order to produce a given te.Tensor. - """ - visited = set() - - def _visit(tensor): - if tensor not in visited and tensor not in part.subgraph.input_tensors: - visited.add(tensor) - for input_tensor in tensor.op.input_tensors: - readers[input_tensor].append(tensor) - _visit(input_tensor) - - _visit(part.subgraph.output_tensor) - - -def apply_proposal(proposal: Proposal, sch: te.Schedule) -> None: - """Apply a Proposal to a Schedule, converting all the Plans into TE scheduling instructions. - - Note that the Schedule is mutated in-place. - - Parameters - ---------- - proposal : Proposal - The Proposal to apply to the Schedule. - sch : te.Schedule - The Schedule to apply to Proposal to. 
- - """ - for plan in proposal.plans: - for part in plan.part_group: - if isinstance(part, EthosuPart): - tensor_config = plan.tensor_configs[part.output_tensor] - stripe_config = tensor_config.stripe_configs[0] - buffer_mode = tensor_config.buffer_mode - block_config = part.get_block_config(stripe_config) - compute_cycles = part.get_performance_info( - stripe_config, buffer_mode - ).compute_cycles - iv = part.subgraph.output_tensor.op.axis[0] - block_shape = block_config.output_shape - if len(block_shape) == 4: - height, width, depth = block_shape[1:] - else: - height = block_shape[1] - width = block_shape[3] - depth = block_shape[2] * block_shape[4] - sch[part.subgraph.output_tensor].pragma(iv, "block_config_height", height) - sch[part.subgraph.output_tensor].pragma(iv, "block_config_width", width) - sch[part.subgraph.output_tensor].pragma(iv, "block_config_depth", depth) - - # Attach AttrStmt directly to npu op so it isn't removed by ReplaceOperators - npu_op = part.subgraph.output_tensor.op.input_tensors[0].op.input_tensors[0] - # Force the pragma to interpret the compute cycles as an int64 value - compute_cycles_int64_cast = tvm.tir.IntImm("int64", compute_cycles) - sch[npu_op].pragma( - npu_op.op.axis[0], "compute_cycles_hint", compute_cycles_int64_cast - ) - - output_tensor_config = plan.output_config - output_tensor = output_tensor_config.tensor - output_part = output_tensor.producers[0] - if output_part.in_line: - continue - stripe_config = output_tensor_config.stripe_configs[0] - stripe_shape = [int(x) for x in stripe_config.shape] - stripe_stage, stripe_axis = stripe_part(output_part, stripe_shape, sch) - copy_te_tensors = [] - compute_cycles_hints = [] - readers = defaultdict(list) - for part in plan.part_group: - if part != output_part: - cascade_part(part, stripe_stage, stripe_axis, sch) - - update_readers(part, readers) - for i, input_tensor in enumerate(part.input_tensors): - tensor_config = plan.tensor_configs[input_tensor] - if tensor_config.home_region != tensor_config.copy_region: - copy_te_tensors.append(part.subgraph.input_tensors[i]) - - compute_cycles_hint, _ = get_copy_cycles_hint(tensor_config) - compute_cycles_hints.append(compute_cycles_hint) - - for te_tensor, compute_cycles_hint in zip(copy_te_tensors, compute_cycles_hints): - copy_stage = sch.cache_read(te_tensor, "global", readers[te_tensor]) - sch[copy_stage].pragma( - copy_stage.op.axis[0], "compute_cycles_hint", compute_cycles_hint - ) - sch[copy_stage].compute_at(stripe_stage, stripe_axis) - - -def create_home_map( - graph: CascaderGraph, - io_region: MemoryRegion, - constant_region: MemoryRegion, - working_regions: List[MemoryRegion], -) -> Dict[Tensor, List[MemoryRegion]]: - """Create a map between Tensors and the MemoryRegions they can be homed in.""" - home_map = {} - for tensor in graph.tensor_order: - if tensor.is_constant: - home_map[tensor] = [constant_region] - elif tensor in graph.input_tensors or tensor in graph.output_tensors: - home_map[tensor] = [io_region] - else: - home_map[tensor] = working_regions - - return home_map - - -def choose_proposal( - proposals: List[Proposal], cascade_region: MemoryRegion, select_proposal_idx: int -): - """Choose the best performing Proposal that doesn't overflow the cascade region.""" - if select_proposal_idx != -1: - # Manually select proposal based on index, take modulus the total number of proposals to - # ensure that some proposal is always selected. 
- proposal_choice = proposals[select_proposal_idx % len(proposals)] - else: - proposal_choice = proposals[0] - for proposal in reversed(proposals): - if proposal.memory_usage < cascade_region.size: - proposal_choice = proposal - break - - return proposal_choice - - -def extract_memory_info(memory_pool: PoolInfo, memory_pressure: int) -> MemoryRegion: - "Create a MemoryRegion based on the info in the memory pool" - size = int(memory_pool.size_hint_bytes - memory_pressure) - read_bandwidth = int(memory_pool.read_bandwidth_bytes_per_cycle) - write_bandwidth = int(memory_pool.write_bandwidth_bytes_per_cycle) - - for param in (size, read_bandwidth, write_bandwidth): - assert param != -1, f"{param} needs to be specified for the cascader." - - name_to_burst_length = { - target.kind.name: burst for target, burst in memory_pool.target_burst_bytes.items() - } - - try: - burst_length = int(name_to_burst_length["ethos-u"]) - except KeyError: - burst_length = 1 - - return MemoryRegion( - name=memory_pool.pool_name, - size=size, - read_bandwidth=read_bandwidth, - write_bandwidth=write_bandwidth, - read_latency=int(memory_pool.read_latency_cycles), - write_latency=int(memory_pool.write_latency_cycles), - burst_length=burst_length, - ) - - -def cascade( - sch: te.Schedule, - te_graph: TESubgraph, - const_dict: Dict[int, np.ndarray], - options: CascaderOptions, - io_region: MemoryRegion, - constant_region: MemoryRegion, - working_regions: List[MemoryRegion], - device_config: EthosuDeviceConfig, -) -> None: - """Schedule a Tensor Expression graph using the technique of 'cascading'. - - 'Cascading' is a technique whereby operations are split into smaller - dependent tiles ('stripes') which can then execute in an interleaved - fashion. This allows for operations to execute together rather than - sequentially which can reduce intermediate memory requirements and in - certain cases improve performance. - - For more detail on 'cascading' as well as how it is implemented, refer to - the RFC here: https://github.com/apache/tvm-rfcs/pull/37. - - Parameters - ---------- - sch : te.Schedule - The Schedule to apply the cascading to. - te_graph : TESubgraph - The Tensor Expression graph from which the Schedule was created. - const_dict : Dict[int, np.ndarray] - A dictionary mapping input index to constant data if that input is - to be a constant. - options : CascaderOptions - Configuration options for the cascading scheduler. - io_region : MemoryRegion - The MemoryRegion in which input/output tensors should reside. - constant_region : MemoryRegion - The MemoryRegion in which constants should reside. - working_regions : List[MemoryRegion] - The MemoryRegions in which intermediate working tensors can reside. The - cascading scheduler will select which MemoryRegion to per tensor. - device_config : EthosuDeviceConfig - Target device configuration. 
- - """ - tvmc_options = tvm.transform.PassContext.current().config.get("relay.ext.ethos-u.options", None) - log = Logging() if tvmc_options and tvmc_options.dev_cascader_logging else None - select_proposal_idx = ( - int(tvmc_options.dev_select_proposal_idx) - if tvmc_options and tvmc_options.dev_select_proposal_idx - else -1 - ) - - if log: - start = time.time() - - assert options.cascade_region in working_regions - # First convert the Tensor Expression graph into a CascaderGraph - casc_graph = create_cascader_graph(te_graph, const_dict, device_config) - # Then create a mapping between Tensors and their possible memory homes - home_map = create_home_map(casc_graph, io_region, constant_region, working_regions) - # Generate Proposals for Pareto-optimal ways to cascade the CascaderGraph - proposals = generate_proposals(casc_graph, home_map, options) - # Select the best Proposal subject to the memory constraints - proposal_choice = choose_proposal(proposals, options.cascade_region, select_proposal_idx) - - if log: - for idx, proposal in enumerate(proposals): - log.add_proposal(idx, proposal.memory_usage, proposal.cycles) - if proposal == proposal_choice: - log.selected_proposal_idx = idx - - log.cascader_runtime = time.time() - start - log.dump_json() - - # Apply the selected Proposal to the Tensor Expression Schedule - apply_proposal(proposal_choice, sch) diff --git a/python/tvm/contrib/ethosu/cascader/stripe_config.py b/python/tvm/contrib/ethosu/cascader/stripe_config.py deleted file mode 100644 index a575e1c20689..000000000000 --- a/python/tvm/contrib/ethosu/cascader/stripe_config.py +++ /dev/null @@ -1,86 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Stripe config class to hold tensor striping information.""" -# pylint: disable=invalid-name -import tvm._ffi - -from tvm.runtime import Object - -from . 
import _ffi_api - - -@tvm._ffi.register_object("contrib.ethosu.cascader.StripeConfig") -class StripeConfig(Object): - """StripeConfig class""" - - def __init__(self, shape, extent, strides, order, stripes, offset): - strides = list([float(v) for v in strides]) - self.__init_handle_by_constructor__( - _ffi_api.StripeConfig, shape, extent, strides, order, stripes, offset - ) - - @property - def shape(self): - return list(self._shape) - - @property - def extent(self): - return list(self._extent) - - @property - def strides(self): - return list([float(v.value) for v in self._strides]) - - @property - def order(self): - return list(self._order) - - @property - def stripes(self): - return list(self._stripes) - - @property - def offset(self): - return list(self._offset) - - def __hash__(self): - return self._hash - - def __eq__(self, other): - return _ffi_api.StripeConfigEqual(self, other) - - def __repr__(self): - return ( - f"StripeConfig(shape={self.shape}, " - f"extent={self.extent}, " - f"strides={self.strides}, " - f"order={self.order}, " - f"stripes={self.stripes}, " - f"offset={self.offset}" - ) - - -def count_stripes(stripe_config: StripeConfig, enable_sliding_window: bool = False): - stripe_counts = dict(_ffi_api.CountStripes(stripe_config, enable_sliding_window)) - # Some code to 'de-TVM' the data types and make them pure Python - clean_stripe_counts = dict() - for stripe, count in stripe_counts.items(): - clean_stripe = tuple([int(v) for v in stripe]) - clean_count = int(count) - clean_stripe_counts[clean_stripe] = clean_count - - return clean_stripe_counts diff --git a/python/tvm/contrib/ethosu/cascader/tensor_config.py b/python/tvm/contrib/ethosu/cascader/tensor_config.py deleted file mode 100644 index 9e48f183ce7b..000000000000 --- a/python/tvm/contrib/ethosu/cascader/tensor_config.py +++ /dev/null @@ -1,222 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Tensor config class to hold tensor scheduling information.""" -from typing import List, Union -from enum import IntEnum -import tvm._ffi -from tvm.contrib.ethosu.cascader.stripe_config import StripeConfig - -from tvm.runtime import Object - -from . import _ffi_api -from .stripe_config import StripeConfig -from .graph import Tensor, BufferMode - - -class TensorConfigState(IntEnum): - """ - The 'state' of a TensorConfig as used in the Plan generation algorithm. - - BOUNDARY - Should describe a Plan input/output Tensor. - INTERIOR - Should describe an intermediate Tensor in a 'closed' Plan. - - """ - - BOUNDARY = 0 - INTERIOR = 1 - - -@tvm._ffi.register_object("contrib.ethosu.cascader.MemoryRegion") -class MemoryRegion(Object): - """ - MemoryRegion class to store information about device memories. - - Attributes - ---------- - name : str - The name of the region. 
- size : int - The size of the region. - read_bandwidth : int - The read bandwidth of the region in bytes per cycle. - write_bandwidth : int - The write bandwidth of the region in bytes per cycle. - - """ - - def __init__( - self, - name: str, - size: int, - read_bandwidth: int, - write_bandwidth: int, - read_latency: int = 0, - write_latency: int = 0, - burst_length: int = 1, - ): - self.__init_handle_by_constructor__( - _ffi_api.MemoryRegion, - name, - size, - read_bandwidth, - write_bandwidth, - read_latency, - write_latency, - burst_length, - ) - - -@tvm._ffi.register_object("contrib.ethosu.cascader.TensorConfig") -class TensorConfig(Object): - """ - A class which describes how to realize a Tensor. - - The TensorConfig describes both how a Tensor is scheduled (the order in which it's - produced/consumed) and how its allocated in memory (which region it should reside in - and whether it should be copied). - - Attributes - ---------- - tensor : Tensor - The Tensor the config applies to. - home_region : MemoryRegion - The region where the tensor is allocated. - state : TensorConfigState - The state of the TensorConfig. - - The TensorConfigState is only used as part of the Plan generation algorithm. For a Plan - to be 'closed' (and therefore not subject to any further merging), all the TensorConfigs - that describe Plan input or output Tensors must be in the 'BOUNDARY' state with the rest - being 'INTERIOR'. If any of the input or output tensors are described by an 'INTERIOR' - TensorConfig, then the Plan is 'open' and should be merged with other 'open' Plans until - the result becomes 'closed'. - buffer_mode : BufferMode - The mode in which the buffer should be realized. - - There are multiple buffering strategies by which a tensor may be realized (computed). - These affect the amount of recomputation necessary as well as the size of buffer required - to store the tensor. See 'BufferMode' for a description of the allowable buffering modes. - stripe_configs : List[StringConfig] - The StripeConfigs with which to compute the tensor. - - The StripeConfigs determine the order in which the elements of the tensor should be - computed, including potentially computing them multiple times (recompute). Multiple - StripeConfigs are used over just a single StripeConfig for the case where the tensor is - consumed by two different Parts executing themselves with different StripeConfigs. In this - case, there is a StripeConfig per consumer of the tensor. - copy_tensor : bool, optional - Whether to copy the tensor. - - While a tensor will originally reside in its home region, the TensorConfig may optionally - specify that the tensor should be copied (according to the StripeConfigs) into another - MemoryRegion. As an example for where this may be used, if a weights tensor initially - resides in slow Flash memory then necessarily the home region will be Flash. However, if - the weights values are used multiple times by a Part, it may be more performant to choose - to copy the weights into a faster memory like SRAM. - copy_region : Union[MemoryRegion, None], optional - The region to copy the tensor to. 
- - """ - - def __init__( - self, - tensor: Tensor, - home_region: MemoryRegion, - state: TensorConfigState, - buffer_mode: BufferMode, - stripe_configs: List[StripeConfig], - copy_tensor: bool = False, - copy_region: Union[MemoryRegion, None] = None, - ): - if copy_region is None: - copy_region = home_region - self.__init_handle_by_constructor__( - _ffi_api.TensorConfig, - tensor, - home_region, - state, - buffer_mode, - stripe_configs, - copy_tensor, - copy_region, - ) - - def get_buffer_size(self): - """ - The size of the buffer needed for the TensorConfig. - - The size of buffer necessary to store a tensor being produced using the TensorConfig is - not necessarily just the size of the tensor. In Plans, a tensor may be being produced and - consumed in 'stripes' which are smaller than the full tensor. Therefore, the buffer - necessary to store the tensor may only need to be as large as the stripe. The precise size - of the buffer will depend both on the BufferMode and StripeConfigs (as well as, of course, - the Tensor). - - """ - return _ffi_api.TensorConfigGetBufferSize(self) - - @property - def tensor(self): - """The Tensor the config applies to.""" - return self._tensor - - @property - def home_region(self): - """The region where the tensor is allocated.""" - return self._home_region - - @property - def state(self): - """The state of the TensorConfig.""" - return TensorConfigState(self._state) - - @property - def buffer_mode(self): - """The mode in which the buffer should be realized.""" - return BufferMode(self._buffer_mode) - - @property - def stripe_configs(self): - """The StripeConfigs with which to compute the tensor.""" - return list(self._stripe_configs) - - @property - def copy_tensor(self): - """Whether to copy the tensor.""" - return bool(self._copy_tensor) - - @property - def copy_region(self): - """The region to copy the tensor to.""" - return self._copy_region - - def __hash__(self): - return self._hash - - def __eq__(self, other): - return _ffi_api.TensorConfigEqual(self, other) - - def __repr__(self): - return ( - f"TensorConfig(tensor={self.tensor}, " - f"home_region={self.home_region.name}, " - f"state={self.state.name}, " - f"buffer_mode={self.buffer_mode.name}, " - f"stripe_configs={self.stripe_configs}, " - f"copy_tensor={self.copy_tensor}, " - f"copy_region={self.copy_region.name}" - ) diff --git a/python/tvm/contrib/micro/meta_schedule/local_builder_micro.py b/python/tvm/contrib/micro/meta_schedule/local_builder_micro.py deleted file mode 100644 index 20e0c45836ee..000000000000 --- a/python/tvm/contrib/micro/meta_schedule/local_builder_micro.py +++ /dev/null @@ -1,84 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
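As a point of reference for the cascader classes removed above, a MemoryRegion is constructed directly from the figures that extract_memory_info() in scheduler.py pulls out of a memory pool. A minimal sketch follows; the region name and all numeric values are assumptions for illustration, not taken from this patch.

    from tvm.contrib.ethosu.cascader.tensor_config import MemoryRegion

    # Illustrative values only; real figures come from the target's PoolInfo.
    sram = MemoryRegion(
        name="SRAM",
        size=384 * 1024,      # bytes available for cascading buffers
        read_bandwidth=16,    # bytes per cycle
        write_bandwidth=16,   # bytes per cycle
        read_latency=0,
        write_latency=0,
        burst_length=1,
    )
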
-"""Local builder for microTVM projects that compile on the local host""" - -import os -import tempfile -from typing import Optional, Dict -from tvm.ir import IRModule -from tvm.runtime import NDArray -from tvm.target import Target -from tvm.meta_schedule.builder import LocalBuilder -from tvm.driver.build_module import OperatorModule -from tvm import micro -from tvm.contrib.tar import tar -from tvm.relay.backend import Runtime -from tvm.driver import build as tvm_build -from tvm.tir.transform import RemoveWeightLayoutRewriteBlock - - -def get_local_builder_micro(): - """Return micro-compatible Builder for meta schedule.""" - - def _micro_build( - mod: IRModule, target: Target, _params: Optional[Dict[str, NDArray]] - ) -> OperatorModule: - """Build function for micro targets. - - Parameters - ---------- - mod : IRModule - The IRModule to be built. - target : Target - The target to be built. - _params : Optional[Dict[str, NDArray]] - The parameters to be used for the build. Must be None. - - Returns - ------- - rt_mod : OperatorModule - The built Module. - """ - - # Note: tvm_build assigns "global_symbol" to the name of generated C function - # changing it is necessary for micro targets, - # since the generated projects already include a main function. - prim_func = mod["main"].with_attr("global_symbol", "default_function") - mod = IRModule({"main": prim_func}) - runtime = Runtime("crt", {"system-lib": True}) - mod = RemoveWeightLayoutRewriteBlock(skip_ndarray_rewrite=True)(mod) - rt_mod = tvm_build(mod, target=target, runtime=runtime) - return rt_mod - - def _micro_export(mod: OperatorModule) -> str: - """Export function for micro targets. - - Parameters - ---------- - mod : OperatorModule - The Module to be exported. - - Returns - ------- - artifact_path : str - The path to the exported Module. - """ - artifact_path = os.path.join(tempfile.mkdtemp(), "tvm_tmp_mod." + tar.output_format) - micro.export_model_library_format(mod, artifact_path) - return artifact_path - - return LocalBuilder(f_build=_micro_build, f_export=_micro_export) diff --git a/python/tvm/contrib/micro/meta_schedule/rpc_runner_micro.py b/python/tvm/contrib/micro/meta_schedule/rpc_runner_micro.py deleted file mode 100644 index 307855438e71..000000000000 --- a/python/tvm/contrib/micro/meta_schedule/rpc_runner_micro.py +++ /dev/null @@ -1,269 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""RPC Runner Micro""" - -from contextlib import contextmanager -from typing import Callable, List, Optional, Union -from collections import namedtuple -import signal -import random - -from tvm import micro -from tvm import nd -from tvm.contrib.popen_pool import PopenPoolExecutor -from tvm.rpc.server import Server -from tvm.rpc.tracker import Tracker -from tvm.meta_schedule.logging import get_logger -from tvm.meta_schedule.utils import cpu_count, derived_object -from tvm.meta_schedule.runner.config import EvaluatorConfig, RPCConfig -from tvm.meta_schedule.runner import PyRunner, RunnerFuture, RunnerInput -from tvm.meta_schedule.runner.rpc_runner import RPCRunnerFuture -from tvm.meta_schedule.runner.utils import T_ARG_INFO_JSON_OBJ_LIST - -logger = get_logger(__name__) # pylint: disable=invalid-name - - -@derived_object -class RPCRunnerMicro(PyRunner): - """RPC based runner for tuning micro models.""" - - def __init__( - self, - platform: str = "crt", - project_options: Optional[dict] = None, - rpc_configs: Optional[List[RPCConfig]] = None, - evaluator_config: Optional[EvaluatorConfig] = None, - max_workers: Optional[int] = None, - initializer: Optional[Callable[[], None]] = None, - session_timeout_sec: int = 300, - ) -> None: - """Constructor - - Parameters - ---------- - platform: str - The platform used for project generation. - project_options: dict - The options for the generated micro project. - rpc_config: RPCConfig - The rpc configuration. - evaluator_config: EvaluatorConfig - The evaluator configuration. - max_workers: Optional[int] = None - The maximum number of connections. Defaults to number of logical CPU cores. - initializer: Optional[Callable[[], None]] - The initializer function. - session_timeout_sec: int - The session timeout, including the pending time. if the number of candidates sent to runner is larger - than the runner workers, increase the timeout. 
- """ - super().__init__() - self.platform = platform - if project_options is None: - project_options = {} - self.project_options = project_options - self.rpc_configs = rpc_configs - self.evaluator_config = EvaluatorConfig._normalized(evaluator_config) - self.session_timeout_sec = session_timeout_sec - - if max_workers is None: - max_workers = cpu_count(logical=True) - logger.info("RPCRunner: max_workers = %d", max_workers) - self.pool = PopenPoolExecutor( - max_workers=max_workers, - timeout=session_timeout_sec, - initializer=initializer, - ) - - def run(self, runner_inputs: List[RunnerInput]) -> List[RunnerFuture]: - results: List[RunnerFuture] = [] - - for runner_input in runner_inputs: - future = RPCRunnerFuture( - future=self.pool.submit( - _worker_func, - self.platform, - self.project_options or {}, - self.rpc_configs, - self.evaluator_config, - str(runner_input.artifact_path), - str(runner_input.device_type), - tuple(arg_info.as_json() for arg_info in runner_input.args_info), - ), - timeout_sec=self.session_timeout_sec, - ) - results.append(future) # type: ignore - return results - - -def _worker_func( - platform: str, - project_options: dict, - rpc_configs: List[RPCConfig], - evaluator_config: EvaluatorConfig, - artifact_path: str, - device_type: str, - args_info: T_ARG_INFO_JSON_OBJ_LIST, -) -> List[float]: - - module_loader = micro.AutoTvmModuleLoader( - template_project_dir=micro.get_microtvm_template_projects(platform), - project_options=project_options, - ) - - rpc_config = random.choice(rpc_configs) - remote_kw = { - "device_key": rpc_config.tracker_key, - "host": rpc_config.tracker_host, - "port": rpc_config.tracker_port, - "priority": 0, - "timeout": 100, - } - - build_result = namedtuple("BuildResult", ["filename"])(artifact_path) - - with module_loader(remote_kw, build_result) as (remote, mod): - dev = remote.device(device_type, 0) - f_prepare = "" - if evaluator_config.enable_cpu_cache_flush: - f_prepare = "cache_flush_cpu_non_first_arg" - time_f = mod.time_evaluator( - mod.entry_name, - dev, - number=evaluator_config.number, - repeat=evaluator_config.repeat, - min_repeat_ms=evaluator_config.min_repeat_ms, - f_preproc=f_prepare, - ) - - random_fill = remote.get_function("tvm.contrib.random.random_fill") - args = [nd.empty(x[2], x[1], dev) for x in args_info] - for arg in args: - random_fill(arg) - dev.sync() - - costs = time_f(*args).results - return costs - - -@contextmanager -def get_rpc_runner_micro( - platform, - options, - evaluator_config: EvaluatorConfig = None, - tracker_host: Optional[str] = None, - tracker_port: Union[None, int, str] = None, - session_timeout_sec: int = 300, - rpc_timeout_sec: int = 10, - serial_numbers: List[str] = None, -): - """Parameters - ---------- - platform: str - The platform used for project generation. - options: dict - The options for the generated micro project. - evaluator_config: EvaluatorConfig - The evaluator configuration. - tracker_host: Optional[str] - The host url of the rpc server. - tracker_port: Union[None, int, str] - The TCP port to bind to - session_timeout_sec: int - The session timeout. if the number of candidates sent to runner is larger - than the runner workers, increase the timeout. - rpc_timeout_sec: - The rpc session timeout. - serial_numbers: - List of board serial numbers to be used during tuning. - For "CRT" and "QEMU" platforms the serial numners are not used, - but the length of the list determines the number of runner instances. 
- """ - - if evaluator_config is None: - evaluator_config = EvaluatorConfig( - number=3, - repeat=1, - min_repeat_ms=100, - enable_cpu_cache_flush=False, - ) - - if tracker_host is None: - tracker_host = "127.0.0.1" - - if tracker_port is None: - tracker_port = 9000 - else: - tracker_port = int(tracker_port) - tracker_port_end = tracker_port + 1000 - - if not (serial_numbers): - serial_numbers = ["$local$device"] - - tracker = Tracker( - port=tracker_port, - port_end=tracker_port_end, - silent=True, - reuse_addr=True, - timeout=60, - ) - - servers = [] - rpc_configs = [] - for serial_number in serial_numbers: - key = serial_number - rpc_config = RPCConfig( - tracker_host=tracker_host, - tracker_port=tracker_port, - tracker_key=key, - session_priority=0, - session_timeout_sec=rpc_timeout_sec, - ) - rpc_configs.append(rpc_config) - - server = Server( - port=tracker_port, - port_end=tracker_port_end, - key=key, - silent=True, - tracker_addr=(tracker_host, tracker_port), - reuse_addr=True, - timeout=60, - ) - servers.append(server) - - def terminate(): - tracker.terminate() - for server in servers: - server.terminate() - - def handle_SIGINT(signal, frame): - terminate() - raise KeyboardInterrupt("Received SIGINT") - - signal.signal(signal.SIGINT, handle_SIGINT) - - try: - yield RPCRunnerMicro( - platform=platform, - project_options=options, - rpc_configs=rpc_configs, - evaluator_config=evaluator_config, - session_timeout_sec=session_timeout_sec, - ) - finally: - terminate() diff --git a/python/tvm/driver/tvmc/__init__.py b/python/tvm/driver/tvmc/__init__.py index 24bb2bc22146..f7798a851251 100644 --- a/python/tvm/driver/tvmc/__init__.py +++ b/python/tvm/driver/tvmc/__init__.py @@ -28,7 +28,6 @@ class TVMCImportError(TVMCException): """TVMC TVMCImportError""" -from . import micro from . import runner from . import autotuner from . import compiler diff --git a/python/tvm/driver/tvmc/compiler.py b/python/tvm/driver/tvmc/compiler.py index 09ba8909e3e7..43c53e8859ad 100644 --- a/python/tvm/driver/tvmc/compiler.py +++ b/python/tvm/driver/tvmc/compiler.py @@ -99,10 +99,9 @@ def add_compile_parser(subparsers, _, json_params): parser.add_argument( "-f", "--output-format", - choices=["so", "mlf"], + choices=["so"], default="so", - help="output format. Use 'so' for shared object or 'mlf' for Model Library Format " - "(only for microTVM targets). Defaults to 'so'.", + help="output format. Use 'so' for shared object. 
Defaults to 'so'.", ) parser.add_argument( "--pass-config", diff --git a/python/tvm/driver/tvmc/composite_target.py b/python/tvm/driver/tvmc/composite_target.py index e912ab564b55..68f544f06aa2 100644 --- a/python/tvm/driver/tvmc/composite_target.py +++ b/python/tvm/driver/tvmc/composite_target.py @@ -23,9 +23,6 @@ import tvm.contrib.target.vitis_ai # pylint: disable=unused-import from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib -from tvm.relay.op.contrib.ethosn import partition_for_ethosn -from tvm.relay.op.contrib.cmsisnn import partition_for_cmsisnn -from tvm.relay.op.contrib.ethosu import partition_for_ethosu from tvm.relay.op.contrib.bnns import partition_for_bnns from tvm.relay.op.contrib.vitis_ai import partition_for_vitis_ai from tvm.relay.op.contrib.clml import partition_for_clml @@ -55,24 +52,6 @@ "default_target": None, "pass_pipeline": partition_for_arm_compute_lib, }, - "cmsis-nn": { - "config_key": "relay.ext.cmsisnn.options", - "pass_default": False, - "default_target": None, - "pass_pipeline": partition_for_cmsisnn, - }, - "ethos-n": { - "config_key": "relay.ext.ethos-n.options", - "pass_default": False, - "default_target": None, - "pass_pipeline": partition_for_ethosn, - }, - "ethos-u": { - "config_key": "relay.ext.ethos-u.options", - "pass_default": False, - "default_target": None, - "pass_pipeline": partition_for_ethosu, - }, "bnns": { "config_key": None, "pass_default": False, diff --git a/python/tvm/driver/tvmc/micro.py b/python/tvm/driver/tvmc/micro.py deleted file mode 100644 index c65d84c736c0..000000000000 --- a/python/tvm/driver/tvmc/micro.py +++ /dev/null @@ -1,315 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=consider-using-from-import -""" -Provides support for micro targets (microTVM). -""" -import argparse -import os -from pathlib import Path -import shutil -import sys - -from . import TVMCException -from .main import register_parser -from .arguments import TVMCSuppressedArgumentParser -from .project import ( - get_project_options, - get_and_check_options, - get_project_dir, -) - -try: - import tvm.micro.project as project - from tvm.micro import get_microtvm_template_projects - from tvm.micro.build import MicroTVMTemplateProjectNotFoundError - from tvm.micro.project_api.server import ServerError - from tvm.micro.project_api.client import ProjectAPIServerNotFoundError - - SUPPORT_MICRO = True -except (ImportError, NameError): - SUPPORT_MICRO = False - - -@register_parser -def add_micro_parser(subparsers, main_parser, json_params): - """Includes parser for 'micro' context and associated subcommands: - create-project (create), build, and flash. - """ - - if SUPPORT_MICRO is False: - # Don't create 'tvmc micro' parser. 
- return - - # Probe available default platform templates. - templates = {} - for p in ("zephyr", "arduino"): - try: - templates[p] = get_microtvm_template_projects(p) - except MicroTVMTemplateProjectNotFoundError: - pass - - micro = subparsers.add_parser("micro", help="select micro context.") - micro.set_defaults(func=drive_micro) - - micro_parser = micro.add_subparsers(title="subcommands") - # Selecting a subcommand under 'micro' is mandatory - micro_parser.required = True - micro_parser.dest = "subcommand" - - # 'create_project' subcommand - create_project_parser = micro_parser.add_parser( - "create-project", - aliases=["create"], - help="create a project template of a given type or given a template dir.", - ) - create_project_parser.set_defaults(subcommand_handler=create_project_handler) - create_project_parser.add_argument( - "project_dir", - help="project dir where the new project based on the template dir will be created.", - ) - create_project_parser.add_argument("MLF", help="Model Library Format (MLF) .tar archive.") - create_project_parser.add_argument( - "-f", - "--force", - action="store_true", - help="force project creating even if the specified project directory already exists.", - ) - - # 'build' subcommand - build_parser = micro_parser.add_parser( - "build", - help="build a project dir, generally creating an image to be flashed, e.g. zephyr.elf.", - ) - build_parser.set_defaults(subcommand_handler=build_handler) - build_parser.add_argument("project_dir", help="project dir to build.") - build_parser.add_argument("-f", "--force", action="store_true", help="Force rebuild.") - - # 'flash' subcommand - flash_parser = micro_parser.add_parser( - "flash", help="flash the built image on a given micro target." - ) - flash_parser.set_defaults(subcommand_handler=flash_handler) - flash_parser.add_argument("project_dir", help="project dir where the built image is.") - - # For each platform add arguments detected automatically using Project API info query. - - # Create subparsers for the platforms under 'create-project', 'build', and 'flash' subcommands. - help_msg = ( - "you must select a platform from the list. You can pass '-h' for a selected " - "platform to list its options." - ) - create_project_platforms_parser = create_project_parser.add_subparsers( - title="platforms", help=help_msg, dest="platform" - ) - build_platforms_parser = build_parser.add_subparsers( - title="platforms", help=help_msg, dest="platform" - ) - flash_platforms_parser = flash_parser.add_subparsers( - title="platforms", help=help_msg, dest="platform" - ) - - subcmds = { - # API method name Parser associated to method Handler func to call after parsing - "generate_project": [create_project_platforms_parser, create_project_handler], - "build": [build_platforms_parser, build_handler], - "flash": [flash_platforms_parser, flash_handler], - } - - # Helper to add a platform parser to a subcmd parser. - def _add_parser(parser, platform): - platform_name = platform[0].upper() + platform[1:] + " platform" - platform_parser = parser.add_parser( - platform, add_help=False, help=f"select {platform_name}." 
- ) - platform_parser.set_defaults(platform=platform) - return platform_parser - - parser_by_subcmd = {} - for subcmd, subcmd_parser_handler in subcmds.items(): - subcmd_parser = subcmd_parser_handler[0] - subcmd_parser.required = True # Selecting a platform or template is mandatory - parser_by_platform = {} - for platform in templates: - new_parser = _add_parser(subcmd_parser, platform) - parser_by_platform[platform] = new_parser - - # Besides adding the parsers for each default platform (like Zephyr and Arduino), add a - # parser for 'template' to deal with adhoc projects/platforms. - new_parser = subcmd_parser.add_parser( - "template", add_help=False, help="select an adhoc template." - ) - new_parser.add_argument( - "--template-dir", required=True, help="Project API template directory." - ) - new_parser.set_defaults(platform="template") - parser_by_platform["template"] = new_parser - - parser_by_subcmd[subcmd] = parser_by_platform - - disposable_parser = TVMCSuppressedArgumentParser(main_parser) - try: - known_args, _ = disposable_parser.parse_known_args() - except TVMCException: - return - - try: - subcmd = known_args.subcommand - platform = known_args.platform - except AttributeError: - # No subcommand or platform, hence no need to augment the parser for micro targets. - return - - # Augment parser with project options. - - if platform == "template": - # adhoc template - template_dir = str(Path(known_args.template_dir).resolve()) - else: - # default template - template_dir = templates[platform] - - try: - template = project.TemplateProject.from_directory(template_dir) - except ProjectAPIServerNotFoundError: - sys.exit(f"Error: Project API server not found in {template_dir}!") - - template_info = template.info() - - options_by_method = get_project_options(template_info) - - # TODO(gromero): refactor to remove this map. - subcmd_to_method = { - "create-project": "generate_project", - "create": "generate_project", - "build": "build", - "flash": "flash", - } - - method = subcmd_to_method[subcmd] - parser_by_subcmd_n_platform = parser_by_subcmd[method][platform] - _, handler = subcmds[method] - - parser_by_subcmd_n_platform.formatter_class = ( - # Set raw help text so help_text format works - argparse.RawTextHelpFormatter - ) - parser_by_subcmd_n_platform.set_defaults( - subcommand_handler=handler, - valid_options=options_by_method[method], - template_dir=template_dir, - ) - - required = any([opt["required"] for opt in options_by_method[method]]) - nargs = "+" if required else "*" - - help_text_by_option = [opt["help_text"] for opt in options_by_method[method]] - help_text = "\n\n".join(help_text_by_option) + "\n\n" - - parser_by_subcmd_n_platform.add_argument( - "--project-option", required=required, metavar="OPTION=VALUE", nargs=nargs, help=help_text - ) - - parser_by_subcmd_n_platform.add_argument( - "-h", - "--help", - "--list-options", - action="help", - help="show this help message which includes platform-specific options and exit.", - ) - - for one_entry in json_params: - micro.set_defaults(**one_entry) - - -def drive_micro(args): - # Call proper handler based on subcommand parsed. - args.subcommand_handler(args) - - -def create_project_handler(args): - """Creates a new project dir.""" - project_dir = get_project_dir(args.project_dir) - - if os.path.exists(project_dir): - if args.force: - shutil.rmtree(project_dir) - else: - raise TVMCException( - "The specified project dir already exists. " - "To force overwriting it use '-f' or '--force'." 
- ) - - template_dir = str(Path(args.template_dir).resolve()) - if not os.path.exists(template_dir): - raise TVMCException(f"Template directory {template_dir} does not exist!") - - mlf_path = str(Path(args.MLF).resolve()) - if not os.path.exists(mlf_path): - raise TVMCException(f"MLF file {mlf_path} does not exist!") - - options = get_and_check_options(args.project_option, args.valid_options) - - try: - project.generate_project_from_mlf(template_dir, project_dir, mlf_path, options) - except ServerError as error: - print("The following error occurred on the Project API server side: \n", error) - sys.exit(1) - - -def build_handler(args): - """Builds a firmware image given a project dir.""" - project_dir = get_project_dir(args.project_dir) - - if not os.path.exists(project_dir): - raise TVMCException(f"{project_dir} doesn't exist.") - - if os.path.exists(project_dir + "/build"): - if args.force: - shutil.rmtree(project_dir + "/build") - else: - raise TVMCException( - f"There is already a build in {project_dir}. " - "To force rebuild it use '-f' or '--force'." - ) - - options = get_and_check_options(args.project_option, args.valid_options) - - try: - prj = project.GeneratedProject.from_directory(project_dir, options=options) - prj.build() - except ServerError as error: - print("The following error occurred on the Project API server side: ", error) - sys.exit(1) - - -def flash_handler(args): - """Flashes a firmware image to a target device given a project dir.""" - - project_dir = get_project_dir(args.project_dir) - - if not os.path.exists(project_dir + "/build"): - raise TVMCException(f"Could not find a build in {project_dir}") - - options = get_and_check_options(args.project_option, args.valid_options) - - try: - prj = project.GeneratedProject.from_directory(project_dir, options=options) - prj.flash() - except ServerError as error: - print("The following error occurred on the Project API server side: ", error) - sys.exit(1) diff --git a/python/tvm/driver/tvmc/model.py b/python/tvm/driver/tvmc/model.py index f39aefdc92b0..73cc8da71cbf 100644 --- a/python/tvm/driver/tvmc/model.py +++ b/python/tvm/driver/tvmc/model.py @@ -46,9 +46,7 @@ """ import os import tarfile -import json from typing import Optional, Union, Dict, Callable, TextIO -from pathlib import Path import numpy as np import tvm @@ -61,12 +59,6 @@ from tvm.runtime.vm import Executable -try: - from tvm.micro import export_model_library_format -except ImportError: - export_model_library_format = None - - class TVMCModel(object): """Initialize a TVMC model from a relay model definition or a saved file. @@ -308,18 +300,15 @@ def export_package( Command line options to be passed to the cross compiler. output_format : str How to save the modules function library. Must be one of "so" and "tar" to save - using the classic format or "mlf" to save using the Model Library Format. + using the classic forma. Returns ------- package_path : str The path that the package was saved to. 
""" - if output_format not in ["so", "tar", "mlf"]: - raise TVMCException("Only 'so', 'tar', and 'mlf' output formats are supported.") - - if output_format == "mlf" and cross: - raise TVMCException("Specifying the MLF output and a cross compiler is not supported.") + if output_format not in ["so", "tar"]: + raise TVMCException("Only 'so' and 'tar' output formats are supported.") if isinstance(executor_factory, Executable): package_path = self.export_vm_format(executor_factory, package_path, output_format) @@ -327,11 +316,6 @@ def export_package( package_path = self.export_classic_format( executor_factory, package_path, cross, cross_options, output_format ) - elif output_format == "mlf": - if export_model_library_format: - package_path = export_model_library_format(executor_factory, package_path) - else: - raise Exception("micro tvm is not enabled. Set USE_MICRO to ON in config.cmake") return package_path @@ -354,26 +338,15 @@ class TVMCPackage(object): package_path : str The path to the saved TVMCPackage that will be loaded. - project_dir : Path, str - If given and loading a MLF file, the path to the project directory that contains the file. - use_vm : bool Whether the graph module was compiled with vm or not. """ - def __init__( - self, - package_path: str, - project_dir: Optional[Union[Path, str]] = None, - ): + def __init__(self, package_path: str): self._tmp_dir = utils.tempdir() self.package_path = package_path self.import_package(self.package_path) - if project_dir and self.type != "mlf": - raise TVMCException("Setting 'project_dir' is only allowed when importing a MLF.!") - self.project_dir = project_dir - def import_package(self, package_path: str): """Load a TVMCPackage from a previously exported TVMCModel. @@ -386,71 +359,37 @@ def import_package(self, package_path: str): t = tarfile.open(package_path) t.extractall(temp.relpath(".")) - if os.path.exists(temp.relpath("metadata.json")): - # Model Library Format (MLF) - self.lib_name = None - self.lib_path = None - with open(temp.relpath("metadata.json")) as metadata_json: - metadata = json.load(metadata_json) - - all_module_names = [] - for name in metadata["modules"].keys(): - all_module_names.append(name) - assert len(all_module_names) == 1, "Multiple modules in MLF is not supported." - - module_name = all_module_names[0] - module_metdata = metadata["modules"][module_name] - has_graph_executor = "graph" in module_metdata["executors"] - graph = ( - temp.relpath(f"executor-config/graph/{module_name}.graph") - if has_graph_executor - else None - ) - params = temp.relpath(f"parameters/{module_name}.params") - - self.type = "mlf" - - # Set executor type - if len(metadata["modules"][module_name]["executors"]) > 1: - executor_types_msg = ",".join(metadata["modules"][module_name]["executors"]) - raise TVMCException( - f"Found multiple executors with these types: {executor_types_msg}. " - "Currently, only one executor type (aot or graph) is supported." 
- ) - self.executor_type = metadata["modules"][module_name]["executors"][0] - + # Classic format + classic_lib_name_so = "mod.so" + classic_lib_name_tar = "mod.tar" + + # VM format + vm_lib_name_so = "lib.so" + vm_lib_name_tar = "lib.tar" + + if os.path.exists(temp.relpath(classic_lib_name_so)): + self.lib_name = classic_lib_name_so + self.type = "classic" + elif os.path.exists(temp.relpath(classic_lib_name_tar)): + self.lib_name = classic_lib_name_tar + self.type = "classic" + elif os.path.exists(temp.relpath(vm_lib_name_so)): + self.lib_name = vm_lib_name_so + self.type = "vm" + elif os.path.exists(temp.relpath(vm_lib_name_tar)): + self.lib_name = vm_lib_name_tar + self.type = "vm" else: - # Classic format - classic_lib_name_so = "mod.so" - classic_lib_name_tar = "mod.tar" - - # VM format - vm_lib_name_so = "lib.so" - vm_lib_name_tar = "lib.tar" - - if os.path.exists(temp.relpath(classic_lib_name_so)): - self.lib_name = classic_lib_name_so - self.type = "classic" - elif os.path.exists(temp.relpath(classic_lib_name_tar)): - self.lib_name = classic_lib_name_tar - self.type = "classic" - elif os.path.exists(temp.relpath(vm_lib_name_so)): - self.lib_name = vm_lib_name_so - self.type = "vm" - elif os.path.exists(temp.relpath(vm_lib_name_tar)): - self.lib_name = vm_lib_name_tar - self.type = "vm" - else: - raise TVMCException("Couldn't find exported library in the package.") + raise TVMCException("Couldn't find exported library in the package.") - self.lib_path = temp.relpath(self.lib_name) + self.lib_path = temp.relpath(self.lib_name) - graph, params = None, None - self.executor_type = "vm" - if self.type == "classic": - graph = temp.relpath("mod.json") - params = temp.relpath("mod.params") - self.executor_type = "graph" + graph, params = None, None + self.executor_type = "vm" + if self.type == "classic": + graph = temp.relpath("mod.json") + params = temp.relpath("mod.params") + self.executor_type = "graph" if params is not None: with open(params, "rb") as param_file: diff --git a/python/tvm/driver/tvmc/runner.py b/python/tvm/driver/tvmc/runner.py index a86105c7894c..1394936b0a57 100644 --- a/python/tvm/driver/tvmc/runner.py +++ b/python/tvm/driver/tvmc/runner.py @@ -23,8 +23,6 @@ import pathlib from typing import Dict, Optional, Union from tarfile import ReadError -import argparse -import sys import json import numpy as np @@ -38,37 +36,20 @@ from tvm.runtime import profiler_vm from tvm.relay.param_dict import load_param_dict from . import TVMCException -from .arguments import TVMCSuppressedArgumentParser -from .project import ( - get_project_options, - get_and_check_options, - get_project_dir, -) from .main import register_parser from .model import TVMCPackage, TVMCResult from .result_utils import get_top_results from .tracker import tracker_host_port_from_cli -try: - import tvm.micro.project as project - from tvm.micro.project import TemplateProjectError - from tvm.micro.project_api.client import ProjectAPIServerNotFoundError - - SUPPORT_MICRO = True -except (ImportError, AttributeError) as exception: - SUPPORT_MICRO = False - # pylint: disable=invalid-name logger = logging.getLogger("TVMC") @register_parser -def add_run_parser(subparsers, main_parser, json_params): +def add_run_parser(subparsers, main_parser, json_params): # pylint: disable=unused-argument """Include parser for 'run' subcommand""" - # Use conflict_handler='resolve' to allow '--list-options' option to be properly overriden when - # augmenting the parser with the micro device options (i.e. when '--device micro'). 
parser = subparsers.add_parser("run", help="run a compiled module", conflict_handler="resolve") parser.set_defaults(func=drive_run) @@ -76,7 +57,7 @@ def add_run_parser(subparsers, main_parser, json_params): # like 'webgpu', etc (@leandron) parser.add_argument( "--device", - choices=["cpu", "cuda", "cl", "metal", "vulkan", "rocm", "micro"], + choices=["cpu", "cuda", "cl", "metal", "vulkan", "rocm"], default="cpu", help="target device to run the compiled module. Defaults to 'cpu'", ) @@ -94,7 +75,7 @@ def add_run_parser(subparsers, main_parser, json_params): "--print-time", action="store_true", help="record and print the execution time(s). Enabling print-time will result " - " in (1 + repeat * number) executions of the model. (non-micro devices only)", + " in (1 + repeat * number) executions of the model.", ) parser.add_argument( "--print-top", @@ -108,7 +89,7 @@ def add_run_parser(subparsers, main_parser, json_params): help="generate profiling data from the runtime execution. " "Using --profile requires the Graph Executor Debug enabled on TVM. " "Profiling may also have an impact on inference time, " - "making it take longer to be generated. (non-micro devices only)", + "making it take longer to be generated.", ) parser.add_argument("-v", "--verbose", action="count", default=0, help="increase verbosity.") parser.add_argument( @@ -136,80 +117,18 @@ def add_run_parser(subparsers, main_parser, json_params): ) parser.add_argument( "--rpc-key", - help="the RPC tracker key of the target device. (non-micro devices only)", + help="the RPC tracker key of the target device.", ) parser.add_argument( "--rpc-tracker", help="hostname (required) and port (optional, defaults to 9090) of the RPC tracker, " - "e.g. '192.168.0.100:9999'. (non-micro devices only)", + "e.g. '192.168.0.100:9999'.", ) parser.add_argument( "PATH", help="path to the compiled module file or to the project directory if '--device micro' " "is selected.", ) - parser.add_argument( - "--list-options", - action="store_true", - help="show all run options and option choices when '--device micro' is selected. " - "(micro devices only)", - ) - - disposable_parser = TVMCSuppressedArgumentParser(main_parser) - try: - known_args, _ = disposable_parser.parse_known_args() - except TVMCException: - return - - if vars(known_args).get("device") != "micro": - # No need to augment the parser for micro targets. - return - - if SUPPORT_MICRO is False: - sys.exit( - "'--device micro' is not supported. " - "Please build TVM with micro support (USE_MICRO ON)!" - ) - - project_dir = get_project_dir(known_args.PATH) - - try: - project_ = project.GeneratedProject.from_directory(project_dir, None) - except ProjectAPIServerNotFoundError: - sys.exit(f"Error: Project API server not found in {project_dir}!") - except TemplateProjectError: - sys.exit( - "Error: Project directory error. That usually happens when model.tar is not found." 
- ) - - project_info = project_.info() - options_by_method = get_project_options(project_info) - mlf_path = project_info["model_library_format_path"] - - parser.formatter_class = ( - argparse.RawTextHelpFormatter - ) # Set raw help text so customized help_text format works - - parser.set_defaults(valid_options=options_by_method["open_transport"], mlf_path=mlf_path) - - required = any([opt["required"] for opt in options_by_method["open_transport"]]) - nargs = "+" if required else "*" - - help_text_by_option = [opt["help_text"] for opt in options_by_method["open_transport"]] - help_text = "\n\n".join(help_text_by_option) + "\n\n" - - parser.add_argument( - "--project-option", required=required, metavar="OPTION=VALUE", nargs=nargs, help=help_text - ) - - parser.add_argument( - "--list-options", - action="help", - help="show this help message with platform-specific options and exit.", - ) - - for one_entry in json_params: - parser.set_defaults(**one_entry) def drive_run(args): @@ -222,49 +141,9 @@ def drive_run(args): """ path = pathlib.Path(args.PATH) - options = None - project_dir = None - if args.device == "micro": - # If it's a micro device, then grab the model.tar path from Project API instead. - # args.PATH will be used too since it points to the project directory. N.B.: there is no - # way to determine the model.tar path from the project dir or vice-verse (each platform - # is free to put model.tar whereever it's convenient). - project_dir = path - path = pathlib.Path(args.mlf_path) - - # Check for options unavailable for micro targets. - - if args.rpc_key or args.rpc_tracker: - raise TVMCException( - "--rpc-key and/or --rpc-tracker can't be specified for micro targets." - ) - - if args.device != "micro": - raise TVMCException( - f"Device '{args.device}' not supported. " - "Only device 'micro' is supported to run a model in MLF, " - "i.e. when '--device micro'." - ) - - if args.profile: - raise TVMCException("--profile is not currently supported for micro devices.") - - if args.print_time: - raise TVMCException("--print-time is not currently supported for micro devices.") - - # Get and check options for micro targets. - options = get_and_check_options(args.project_option, args.valid_options) - - else: - # Check for options only availabe for micro targets. - - if args.list_options: - raise TVMCException( - "--list-options is only availabe on micro targets, i.e. when '--device micro'." - ) try: - tvmc_package = TVMCPackage(package_path=path, project_dir=project_dir) + tvmc_package = TVMCPackage(package_path=path) except IsADirectoryError: raise TVMCException(f"File {path} must be an archive, not a directory.") except FileNotFoundError: @@ -292,7 +171,6 @@ def drive_run(args): number=args.number, profile=args.profile, end_to_end=args.end_to_end, - options=options, ) if args.print_time: @@ -482,7 +360,6 @@ def run_module( number: int = 10, profile: bool = False, end_to_end: bool = False, - options: dict = None, ): """Run a compiled graph executor module locally or remotely with optional input values. @@ -539,24 +416,6 @@ def run_module( ) with ExitStack() as stack: - # Currently only two package formats are supported: "classic" and - # "mlf". The later can only be used for micro targets, i.e. with microTVM. 
- if device == "micro": - if tvmc_package.type != "mlf": - raise TVMCException(f"Model {tvmc_package.package_path} is not a MLF archive.") - - project_dir = get_project_dir(tvmc_package.project_dir) - - # This is guaranteed to work since project_dir was already checked when - # building the dynamic parser to accommodate the project options, so no - # checks are in place when calling GeneratedProject. - project_ = project.GeneratedProject.from_directory(project_dir, options) - else: - if tvmc_package.type == "mlf": - raise TVMCException( - "You're trying to run a model saved using the Model Library Format (MLF). " - "MLF can only be used to run micro device ('--device micro')." - ) if hostname: if isinstance(port, str): @@ -581,12 +440,8 @@ def run_module( logger.debug("Running a local session.") session = rpc.LocalSession() - # Micro targets don't support uploading a model. The model to be run - # must be already flashed into the micro target before one tries - # to run it. Hence skip model upload for micro targets. - if device != "micro": - session.upload(tvmc_package.lib_path) - lib = session.load_module(tvmc_package.lib_name) + session.upload(tvmc_package.lib_path) + lib = session.load_module(tvmc_package.lib_name) # TODO expand to other supported devices, as listed in tvm.rpc.client (@leandron) logger.debug("Device is %s.", device) @@ -600,9 +455,6 @@ def run_module( dev = session.vulkan() elif device == "rocm": dev = session.rocm() - elif device == "micro": - dev = session.device - lib = session.get_system_lib() else: assert device == "cpu" dev = session.cpu() @@ -652,15 +504,8 @@ def run_module( logger.debug("Creating runtime with profiling enabled.") module = debug_executor.create(tvmc_package.graph, lib, dev, dump_root="./prof") else: - if device == "micro": - logger.debug("Creating runtime (micro) with profiling disabled.") - if tvmc_package.executor_type == "aot": - module = tvm.micro.create_local_aot_executor(session) - else: - module = tvm.micro.create_local_graph_executor(tvmc_package.graph, lib, dev) - else: - logger.debug("Creating runtime with profiling disabled.") - module = executor.create(tvmc_package.graph, lib, dev) + logger.debug("Creating runtime with profiling disabled.") + module = executor.create(tvmc_package.graph, lib, dev) if tvmc_package.executor_type == "graph": logger.debug("Loading params into the runtime module.") diff --git a/python/tvm/exec/microtvm_debug_shell.py b/python/tvm/exec/microtvm_debug_shell.py deleted file mode 100644 index ba0f8bca8217..000000000000 --- a/python/tvm/exec/microtvm_debug_shell.py +++ /dev/null @@ -1,158 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=redefined-outer-name, invalid-name -"""Start an RPC server intended for use as a microTVM debugger. 
- -microTVM aims to be runtime-agnostic, and to that end, frameworks often define command-line tools -used to launch a debug flow. These tools often manage the process of connecting to an attached -device using a hardware debugger, exposing a GDB server, and launching GDB connected to that -server with a source file attached. It's also true that this debugger can typically not be executed -concurrently with any flash tool, so this integration point is provided to allow TVM to launch and -terminate any debuggers integrated with the larger microTVM compilation/autotuning flow. - -To use this tool, first launch this script in a separate terminal window. Then, provide the hostport -to your compiler's Flasher instance. -""" - -import argparse -import logging -import socket -import struct -import sys - -import tvm.micro.debugger as _ # NOTE: imported to expose global PackedFuncs over RPC. - -from .._ffi.base import py_str -from ..rpc import base -from ..rpc import _ffi_api - - -_LOG = logging.getLogger(__name__) - - -def parse_args(): - """Parse command line arguments to this script.""" - parser = argparse.ArgumentParser(description="microTVM debug-tool runner") - parser.add_argument("--host", default="0.0.0.0", help="hostname to listen on") - parser.add_argument("--port", type=int, default=9090, help="hostname to listen on") - parser.add_argument( - "--impl", - help=( - "If given, name of a module underneath tvm.micro.contrib " - "which contains the Debugger implementation to use. For example, to enable a " - "debugger named BarDebugger in python/tvm/micro/contrib/foo.py, specify either " - "'tvm.micro.contrib.foo' or 'foo' here. To enable a debugger named BazDebugger in " - "a third-party module ext_package.debugger, specify 'ext_package.debugger' here. " - "NOTE: the module cannot be in a sub-package of tvm.micro.contrib." - ), - ) - - return parser.parse_args() - - -class ConnectionClosedError(Exception): - """Raised when the connection is closed.""" - - -def handle_conn(conn, rpc_key): - """Handle a single connection that has just been accept'd().""" - - def send(data): - conn.sendall(data) - return len(data) - - magic = struct.unpack(" str: - """Find the standalone_crt directory. - - Though the C runtime source lives in the tvm tree, it is intended to be distributed with any - binary build of TVM. This source tree is intended to be integrated into user projects to run - models targeted with --runtime=c. - - Returns - ------- - str : - The path to the standalone_crt - """ - global STANDALONE_CRT_DIR - if STANDALONE_CRT_DIR is None: - for path in libinfo.find_lib_path(): - crt_path = os.path.join(os.path.dirname(path), "standalone_crt") - if os.path.isdir(crt_path): - STANDALONE_CRT_DIR = crt_path - break - - else: - raise CrtNotFoundError() - - return STANDALONE_CRT_DIR - - -def get_microtvm_template_projects(platform: str) -> str: - """Find microTVM template project directory for specific platform. - - Parameters - ---------- - platform : str - Platform type which should be defined in MicroTVMTemplateProject. - - Returns - ------- - str : - Path to template project directory for platform. 
- """ - if platform not in MicroTVMTemplateProject.list(): - raise ValueError(f"platform {platform} is not supported.") - - microtvm_template_projects = None - for path in libinfo.find_lib_path(): - template_path = os.path.join(os.path.dirname(path), "microtvm_template_projects") - if os.path.isdir(template_path): - microtvm_template_projects = template_path - break - else: - raise MicroTVMTemplateProjectNotFoundError() - - return os.path.join(microtvm_template_projects, platform) - - -def copy_crt_config_header(platform: str, output_path: Path): - """Copy crt_config header file for a platform to destinatin. - - Parameters - ---------- - platform : str - Platform type which should be defined in MicroTVMTemplateProject. - - output_path: Path - Output path for crt_config header file. - """ - crt_config_path = Path(get_microtvm_template_projects(platform)) / "crt_config" / "crt_config.h" - shutil.copy(crt_config_path, output_path) - - -class AutoTvmModuleLoader: - """MicroTVM AutoTVM Module Loader - - Parameters - ---------- - template_project_dir : Union[os.PathLike, str] - project template path - - project_options : dict - project generation option - - project_dir: str - if use_existing is False: The path to save the generated microTVM Project. - if use_existing is True: The path to a generated microTVM Project for debugging. - - use_existing: bool - skips the project generation and opens transport to the project at the project_dir address. - """ - - def __init__( - self, - template_project_dir: Union[os.PathLike, str], - project_options: dict = None, - project_dir: Union[os.PathLike, str] = None, - use_existing: bool = False, - ): - self._project_options = project_options - self._use_existing = use_existing - - if isinstance(template_project_dir, (os.PathLike, str)): - self._template_project_dir = str(template_project_dir) - elif not isinstance(template_project_dir, str): - raise TypeError(f"Incorrect type {type(template_project_dir)}.") - - if isinstance(project_dir, (os.PathLike, str)): - self._project_dir = str(project_dir) - else: - self._project_dir = None - - @contextlib.contextmanager - def __call__(self, remote_kw, build_result): - with open(build_result.filename, "rb") as build_file: - build_result_bin = build_file.read() - - # In case we are tuning on multiple physical boards (with Meta-schedule), the tracker - # device_key is the serial_number of the board that wil be used in generating micro session. - # For CRT projects, and in cases that the serial number is not provided - # (including tuning with AutoTVM), the serial number field doesn't change. - if "board" in self._project_options and "$local$device" not in remote_kw["device_key"]: - self._project_options["serial_number"] = remote_kw["device_key"] - - tracker = _rpc.connect_tracker(remote_kw["host"], remote_kw["port"]) - remote = tracker.request( - remote_kw["device_key"], - priority=remote_kw["priority"], - session_timeout=remote_kw["timeout"], - session_constructor_args=[ - "tvm.micro.compile_and_create_micro_session", - build_result_bin, - self._template_project_dir, - json.dumps(self._project_options), - self._project_dir, - self._use_existing, - ], - ) - system_lib = remote.get_function("runtime.SystemLib")() - yield remote, system_lib - - -def autotvm_build_func(): - """A dummy build function which causes autotvm to use a different export format.""" - - -# A sentinel value for the output format. 
-autotvm_build_func.output_format = ".model-library-format" diff --git a/python/tvm/micro/class_factory.py b/python/tvm/micro/class_factory.py deleted file mode 100644 index 9fac63c63916..000000000000 --- a/python/tvm/micro/class_factory.py +++ /dev/null @@ -1,104 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Defines a utility for representing deferred class instatiations as JSON.""" - -import importlib -import json -import typing - - -JsonSerializable = typing.Union[int, float, str, None, bool] - - -class SerializedFactoryError(Exception): - """Raised when ClassFactory.from_json is invoked with an invalid JSON blob.""" - - -class ClassFactory: - """Describes a JSON-serializable class instantiation, for use with the RPC server.""" - - # When not None, the superclass from which all cls must derive. - SUPERCLASS = None - - def __init__( - self, - cls: typing.Callable, - init_args: typing.List[JsonSerializable], - init_kw: typing.Dict[str, JsonSerializable], - ): - self.cls = cls - self.init_args = init_args - self.init_kw = init_kw - - def override_kw(self, **kw_overrides): - kwargs = self.init_kw - if kw_overrides: - kwargs = dict(kwargs) - for k, v in kw_overrides.items(): - kwargs[k] = v - - return self.__class__(self.cls, self.init_args, kwargs) - - def instantiate(self): - return self.cls(*self.init_args, **self.init_kw) - - @property - def to_json(self): - return json.dumps( - { - "cls": ".".join([self.cls.__module__, self.cls.__name__]), - "init_args": self.init_args, - "init_kw": self.init_kw, - } - ) - - EXPECTED_KEYS = ("cls", "init_args", "init_kw") - - @classmethod - def from_json(cls, data): - """Reconstruct a ClassFactory instance from its JSON representation. - - Parameters - ---------- - data : str - The JSON representation of the ClassFactory. - - Returns - ------- - ClassFactory : - The reconstructed ClassFactory instance. - - Raises - ------ - SerializedFactoryError : - If the JSON object represented by `data` is malformed. 
- """ - obj = json.loads(data) - if not isinstance(obj, dict): - raise SerializedFactoryError(f"deserialized json payload: want dict, got: {obj!r}") - - for key in cls.EXPECTED_KEYS: - if key not in obj: - raise SerializedFactoryError( - f"deserialized json payload: expect key {key}, got: {obj!r}" - ) - - cls_package_name, cls_name = obj["cls"].rsplit(".", 1) - cls_package = importlib.import_module(cls_package_name) - cls_obj = getattr(cls_package, cls_name) - return cls(cls_obj, obj["init_args"], obj["init_kw"]) diff --git a/python/tvm/micro/contrib/stm32/__init__.py b/python/tvm/micro/contrib/stm32/__init__.py deleted file mode 100644 index 8558f5335955..000000000000 --- a/python/tvm/micro/contrib/stm32/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Module container of STM32 code generator.""" - -from .emitter import CodeEmitter, get_input_tensor_name, get_output_tensor_name diff --git a/python/tvm/micro/contrib/stm32/emitter.py b/python/tvm/micro/contrib/stm32/emitter.py deleted file mode 100644 index af0eb53ad325..000000000000 --- a/python/tvm/micro/contrib/stm32/emitter.py +++ /dev/null @@ -1,1376 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# pylint: disable=line-too-long - -"""Code emission for the STM32 targets.""" - -import contextlib -import json -import os -import re -import shutil -import tarfile -import textwrap - -from datetime import datetime - -import numpy as np - -import tvm -from tvm.contrib import utils - -AI_API_VERSION_MAJOR = 1 -AI_API_VERSION_MINOR = 0 -AI_API_VERSION_MICRO = 0 - -AI_TOOLS_REVISION = "v1" - -DBAR = "=" * 60 - - -def _fix_name(node_name): - """Replace ':' with '_' in names like 'InputImg:0'""" - return node_name.replace(":", "_") - - -def get_input_tensor_name(node_name): - return _fix_name(node_name) - - -def get_output_tensor_name(node_name, idx): - return _fix_name(node_name) + "_" + str(idx) - - -def _get_node_args_name(node_name): - return _fix_name(node_name) + "_args" - - -def _get_node_arg_types_name(node_name): - return _fix_name(node_name) + "_arg_type_ids" - - -def _get_type_size(dltype): - if dltype in ("uint64", "int64"): - return 8 - if dltype in ("uint32", "int32", "float32"): - return 4 - if dltype in ("uint16", "int16"): - return 2 - if dltype in ("uint8", "int8"): - return 1 - raise ValueError(f"Data type {dltype} is not supported") - - -C_TYPE_TO_DLTYPE = { - "uint64": "kDLUInt, 64, 1", - "int64": "kDLInt, 64, 1", - "float32": "kDLFloat, 32, 1", - "uint32": "kDLUInt, 32, 1", - "int32": "kDLInt, 32, 1", - "uint16": "kDLUInt, 16, 1", - "int16": "kDLInt, 16, 1", - "uint8": "kDLUInt, 8, 1", - "int8": "kDLInt, 8, 1", -} - - -def _get_type_data(dltype): - try: - return C_TYPE_TO_DLTYPE[dltype] - except KeyError: - raise ValueError(f"Data type {dltype} is not supported") - - -def _get_aligned_offset(offset, dltype): - align = _get_type_size(dltype) - if offset % align != 0: - offset = offset + (align - offset % align) - return offset - - -def _get_num_tensor_elts(shape): - size = 1 - for dim in shape: - size = size * dim - return size - - -def _get_tensor_size_bytes(dims, dltype): - size = _get_num_tensor_elts(dims) - return size * _get_type_size(dltype) - - -def _preprocess_code(src): - """Hack the C code implementing the model.""" - dst = "#include \n" "#include \n\n" - dst = dst + src - return dst - - -class CodeEmitter(object): - """Code emitter class.""" - - DATA_ALIGNMENT_BYTES = 8 - - def __init__(self, include_activations=True, include_inputs=True, include_outputs=True): - """Initialize the Emitter instance. - - Parameters - ---------- - include_activations: - The Emitter allocates the storage for the activations data - and places it in a specific data section. If Falsr, the - main application is responsible for allocating the activations - storage. Default: True. - - include_inputs/include_outputs: - The Emitter allocates the storage for the input/output data. - This storage is shared with the activations and placed in the - specific activations data section. If False, the main - application is responsible for allocating the input/output - data storage. Default: True. - - Returns - ------- - CodeEmitter object. - - """ - - # Static model: activations placed into a nn_data_act section - # Dynamic model: activations need to be malloc'ed by the - # applications. - self.activations_static = include_activations - - # Inputs/outputs may be allocated within the activations or - # separately. - # TODO: Separate the inputs from activations inside TVM. - if include_inputs: - assert ( - self.activations_static == True - ), "###Error: Static inputs are not allowed without activations." 
- self.inputs_static = include_inputs - - if include_outputs: - assert ( - self.activations_static == True - ), "###Error: Static outputs are not allowed without activations." - self.outputs_static = include_outputs - - # Parsed graph - self._nodes = [] - self._arg_nodes = [] - self._outputs = [] - self._attrs = {} - self._node_row_ptr = [] - - # Parameters - self._params = {} - - # Filled by data_placement() - self._weights = {} - self._activations = {} - self._input_data = {} - self._output_data = {} - self._nodes_size = 0 - self._weights_size = 0 - self._activations_size = 0 - - self._quantization = {} - - def _extract_quantization_info(self, quantization): - """Build dictionary with quantization infos.""" - - for dl_tensor_name in self._input_data: - if dl_tensor_name in quantization: - self._quantization[dl_tensor_name] = quantization[dl_tensor_name] - - # Matching outputs is more difficult because TVM does not preserve - # output tensor names. - # We only support models with a single output now. - assert len(self._output_data) == 1, "Multiple outputs models are not yet supported." - - for dl_tensor_name in self._output_data: - for name in quantization: - if name not in self._input_data: - self._quantization["output"] = quantization[name] - break - - def _get_node_arg_name(self, arg): - arg_nid = arg[0] - arg_idx = arg[1] - arg_node = self._nodes[arg_nid] - arg_name = self._nodes[arg_nid]["name"] - if arg_node["op"] == "null": - # parameter - dl_tensor_name = get_input_tensor_name(arg_name) - elif arg_node["name"] == "reshape_nop": - # Handle __nop - src = arg_node["inputs"][0] - dl_tensor_name = self._get_node_arg_name(src) - else: - # activation - dl_tensor_name = get_output_tensor_name(arg_name, arg_idx) - return dl_tensor_name - - def _tensor_is_output(self, nid, idx): - for out in self._outputs: - out_nid = out[0] - out_idx = out[1] - if out_nid == nid and out_idx == idx: - return True - return False - - def _get_tensor_from_node(self, nid, idx): - # 'eid' is index into the dltype', 'shape', etc. 
- eid = self._node_row_ptr[nid] + idx - dltype = self._attrs["dltype"][1][eid] - dims = self._attrs["shape"][1][eid] - storage_id = self._attrs["storage_id"][1][eid] - ndim = len(dims) - size = _get_tensor_size_bytes(dims, dltype) - - tensor = { - "dltype": dltype, - "ndim": ndim, - "dims": dims, - "strides": None, - "storage_id": storage_id, - "byte_offset": 0, - "offset": 0, - "size": size, - } - - return tensor - - def _compute_data_placement(self): - """Compute inputs, outputs, weight, activation sizes""" - - self._inputs = self._arg_nodes.copy() - - # weights: - offset = 0 - - for key in self._params: - - # First, find the node in graph - nid = 0 - for node in self._nodes: - if node["name"] == key: - break - nid += 1 - - dl_tensor_name = get_input_tensor_name(key) - tensor = self._get_tensor_from_node(nid, 0) - - # Compute the offset - dltype = tensor["dltype"] - aligned_offset = _get_aligned_offset(offset, dltype) - tensor["offset"] = aligned_offset - - for idx in self._arg_nodes: - node = self._nodes[idx] - node_name = node["name"] - if node_name == key: - self._inputs.remove(idx) - - self._weights[dl_tensor_name] = tensor - - # Next offset - offset = aligned_offset + tensor["size"] - - self._weights_size = offset - - # activations: - buffer_list_ = {} - - nid = 0 - for node in self._nodes: - - if node["op"] == "null": - nid += 1 - continue - - if node["op"] != "tvm_op": - raise ValueError(f"Only TVM ops are supported") - - node_name = node["name"] - node_attrs = node["attrs"] - func_name = node_attrs["func_name"] - num_outputs = int(node_attrs["num_outputs"]) - - if func_name == "__nop": - assert node_name == "reshape_nop", f"Unsupported __nop operator {node_name}." - assert num_outputs == 1 - assert not self._tensor_is_output(nid, 0) - nid += 1 - continue - - for idx in range(num_outputs): - - # Do not count the '_outputs' - if self._tensor_is_output(nid, idx): - continue - - dl_tensor_name = get_output_tensor_name(node_name, idx) - tensor = self._get_tensor_from_node(nid, idx) - - # Remember this tensor with the storage id - storage_id = tensor["storage_id"] - if storage_id not in buffer_list_: - buffer_list_[storage_id] = [] - buffer_entry = buffer_list_[storage_id] - buffer_entry.append(tensor) - - self._activations[dl_tensor_name] = tensor - - self._nodes_size = self._nodes_size + 1 - - nid += 1 - - # Compute '_input_data' - offset = 0 - for nid in self._inputs: - node = self._nodes[nid] - node_name = node["name"] - - # Arthur: I suppose that input nodes only have a single - # output dependency - dl_tensor_name = get_input_tensor_name(node_name) - - # This tensor is at some index inside '_input_data' dictionary - # depending on the '_inputs' list order. We refer to this position - # when generating the XXX.h file. 
- tensor = self._get_tensor_from_node(nid, 0) - - if self.inputs_static: - - # Remember this tensor with the storage id - storage_id = tensor["storage_id"] - if storage_id not in buffer_list_: - buffer_list_[storage_id] = [] - buffer_entry = buffer_list_[storage_id] - buffer_entry.append(tensor) - else: - - # Compute the offset - dltype = tensor["dltype"] - aligned_offset = _get_aligned_offset(offset, dltype) - tensor["offset"] = aligned_offset - - self._input_data[dl_tensor_name] = tensor - - # Next offset - offset = aligned_offset + tensor["size"] - - # Compute '_output_data' - offset = 0 - for output in self._outputs: - nid = output[0] - idx = output[1] - - node = self._nodes[nid] - node_name = node["name"] - - dl_tensor_name = get_output_tensor_name(node_name, idx) - - tensor = self._get_tensor_from_node(nid, idx) - - if self.outputs_static: - - # Remember this tensor with the storage id - storage_id = tensor["storage_id"] - if storage_id not in buffer_list_: - buffer_list_[storage_id] = [] - buffer_entry = buffer_list_[storage_id] - buffer_entry.append(tensor) - else: - - # Compute the offset - dltype = tensor["dltype"] - aligned_offset = _get_aligned_offset(offset, dltype) - tensor["offset"] = aligned_offset - - self._output_data[dl_tensor_name] = tensor - - # Next offset - offset = aligned_offset + tensor["size"] - - # Go over all storage IDs and compute offsets and _activations_size - offset = 0 - for storage_id in buffer_list_: - buffer_entry = buffer_list_[storage_id] - - new_offset = offset - for tensor in buffer_entry: - assert tensor["storage_id"] == storage_id - dltype = tensor["dltype"] - aligned_offset = _get_aligned_offset(offset, dltype) - tensor["offset"] = aligned_offset - size = tensor["size"] - if (aligned_offset + size) > new_offset: - new_offset = aligned_offset + size - offset = new_offset - - self._activations_size = offset - - def _parse_model(self, quantization=None): - """Parse the module. Build internal data structures. - - Parameters - ---------- - module : TVM module or ModuleLibraryFormat object - The module to parse - - quantization: Dictionary - The quantization information for model inputs/outputs. - """ - - for key in self._graph: - if key == "nodes": - self._nodes = self._graph["nodes"] - elif key == "arg_nodes": - self._arg_nodes = self._graph["arg_nodes"] - elif key == "node_row_ptr": - self._node_row_ptr = self._graph["node_row_ptr"] - elif key == "heads": - self._outputs = self._graph["heads"] - elif key == "attrs": - self._attrs = self._graph["attrs"] - elif key == "metadata": - continue - else: - print("### Error: JSON key {} not supported".format(key)) - assert False - - # Build all tensor lists - self._compute_data_placement() - - # Extract quantization info for inputs/outputs - if quantization is not None: - self._extract_quantization_info(quantization) - - def parse_library_format(self, model_library_format_path, quantization=None): - """Parse the module. Build internal data structures. - - Parameters - ---------- - model_library_format_path : - The ModuleLibraryFormat object to parse - - quantization: Dictionary - The quantization information for model inputs/outputs. 
- """ - - temp_dir = utils.tempdir() - extract_path = temp_dir.relpath("extract") - os.mkdir(extract_path) - with tarfile.TarFile(model_library_format_path) as f: - f.extractall(extract_path) - - with open(os.path.join(extract_path, "metadata.json")) as metadata_f: - metadata = json.load(metadata_f) - - all_module_names = [] - for name in metadata["modules"].keys(): - all_module_names.append(name) - assert len(metadata["modules"]) == 1, "Multiple modules is not supported." - - # Extract informations from the Model Library Format - graph_file = os.path.join( - extract_path, "executor-config", "graph", f"{all_module_names[0]}.graph" - ) - with open(graph_file, "r") as f: - # returns JSON object as a dictionary - graph_dict = json.load(f) - - params_dict = {} - param_file = os.path.join(extract_path, "parameters", "default.params") - with open(param_file, "rb") as f: - params = tvm.runtime.load_param_dict(f.read()) - - # Map -> Python Dict - tmp_dict = {} - for (k, v) in params.items(): - tmp_dict[k] = v - - # Sort params for debugging - for k in sorted(tmp_dict.keys()): - params_dict[k] = tmp_dict[k] - - src_dir = os.path.join(extract_path, "codegen", "host", "src") - # List of strings from Model Library Format C files - src_files = [] - for filename in os.listdir(src_dir): - with open(os.path.join(src_dir, filename), "r") as fin: - src = fin.read() - src_files.append(src) - - self._graph = graph_dict - self._params = params_dict - self._lib = src_files - - self._parse_model(quantization) - - def parse_module(self, module, quantization=None): - """Parse the module. Build internal data structures. - - Parameters - ---------- - module : TVM Runtime Module - The module to parse. - - quantization: Dictionary - The quantization information for model inputs/outputs. 
- """ - - graph = module.get_json() - if not isinstance(graph, (str,)): - try: - graph = graph._tvm_graph_json() - except AttributeError: - raise ValueError("Type %s is not supported" % type(graph)) - - # Sort params for debugging - params_dict = {} - tmp_params = module.get_params() - for k in sorted(tmp_params.keys()): - params_dict[k] = tmp_params[k] - - self._graph = json.loads(graph) - self._params = params_dict - self._lib = module.get_lib() - - self._parse_model(quantization) - - def _emit_params_data(self, name, out_h, out_c): - """Emits the network_data[c,h] files with parameters.""" - - name_upper = name.upper() - - # XXX_data.h - - out_h.write( - textwrap.dedent( - f"""\ - #ifndef __{name_upper}_DATA_H_ - #define __{name_upper}_DATA_H_ - - #include \"ai_runtime_api.h\" - - AI_API_ENTRY - const ai_ptr ai_{name}_data_weights_get (void); - - #endif /* __{name_upper}_DATA_H_ */ - """ - ) - ) - - # XXX_data.cc - - out_c.write( - textwrap.dedent( - f""" - #include \"{name}_data.h\" - - const ai_ptr ai_{name}_data_weights_get (void) - {{ - AI_ALIGNED({self.DATA_ALIGNMENT_BYTES}) static const __attribute__ ((section(\".nn_weights\"))) uint8_t s_{name}_weights[] = {{ - """ - ) - ) - - # Weights are arranged in the order of 'params_' - offset = 0 - - for key in self._params: - data = self._params[key] # ND Array - npdata = data.asnumpy() - blob = npdata.tobytes() - - out_c.write(f'// "{key}": \n') - out_c.write(f"\t") - - count = 0 - - # Align by emitting garbage between un-aligned data - dl_tensor_name = get_input_tensor_name(key) - tensor = self._weights[dl_tensor_name] - tensor_offset = tensor["offset"] - tensor_size = tensor["size"] - - while offset < tensor_offset: - count += 1 - out_c.write("0x{:02X}, ".format(0)) - if count == 12: - out_c.write("\n\t") - count = 0 - offset += 1 - - for val in blob: - count += 1 - out_c.write("0x{:02X}, ".format(val)) - if count == 12: - out_c.write("\n\t") - count = 0 - - offset += tensor_size - - out_c.write(f"\n") - - out_c.write( - textwrap.dedent( - f"""\ - }}; - return (const ai_ptr)s_{name}_weights; - }} - """ - ) - ) - - def _emit_open(self, name, out_h, out_c): - """Emits the network.h file with a few network defines and - writes the header part of the network.c file.""" - - name_upper = name.upper() - - input_size = len(self._input_data) - output_size = len(self._output_data) - - # XXX.h - - out_h.write( - textwrap.dedent( - f"""\ - #ifndef __AI_{name_upper}_H__ - #define __AI_{name_upper}_H__ - - #include \"ai_runtime_api.h\" - - #define _{name_upper}_INPUTS_COUNT_ ({input_size}) - #define _{name_upper}_OUTPUTS_COUNT_ ({output_size}) - #define _{name_upper}_ACTIVATION_BYTES_ ({self._activations_size}) - """ - ) - ) - - # XXX.c - - out_c.write( - textwrap.dedent( - f"""\ - #include - - #include \"dlpack/dlpack.h\" - #include \"tvm/runtime/c_runtime_api.h\" - #include \"{name}.h\" - #include \"{name}_data.h\" - """ - ) - ) - - def _emit_close(self, name, out_h, out_c): - """Emits the ai_model_info structure.""" - - name_upper = name.upper() - - # datetime object containing current date and time - now = datetime.now() - # dd/mm/YY H:M:S - dt_string = now.strftime("%d/%m/%Y %H:%M:%S") - - # XXX.h - - out_h.write(f"#endif /*__AI_{name_upper}_H__*/ \n") - - # XXX.c - - if self.activations_static: - out_c.write( - f'AI_ALIGNED({self.DATA_ALIGNMENT_BYTES}) __attribute__ ((section(".{name}.nn_data_act"))) uint8_t {name}_activations[{self._activations_size}];\n' - ) - else: - out_c.write(f"AI_STATIC ai_ptr {name}_activations = NULL;") - - # Emit 
network structure - num_inputs = len(self._input_data) - num_outputs = len(self._output_data) - - tool_version = tvm.__version__ - api_version = f"{AI_API_VERSION_MAJOR}.{AI_API_VERSION_MINOR}.{AI_API_VERSION_MICRO}.0" - - out_c.write( - textwrap.dedent( - f""" - AI_API_ENTRY __attribute__ ((section(".nn_models"))) ai_model_info {name}_network = {{ - .name = \"{name}\", - .datetime = \"{dt_string}\", - .revision = \"{AI_TOOLS_REVISION}\", - .tool_version = \"{tool_version}\", - .api_version = \"{api_version}\", - .n_nodes = {self._nodes_size}, - .n_inputs = {num_inputs}, - .n_outputs = {num_outputs}, - .activations_size = {self._activations_size}, - .params_size = {self._weights_size}, - .activations = {name}_activations, - .inputs = _InputsList, - .outputs = _OutputsList, - .ai_get_params = &ai_{name}_data_weights_get, - .ai_create = &ai_{name}_create, - .ai_destroy = &ai_{name}_destroy, - .ai_run = &ai_{name}_run - }}; - """ - ) - ) - - def _emit_tensor_shape(self, dl_tensor_name, ndim, shape, strides, out_c): - out_c.write(f"AI_STATIC int64_t {dl_tensor_name}_shape[{ndim}] = {{{shape[1:-1]}}}; \n") - assert strides is None, f"###Error: non-compact tensors are not handled yet." - out_c.write(f"AI_STATIC int64_t {dl_tensor_name}_strides[{ndim}] = {{}}; \n") - - def _emit_tensor_quant(self, dl_tensor_name, out_c): - - if dl_tensor_name in self._quantization: - quantization = self._quantization[dl_tensor_name] - - # At this time, TVM only supports quantization info with - # single output models. - elif dl_tensor_name in self._output_data and "output" in self._quantization.keys(): - quantization = self._quantization["output"] - else: - quantization = None - - if quantization is not None: - scale = quantization["scale"] - zero_point = quantization["zero_point"] - - # Sometimes we get a scalar with ScaleAsNumpy. - # This seem to mean not quantized ? 
- if not isinstance(scale, np.ndarray): - assert scale == 0.0, f"Non-quantized tensor with scale != 0.0" - assert ( - not isinstance(zero_point, np.ndarray) and zero_point == 0 - ), f"Non-quantized tensor with zero_point != 0" - return None - - scale_size = len(scale) - zero_point_size = len(zero_point) - - assert len(scale) == len( - zero_point - ), f"Inconsistent quantizations scale:{scale} vs zero-point:{zero_point}" - - if len(scale) == 1: - quant_name = dl_tensor_name + "_quant" - - out_c.write(f"AI_STATIC float {quant_name}_scale[{scale_size}] = {{ ") - for val in scale: - out_c.write(f"{val}, ") - out_c.write(f"}};\n") - out_c.write(f"AI_STATIC int32_t {quant_name}_zero_point[{zero_point_size}] = {{ ") - for val in zero_point: - out_c.write(f"{val}, ") - out_c.write(f"}};") - out_c.write( - textwrap.dedent( - f""" - AI_STATIC ai_quantization_info {quant_name} = {{ - .scale = {quant_name}_scale, - .zero_point = {quant_name}_zero_point, - .dim = -1 - }}; - """ - ) - ) - - return quant_name - - return None - - def _emit_tensor_init(self, dl_tensor_name, tensor, out_c): - """Emits the tensor instantiation code.""" - - dltype = tensor["dltype"] - dims = tensor["dims"] - strides = tensor["strides"] - byte_offset = tensor["byte_offset"] - dtype = _get_type_data(dltype) - ndim = len(dims) - shape = str(dims) - self._emit_tensor_shape(dl_tensor_name, ndim, shape, strides, out_c) - - # Quantization - quant_name = self._emit_tensor_quant(dl_tensor_name, out_c) - - # Contents - # - # TODO: use the 'storage_id': - # " .ctx = {{ {} }}, \n".format(str(storage_id)[1:-1]) - out_c.write( - textwrap.dedent( - f""" - AI_ALIGNED({self.DATA_ALIGNMENT_BYTES}) AI_STATIC ai_tensor {dl_tensor_name} = {{ - .dltensor = {{ - .data = (ai_ptr)(NULL), - .device = {{kDLCPU,0}}, - .ndim = {ndim}, - .dtype = {{{dtype}}}, - .shape = {dl_tensor_name}_shape, - .strides = {dl_tensor_name}_strides, - .byte_offset = {byte_offset} - }}, - """ - ) - ) - - # Figure out quantization, if exists - if quant_name is not None: - out_c.write(f" .quant = &{quant_name} \n") - else: - out_c.write(f" .quant = NULL \n") - out_c.write(f"}}; \n") - - def _emit_activation_buffers(self, name, out_c): - # pylint: disable=unused-argument - """Emits activation tensors, including inputs/outputs.""" - - out_c.write( - textwrap.dedent( - f"""\ - // - // Inputs: - // - """ - ) - ) - - # shape/buffer - for dl_tensor_name in self._input_data: - tensor = self._input_data[dl_tensor_name] - self._emit_tensor_init(dl_tensor_name, tensor, out_c) - out_c.write(f"\n") - out_c.write(f"\n") - - # tensor - idx = 0 - out_c.write(f"AI_STATIC ai_tensor * _InputsList[] = {{ \n") - for dl_tensor_name in self._input_data: - out_c.write(f" &{dl_tensor_name}, // [{idx}]\n") - idx = idx + 1 - out_c.write(f"}}; \n") - out_c.write(f"\n") - - out_c.write( - textwrap.dedent( - f"""\ - // - // Activations: - // - """ - ) - ) - for dl_tensor_name in self._activations: - tensor = self._activations[dl_tensor_name] - self._emit_tensor_init(dl_tensor_name, tensor, out_c) - out_c.write(f"\n") - - # Outputs: - out_c.write( - textwrap.dedent( - f"""\ - // - // Outputs: - // - """ - ) - ) - for dl_tensor_name in self._output_data: - tensor = self._output_data[dl_tensor_name] - self._emit_tensor_init(dl_tensor_name, tensor, out_c) - out_c.write(f"\n") - out_c.write(f"\n") - - idx = 0 - out_c.write(f"AI_STATIC ai_tensor * _OutputsList[] = {{ \n") - for dl_tensor_name in self._output_data: - out_c.write(f" &{dl_tensor_name}, // [{idx}]\n") - idx = idx + 1 - out_c.write(f"}}; \n") - 
out_c.write(f"\n") - - def _emit_params_buffers(self, name, out_c): - """Emits all parameter tensors.""" - - out_c.write( - textwrap.dedent( - f""" - // - // Weights: {name} - // - """ - ) - ) - for dl_tensor_name in self._weights: - tensor = self._weights[dl_tensor_name] - self._emit_tensor_init(dl_tensor_name, tensor, out_c) - out_c.write(f"\n") - - def _emit_network(self, name, out_c): - """Emits prototypes for the network operator functions.""" - - out_c.write( - textwrap.dedent( - f""" - // - // Network: {name} - // - """ - ) - ) - for node in self._nodes: - if node["op"] == "null": - continue - assert node["op"] == "tvm_op", f"###Error: Only TVM ops are supported." - node_attrs = node["attrs"] - func_name = node_attrs["func_name"] - - if func_name == "__nop": - continue - - out_c.write( - f"TVM_DLL int32_t {func_name}(void * args, void * arg_type_ids, int32_t num_args); \n" - ) - out_c.write(f"\n") - - def _emit_tensor_activation(self, dl_tensor_name, tensor, out_c): - - storage_id = tensor["storage_id"] - offset = tensor["offset"] - out_c.write( - textwrap.indent( - textwrap.dedent( - f""" - // - // {dl_tensor_name}: storage_id:{storage_id} - // - {dl_tensor_name}.dltensor.data = (ai_ptr)(activations + {offset}); - """ - ), - " ", - ) - ) - - def _emit_activation_init(self, name, out_c): - """Emits buffer initialization code for activation tensors.""" - - out_c.write( - textwrap.dedent( - f""" - // {DBAR} - // {name}_configure_activations - // {DBAR} - AI_STATIC AI_INLINE - ai_status {name}_configure_activations ( - const ai_ptr activations - ) - {{ - if (activations == NULL) {{ - TVMAPISetLastError (\"Non-null activations arena is required for this model.\"); - return AI_STATUS_ERROR; - }} - """ - ) - ) - - # Allocate inputs with the static model - if self.inputs_static: - for dl_tensor_name in self._input_data: - tensor = self._input_data[dl_tensor_name] - self._emit_tensor_activation(dl_tensor_name, tensor, out_c) - - # Prepare activation buffers - for dl_tensor_name in self._activations: - tensor = self._activations[dl_tensor_name] - self._emit_tensor_activation(dl_tensor_name, tensor, out_c) - - # Allocate outputs with the static model - if self.outputs_static: - for dl_tensor_name in self._output_data: - tensor = self._output_data[dl_tensor_name] - self._emit_tensor_activation(dl_tensor_name, tensor, out_c) - - out_c.write( - textwrap.dedent( - f""" - return AI_STATUS_OK; - }} - """ - ) - ) - - def _emit_params_init(self, name, out_c): - """Emits buffer initialization code for params tensors.""" - - out_c.write( - textwrap.dedent( - f""" - // {DBAR} - // {name}_configure_weights - // {DBAR} - AI_STATIC AI_INLINE - ai_status {name}_configure_weights ( - const ai_ptr weights - ) - {{ - if (weights == NULL) {{ - TVMAPISetLastError(\"Non-null weights arena is required for this model.\"); - return AI_STATUS_ERROR; - }} - """ - ) - ) - - for dl_tensor_name in self._weights: - tensor = self._weights[dl_tensor_name] - offset = tensor["offset"] - out_c.write( - textwrap.indent( - textwrap.dedent( - f"""\ - // - // {dl_tensor_name} - // - {dl_tensor_name}.dltensor.data = (ai_ptr)(weights + {offset}); - """ - ), - " ", - ) - ) - - out_c.write( - textwrap.dedent( - f""" - return AI_STATUS_OK; - }} - """ - ) - ) - - def _emit_init(self, name, out_c): - """Emits buffer initialization code.""" - - self._emit_activation_init(name, out_c) - self._emit_params_init(name, out_c) - - def _emit_run(self, name, out_h, out_c): - """Emits the run function code.""" - - out_h.write( - textwrap.dedent( - 
f""" - AI_API_ENTRY - ai_status ai_{name}_run ( - ai_tensor *inputs[], - ai_tensor *outputs[] - ); - """ - ) - ) - - out_c.write( - textwrap.dedent( - f""" - // {DBAR} - // ai_{name}_run - // {DBAR} - AI_API_ENTRY - ai_status ai_{name}_run ( - ai_tensor *inputs[], - ai_tensor *outputs[] - ) - {{ - """ - ) - ) - - # Execute nodes one by one - nid = 0 - - for node in self._nodes: - node_name = node["name"] - node_name_upper = node_name.upper() - - nid += 1 - - if node["op"] == "null": - continue - - assert node["op"] == "tvm_op", f"###Error: Only TVM ops are supported." - node_attrs = node["attrs"] - func_name = node_attrs["func_name"] - - if func_name == "__nop": - continue - - out_c.write(f" // \n") - out_c.write(f" // {func_name}\n") - out_c.write(f" // \n") - - # Prepare TVM packed function - this is the one called - if name == "__nop": - print(" exec: __nop") - continue - - if name == "__copy": - print(" exec: __copy") - continue - - # Get function from the TVM module - # - # void * args : arg_values.data() - # void * arg_type_ids : arg_tcodes.data() - # int32_t num_args : arg_values.size() - - dl_args_name = _get_node_args_name(node_name) - dl_arg_types_name = _get_node_arg_types_name(node_name) - - num_inputs = len(node["inputs"]) - num_outputs = int(node_attrs["num_outputs"]) - num_args = num_inputs + num_outputs - - out_c.write(f" TVMValue {dl_args_name}[{num_args}]; \n") - out_c.write(f" int32_t {dl_arg_types_name}[{num_args}]; \n") - - curr_idx = 0 - - for arg in node["inputs"]: - dl_tensor_name = self._get_node_arg_name(arg) - # - # If this input is not an activation or a parameter => find the input - # - if dl_tensor_name not in self._weights and dl_tensor_name not in self._activations: - - assert dl_tensor_name in self._input_data, "Tensor {} not registered ?".format( - dl_tensor_name - ) - - input_idx = 0 - for dl_entry_name in self._input_data: - if dl_entry_name == dl_tensor_name: - break - input_idx += 1 - out_c.write( - f" {dl_args_name}[{curr_idx}].v_handle = &inputs[{input_idx}]->dltensor; \n" - ) - else: - out_c.write( - f" {dl_args_name}[{curr_idx}].v_handle = &{dl_tensor_name}.dltensor; \n" - ) - out_c.write(f" {dl_arg_types_name}[{curr_idx}] = kTVMNDArrayHandle; \n") - - curr_idx += 1 - - for idx in range(num_outputs): - dl_tensor_name = get_output_tensor_name(node_name, idx) - - # If this output is not an activation => find the output - if dl_tensor_name not in self._activations: - - assert dl_tensor_name in self._output_data - - output_idx = 0 - for dl_exit_name in self._output_data: - if dl_exit_name == dl_tensor_name: - break - output_idx += 1 - out_c.write( - f" {dl_args_name}[{curr_idx}].v_handle = &outputs[{output_idx}]->dltensor; \n" - ) - else: - out_c.write( - f" {dl_args_name}[{curr_idx}].v_handle = &{dl_tensor_name}.dltensor; \n" - ) - out_c.write(f" {dl_arg_types_name}[{curr_idx}] = kTVMNDArrayHandle; \n") - out_c.write(f"\n") - - curr_idx += 1 - - # call this function - out_c.write( - textwrap.dedent( - f""" - #if (_VERBOSE_ > 0) - printf (\" {func_name} ... 
\\r\\n\"); - #endif - if ({func_name} ({dl_args_name}, {dl_arg_types_name}, {num_args})) {{ - TVMAPISetLastError("Invalid handle"); - return AI_STATUS_ERROR; - }} - #if (_VERBOSE_ > 0) - printf (\" {func_name} Done.\\r\\n\"); - #endif - """ - ) - ) - out_c.write(f"\n") - out_c.write( - textwrap.dedent( - f""" - return AI_STATUS_OK; - }} - """ - ) - ) - out_c.write(f"\n") - - def _emit_create_destroy(self, name, out_h, out_c): - """Emits the create/destroy functions.""" - - out_h.write( - textwrap.dedent( - f""" - AI_API_ENTRY - ai_status ai_{name}_create ( - const ai_ptr weights, - const ai_ptr activations - ); - """ - ) - ) - - out_h.write( - textwrap.dedent( - f""" - AI_API_ENTRY - ai_status ai_{name}_destroy (); - """ - ) - ) - - out_c.write( - textwrap.dedent( - f""" - // {DBAR} - // ai_{name}_create - // {DBAR} - AI_API_ENTRY - ai_status ai_{name}_create( - const ai_ptr weights, - const ai_ptr activations - ) - {{ - ai_status status = AI_STATUS_OK; - status = {name}_configure_weights (weights); - if (status != AI_STATUS_OK) {{ - return status; - }} - status = {name}_configure_activations (activations); - if (status != AI_STATUS_OK) {{ - return status; - }} - return AI_STATUS_OK; - }} - """ - ) - ) - - out_c.write( - textwrap.dedent( - f""" - // {DBAR} - // ai_{name}_destroy - // {DBAR} - AI_API_ENTRY - ai_status ai_{name}_destroy () - {{ - return AI_STATUS_OK; - }} - """ - ) - ) - - def emit_code(self, dest_dir, model_name): - """Emits the C code implementing the model.""" - - # Build the directory structure - if os.path.exists(dest_dir): - raise ValueError(f"emit_code.Error: {dest_dir} exists.") - - # Make a new one - os.makedirs(dest_dir) - - # Fix the model name - model_name = re.sub("[^0-9a-zA-Z_]+", "_", model_name) - model_name = model_name.lower() - - # Write the C code: we can parse the string - if isinstance(self._lib, list): - # List of strings from Model Library Format C files - for idx, src in enumerate(self._lib): - code = _preprocess_code(src) - filename = os.path.join(dest_dir, f"{model_name}_lib{idx}.c") - with open(filename, "w") as fout: - fout.write(code) - else: - # a TVM RuntimeGraphFactory - src = self._lib.get_source(fmt="c") - code = _preprocess_code(src) - filename = os.path.join(dest_dir, f"{model_name}_lib.c") - with open(filename, "w") as fout: - fout.write(code) - - # Save params as binary data - saved_params = tvm.runtime.save_param_dict(self._params) - params_name = os.path.join(dest_dir, model_name + ".params") - with open(params_name, "wb") as f: - f.write(saved_params) - - # Write the .json - graph_name = os.path.join(dest_dir, model_name + ".json") - json_string = json.dumps(self._graph, indent=4) - with open(graph_name, "w") as f: - print(json_string, file=f) - - # emit X_data[c,h] - data_h_name = os.path.join(dest_dir, model_name + "_data.h") - data_c_name = os.path.join(dest_dir, model_name + "_data.c") - model_h_name = os.path.join(dest_dir, model_name + ".h") - model_c_name = os.path.join(dest_dir, model_name + ".c") - - with contextlib.ExitStack() as exit_stack: - - # emit X[c,h] - - data_h = exit_stack.enter_context(open(data_h_name, "w")) - data_c = exit_stack.enter_context(open(data_c_name, "w")) - out_h = exit_stack.enter_context(open(model_h_name, "w")) - out_c = exit_stack.enter_context(open(model_c_name, "w")) - - self._emit_params_data(model_name, data_h, data_c) - - self._emit_open(model_name, out_h, out_c) - self._emit_params_buffers(model_name, out_c) - self._emit_activation_buffers(model_name, out_c) - self._emit_network(model_name, 
out_c) - - self._emit_init(model_name, out_c) - self._emit_create_destroy(model_name, out_h, out_c) - self._emit_run(model_name, out_h, out_c) - - self._emit_close(model_name, out_h, out_c) diff --git a/python/tvm/micro/debugger.py b/python/tvm/micro/debugger.py deleted file mode 100644 index 9829a3929eeb..000000000000 --- a/python/tvm/micro/debugger.py +++ /dev/null @@ -1,388 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=consider-using-with - -"""Defines functions for controlling debuggers for micro TVM binaries.""" - -import atexit -import abc -import errno -import logging -import os -import shlex -import signal -import subprocess -import sys -import termios -import threading -import time - -import psutil - -from .._ffi import register_func -from . import class_factory -from . import transport -from .transport.file_descriptor import FdTransport - - -_LOG = logging.getLogger(__name__) - - -class Debugger(metaclass=abc.ABCMeta): - """An interface for controlling micro TVM debuggers.""" - - @abc.abstractmethod - def start(self): - """Start the debugger, but do not block on it. - - The runtime will continue to be driven in the background. - """ - raise NotImplementedError() - - @abc.abstractmethod - def stop(self): - """Terminate the debugger.""" - raise NotImplementedError() - - -class GdbDebugger(Debugger): - """Handles launching, suspending signals, and potentially dealing with terminal issues.""" - - # Number of seconds to wait in stop() for a graceful shutdown. After this time has elapsed, - # the debugger is kill()'d. - _GRACEFUL_SHUTDOWN_TIMEOUT_SEC = 5.0 - - # The instance of GdbDebugger that's currently started. 
- _STARTED_INSTANCE = None - - @classmethod - def _stop_all(cls): - if cls._STARTED_INSTANCE: - cls._STARTED_INSTANCE.stop() - - def __init__(self): - super(GdbDebugger, self).__init__() - self._is_running = False - self._is_running_lock = threading.RLock() - self._child_exited_event = threading.Event() - self._signals_reset_event = threading.Event() - - @abc.abstractmethod - def popen_kwargs(self): - raise NotImplementedError() - - def _internal_stop(self): - if not self._is_running: - return - - os.kill(os.getpid(), signal.SIGUSR1) - self._signals_reset_event.wait() - termios.tcsetattr(sys.stdin.fileno(), termios.TCSAFLUSH, self.old_termios) - - try: - children = psutil.Process(self.popen.pid).children(recursive=True) - for c in children: - c.terminate() - _, alive = psutil.wait_procs(children, timeout=self._GRACEFUL_SHUTDOWN_TIMEOUT_SEC) - for a in alive: - a.kill() - except psutil.NoSuchProcess: - pass - finally: - self.__class__._STARTED_INSTANCE = None - self._is_running = False - self._child_exited_event.set() - - def _wait_for_child(self): - self.popen.wait() - with self._is_running_lock: - self._internal_stop() - - @classmethod - def _sigusr1_handler(cls, signum, stack_frame): # pylint: disable=unused-argument - assert ( - cls._STARTED_INSTANCE is not None - ), "overridden sigusr1 handler should not be invoked when GDB not started" - signal.signal(signal.SIGINT, cls._STARTED_INSTANCE.old_sigint_handler) - signal.signal(signal.SIGUSR1, cls._STARTED_INSTANCE.old_sigusr1_handler) - cls._STARTED_INSTANCE._signals_reset_event.set() - - @classmethod - def _sigint_handler(cls, signum, stack_frame): # pylint: disable=unused-argument - assert ( - cls._STARTED_INSTANCE is not None - ), "overridden sigint handler should not be invoked when GDB not started" - with cls._STARTED_INSTANCE._is_running_lock: - exists = cls._STARTED_INSTANCE._is_running - if exists: - try: - os.killpg(cls._STARTED_INSTANCE.child_pgid, signal.SIGINT) - except ProcessLookupError: - pass - - def start(self): - with self._is_running_lock: - assert not self._is_running - assert not self._STARTED_INSTANCE - - kwargs = self.popen_kwargs() - self.did_start_new_session = kwargs.setdefault("start_new_session", True) - - self.old_termios = termios.tcgetattr(sys.stdin.fileno()) - self.popen = subprocess.Popen(**kwargs) - self._is_running = True - self.old_sigint_handler = signal.signal(signal.SIGINT, self._sigint_handler) - self.old_sigusr1_handler = signal.signal(signal.SIGUSR1, self._sigusr1_handler) - self.__class__._STARTED_INSTANCE = self - try: - self.child_pgid = os.getpgid(self.popen.pid) - except Exception: - self.stop() - raise - with self._is_running_lock: - self._is_child_alive = True - t = threading.Thread(target=self._wait_for_child) - t.daemon = True - t.start() - - def stop(self): - self._child_exited_event.wait() - - -atexit.register(GdbDebugger._stop_all) - - -class GdbTransportDebugger(GdbDebugger): - """A debugger that uses a single GDB subprocess as both the transport and the debugger. - - Opens pipes for the target's stdin and stdout, launches GDB and configures GDB's target - arguments to read and write from the pipes using /dev/fd. 
- """ - - def __init__(self, args, **popen_kw): - super(GdbTransportDebugger, self).__init__() - self.args = args - self.popen_kw = popen_kw - - def popen_kwargs(self): - stdin_read, stdin_write = os.pipe() - stdout_read, stdout_write = os.pipe() - - os.set_inheritable(stdin_read, True) - os.set_inheritable(stdout_write, True) - - sysname = os.uname()[0] - if sysname == "Darwin": - args = [ - "lldb", - "-O", - f"target create {self.args[0]}", - "-O", - f"settings set target.input-path /dev/fd/{stdin_read}", - "-O", - f"settings set target.output-path /dev/fd/{stdout_write}", - ] - if len(self.args) > 1: - args.extend( - ["-O", "settings set target.run-args {}".format(" ".join(self.args[1:]))] - ) - elif sysname == "Linux": - args = [ - "gdb", - "-ex", - f"file {self.args[0]}", - "-ex", - ( - f"set args {' '.join(shlex.quote(a) for a in self.args[1:])} " - f"/dev/fd/{stdout_write}" - ), - ] - else: - raise NotImplementedError(f"System {sysname} is not yet supported") - - self.fd_transport = FdTransport( - stdout_read, stdin_write, transport.debug_transport_timeouts() - ) - self.fd_transport.open() - - return { - "args": args, - "pass_fds": [stdin_read, stdout_write], - } - - def _internal_stop(self): - self.fd_transport.close() - super(GdbTransportDebugger, self)._internal_stop() - - class _Transport(transport.Transport): - def __init__(self, gdb_transport_debugger): - self.gdb_transport_debugger = gdb_transport_debugger - - def timeouts(self): - return transport.debug_transport_timeouts() - - def open(self): - pass # Pipes opened by parent class. - - def write(self, data, timeout_sec): - end_time = time.monotonic() + timeout_sec if timeout_sec is not None else None - while True: - try: - return self.gdb_transport_debugger.fd_transport.write(data, timeout_sec) - except OSError as exc: - # NOTE: this error sometimes happens when writes are initiated before the child - # process launches. - if exc.errno == errno.EAGAIN: - if end_time is None or time.monotonic() < end_time: - time.sleep(0.1) # sleep to avoid excessive CPU usage - continue - - raise exc - - raise base.IoTimeoutError() - - def read(self, n, timeout_sec): - end_time = time.monotonic() + timeout_sec if timeout_sec is not None else None - while True: - try: - return self.gdb_transport_debugger.fd_transport.read(n, timeout_sec) - except OSError as exc: - # NOTE: this error sometimes happens when reads are initiated before the child - # process launches. - if exc.errno == errno.EAGAIN: - if end_time is None or time.monotonic() < end_time: - time.sleep(0.1) # sleep to avoid excessive CPU usage - continue - - raise exc - - raise base.IoTimeoutError() - - def close(self): - pass # Pipes closed by parent class (DebugWrapperTransport calls stop() next). 
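    # The write() and read() wrappers above share one non-obvious pattern: a
    # deadline-bounded retry on EAGAIN, needed because I/O can be attempted before the
    # child debugger process has opened its end of the /dev/fd pipes. A standalone
    # sketch of that pattern follows; the do_io callable and poll_interval argument are
    # illustrative names and not part of the original module.
    import errno
    import time

    def retry_on_eagain(do_io, timeout_sec, poll_interval=0.1):
        # Retry do_io() until it succeeds, the deadline passes, or a different error occurs.
        end_time = None if timeout_sec is None else time.monotonic() + timeout_sec
        while True:
            try:
                return do_io()
            except OSError as exc:
                if exc.errno == errno.EAGAIN and (
                    end_time is None or time.monotonic() < end_time
                ):
                    time.sleep(poll_interval)  # back off briefly instead of busy-waiting
                    continue
                raise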
- - def transport(self): - return self._Transport(self) - - -class GdbRemoteDebugger(GdbDebugger): - """A Debugger that invokes GDB and attaches to a remote GDBserver-based target.""" - - def __init__( - self, gdb_binary, remote_hostport, debug_binary, wrapping_context_manager=None, **popen_kw - ): - super(GdbRemoteDebugger, self).__init__() - self.gdb_binary = gdb_binary - self.remote_hostport = remote_hostport - self.debug_binary = debug_binary - self.wrapping_context_manager = wrapping_context_manager - self.popen_kw = popen_kw - - def popen_kwargs(self): - kwargs = { - "args": [ - self.gdb_binary, - "-iex", - f"file {self.debug_binary}", - "-iex", - f"target remote {self.remote_hostport}", - ], - } - kwargs.update(self.popen_kw) - - return kwargs - - def start(self): - if self.wrapping_context_manager is not None: - self.wrapping_context_manager.__enter__() - super(GdbRemoteDebugger, self).start() - - def stop(self): - try: - super(GdbRemoteDebugger, self).stop() - finally: - if self.wrapping_context_manager is not None: - self.wrapping_context_manager.__exit__(None, None, None) - - -GLOBAL_DEBUGGER = None - - -class DebuggerFactory(class_factory.ClassFactory): - - SUPERCLASS = Debugger - - -def launch_debugger(debugger_factory, *args, **kw): - global GLOBAL_DEBUGGER - if GLOBAL_DEBUGGER is not None: - stop_debugger() - - GLOBAL_DEBUGGER = debugger_factory.instantiate(*args, **kw) - GLOBAL_DEBUGGER.start() - - -@register_func("tvm.micro.debugger.launch_debugger") -def _launch_debugger(debugger_factory_json): - launch_debugger(DebuggerFactory.from_json(debugger_factory_json)) - - -@register_func("tvm.micro.debugger.stop_debugger") -def stop_debugger(): - global GLOBAL_DEBUGGER - if GLOBAL_DEBUGGER is not None: - try: - GLOBAL_DEBUGGER.stop() - finally: - GLOBAL_DEBUGGER = None - - -class RpcDebugger(Debugger): - """A Debugger instance that launches the actual debugger on a remote TVM RPC server.""" - - def __init__(self, rpc_session, factory, wrapping_context_manager=None): - super(RpcDebugger, self).__init__() - self._factory = factory - self.launch_debugger = rpc_session.get_function("tvm.micro.debugger.launch_debugger") - self.stop_debugger = rpc_session.get_function("tvm.micro.debugger.stop_debugger") - self.wrapping_context_manager = wrapping_context_manager - - def start(self): - if self.wrapping_context_manager is not None: - self.wrapping_context_manager.__enter__() - - try: - self.launch_debugger(self._factory.to_json) - except Exception: - if self.wrapping_context_manager is not None: - self.wrapping_context_manager.__exit__(None, None, None) - raise - - try: - input("Press [Enter] when debugger is set") - except Exception: - self.stop() - raise - - self._is_running = True - - def stop(self): - try: - self.stop_debugger() - finally: - if self.wrapping_context_manager is not None: - self.wrapping_context_manager.__exit__(None, None, None) diff --git a/python/tvm/micro/model_library_format.py b/python/tvm/micro/model_library_format.py deleted file mode 100644 index e54f4bfed1dd..000000000000 --- a/python/tvm/micro/model_library_format.py +++ /dev/null @@ -1,669 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=cell-var-from-loop, use-list-literal - -"""Defines functions for exporting to Model Library Format.""" - -import datetime -import json -import os -import pathlib -import re -import tarfile -import typing - -import tvm -from tvm.micro import get_standalone_crt_dir, get_microtvm_template_projects - -from .._ffi import get_global_func -from ..contrib import utils -from ..driver import build_module -from ..relay import param_dict -from ..relay.backend import executor_factory -from ..relay.backend.name_transforms import prefix_generated_name, to_c_variable_style -from ..tir import expr - -# This should be kept identical to runtime::symbol::tvm_module_main -MAIN_FUNC_NAME_STR = "__tvm_main__" -STANDALONE_CRT_URL = "./runtime" -CRT_TEMPLATE_FILES_URL = "./templates" -METADATA_FILE = "metadata.json" - - -class UnsupportedInModelLibraryFormatError(Exception): - """Raised when export_model_library_format does not support the given Module tree.""" - - -def generate_c_interface_header( - module_name, - inputs, - outputs, - pools, - io_pool_allocations, - devices, - workspace_size, - include_path, - input_sizes, - output_sizes, -): - """Generate C Interface header to be included in MLF""" - mangled_name = to_c_variable_style(prefix_generated_name(module_name)) - metadata_header = os.path.join(include_path, f"{mangled_name}.h") - - interface_c_create = tvm._ffi.get_global_func("runtime.InterfaceCCreate") - interface_c_module = interface_c_create( - module_name, - inputs, - outputs, - pools, - io_pool_allocations, - devices, - workspace_size, - input_sizes, - output_sizes, - ) - - with open(metadata_header, "w") as header_file: - header_file.write(interface_c_module.get_source()) - - return metadata_header - - -# List of type_key for modules which are ephemeral and do not need to be exported. -EPHEMERAL_MODULE_TYPE_KEYS = ("metadata_module",) - - -def _populate_codegen_dir( - mods: typing.Union[ - typing.List[executor_factory.ExecutorFactoryModule], - typing.List[tvm.runtime.Module], - ], - codegen_dir: str, -): - """Populate the codegen sub-directory as part of a Model Library Format export. - - Parameters - ---------- - mods : List[tvm.relay.backend.executor_factory.ExecutorFactoryModule], List[tvm.runtime.Module] - A list of the return value of tvm.relay.build, which - will be exported into Model Library Format. - codegen_dir : str - Path to the codegen directory on disk. - module_name: Optional[str] - Name used to prefix the generated source files - - """ - dso_modules = [] - for mod in mods: - if isinstance(mod, executor_factory.ExecutorFactoryModule): - lib = mod.lib - elif isinstance(mod, tvm.runtime.Module): - lib = mod - else: - raise RuntimeError(f"Not supported module type: {type(mod)}") - - dso_modules = lib._collect_dso_modules() - non_dso_modules = lib._collect_from_import_tree(lambda m: m not in dso_modules) - - # Filter ephemeral modules which cannot be exported. 
- dso_modules = [m for m in dso_modules if m.type_key not in EPHEMERAL_MODULE_TYPE_KEYS] - non_dso_modules = [ - m for m in non_dso_modules if m.type_key not in EPHEMERAL_MODULE_TYPE_KEYS - ] - - if non_dso_modules: - raise UnsupportedInModelLibraryFormatError( - f"Don't know how to export non-c or non-llvm modules; found: {non_dso_modules!r}" - ) - - mod_indices = {"lib": 0, "src": 0} - host_codegen_dir = os.path.join(codegen_dir, "host") - lib_name = ( - f"{mod.libmod_name}_lib" - if isinstance(mod, executor_factory.ExecutorFactoryModule) - else "lib" - ) - - for dso_mod in dso_modules: - if dso_mod.type_key == "c": - assert dso_mod.format in ["c", "cc", "cpp"] - ext = dso_mod.format - index = mod_indices["src"] - mod_indices["src"] += 1 - parent_dir = os.path.join(host_codegen_dir, "src") - file_name = os.path.join(parent_dir, f"{lib_name}{index}.{ext}") - elif dso_mod.type_key == "llvm": - index = mod_indices["lib"] - mod_indices["lib"] += 1 - parent_dir = os.path.join(host_codegen_dir, "lib") - file_name = os.path.join(parent_dir, f"{lib_name}{index}.o") - else: - assert ( - False - ), f"do not expect module with type_key={lib.type_key} from _collect_dso_modules" - - if not os.path.exists(parent_dir): - os.makedirs(parent_dir) - dso_mod.save(file_name) - - -def _build_memory_map(mod): - ret = dict() - if isinstance(mod, executor_factory.GraphExecutorFactoryModule): - ret["sids"] = _build_sid_map(mod.graph_json) - ret["functions"] = _build_function_memory_map(mod.function_metadata) - return ret - - -def _build_sid_map(graph_json): - """Build a simpler storage id info map from graph JSON. - - Parameters - ---------- - graph_json : str - String representation of the graph_json created from tvm.relay.build(). - - Returns - ------- - list : - A list with one entry per storage id describing that memory. 
- """ - graph = json.loads(graph_json) - seen_storage_ids = set() - memory_map = [] - for node_id, storage_id in enumerate(graph["attrs"]["storage_id"][1]): - if storage_id in seen_storage_ids: - continue - - seen_storage_ids.add(storage_id) - num_elements = 1 - for dim in graph["attrs"]["shape"][1][storage_id]: - num_elements *= dim - - dltype = graph["attrs"]["dltype"][1][storage_id] - m = re.match(r"^[a-zA-Z]+([0-9]+)$", dltype) - assert m, f"Exported graph contains unknown dltype {dltype}" - - elem_bits = int(m.group(1)) - - map_entry = { - "storage_id": storage_id, - "size_bytes": (num_elements * elem_bits + 7) // 8, - } - if node_id in graph["arg_nodes"]: - map_entry["input_binding"] = graph["nodes"][node_id]["name"] - - memory_map.append(map_entry) - - return memory_map - - -def _create_type_metadata(input_type): - return { - "size": int(_shape_to_size(input_type.shape, input_type.dtype)), - "dtype": str(input_type.dtype), - } - - -def _flatten_tuple_outputs(ret_type, predefined_names, offset=0): - if isinstance(ret_type, tvm.ir.tensor_type.TensorType): - name = predefined_names[offset] if predefined_names else f"output{offset}" - return {name: ret_type} - - added_fields = len(ret_type.fields) - outputs = {} - for output_index in range(added_fields): - next_output = offset + len(outputs) - outputs.update( - _flatten_tuple_outputs(ret_type.fields[output_index], predefined_names, next_output) - ) - - return outputs - - -def _get_outputs_from_ret_type(ret_type, predefined_names): - if isinstance(ret_type, tvm.ir.tensor_type.TensorType): - name = predefined_names[0] if predefined_names else "output" - return {name: ret_type} - return _flatten_tuple_outputs(ret_type, predefined_names) - - -def _build_function_memory_map(function_metadata): - """Build a simple map that shows how much workspace is required to execute - each primitive function. The main_func describes how much memory is required - to execute the main control code. - - Parameters - ---------- - function_metadata : Map - This contains all the compiled metadata on a function basis - - Returns - ------- - dict : - This will have two entries: - 1.) A list with one entry per function describing local memory it is using. - 2.) A global memory requirement if all functions are executed sequentially - """ - device_max_workspace = dict() - main_func_metadata = function_metadata[MAIN_FUNC_NAME_STR] - func_entries = [] - target_local_entries = dict() - - for func_name, finfo in function_metadata.items(): - # Skip a few unsupported cases: - # 1. The main function metadata is exported elsewhere. - # 2. BYOC operator implementations do not currently export useful FunctionInfo. 
- if func_name == MAIN_FUNC_NAME_STR or not finfo.tir_primfuncs: - continue - if func_name not in target_local_entries.keys(): - target_local_entries[func_name] = list() - for target in dict(finfo.workspace_sizes).keys(): - workspace_size = finfo.workspace_sizes[target] - target_entry = { - "device": int(target.get_target_device_type()), - "workspace_size_bytes": int(workspace_size), - } - target_local_entries[func_name].append(target_entry) - if workspace_size >= device_max_workspace.get(int(target.get_target_device_type()), 0): - device_max_workspace[int(target.get_target_device_type())] = workspace_size - - for func_name, target_entries_ in target_local_entries.items(): - func_entry = { - "function_name": str(func_name), - "workspace": target_entries_, - } - func_entries.append(func_entry) - - target_main_entries = dict() - - def _create_empty_entry(target_device_type): - return { - "device": int(target_device_type), - "workspace_size_bytes": 0, - "constants_size_bytes": 0, - "io_size_bytes": 0, - } - - for target in dict(main_func_metadata.workspace_sizes).keys(): - main_func_local_workspace = main_func_metadata.workspace_sizes[target] - target_main_entries[int(target.get_target_device_type())] = _create_empty_entry( - int(target.get_target_device_type()) - ) - target_main_entries[int(target.get_target_device_type())]["workspace_size_bytes"] = int( - device_max_workspace.get(int(target.get_target_device_type()), 0) - ) + int(main_func_local_workspace) - - for target in dict(main_func_metadata.constant_sizes).keys(): - if int(target.get_target_device_type()) not in target_main_entries.keys(): - target_main_entries[int(target.get_target_device_type())] = _create_empty_entry( - int(target.get_target_device_type()) - ) - target_main_entries[int(target.get_target_device_type())]["constants_size_bytes"] = int( - main_func_metadata.constant_sizes[target] - ) - - for target in dict(main_func_metadata.io_sizes).keys(): - if int(target.get_target_device_type()) not in target_main_entries.keys(): - target_main_entries[int(target.get_target_device_type())] = _create_empty_entry( - int(target.get_target_device_type()) - ) - target_main_on_device = target_main_entries[int(target.get_target_device_type())] - target_main_on_device["io_size_bytes"] = int(main_func_metadata.io_sizes[target]) - - main_relay_func = main_func_metadata.relay_primfuncs[target] - target_main_on_device["inputs"] = { - input_param.name_hint: _create_type_metadata(input_param.checked_type) - for input_param in main_relay_func.params - } - predefined_names = ( - main_relay_func.attrs["output_tensor_names"] - if "output_tensor_names" in main_relay_func.attrs - else None - ) - target_main_on_device["outputs"] = { - name: _create_type_metadata(output_type) - for name, output_type in _get_outputs_from_ret_type( - main_relay_func.ret_type, predefined_names - ).items() - } - - ret = { - "operator_functions": func_entries, - "main": list(target_main_entries.values()), - } - return ret - - -def _get_pools_from_module(mod): - return list(dict(mod.executor_codegen_metadata.pool_inputs).values()) - - -def _get_io_pool_allocation_from_module(mod): - return dict(mod.executor_codegen_metadata.io_pool_allocations) - - -def _should_generate_interface_header(mod): - return "interface-api" in mod.executor and mod.executor["interface-api"] == "c" - - -def _make_tar(source_dir, tar_file_path, modules): - """Build a tar file from source_dir.""" - with tarfile.open(tar_file_path, "w") as tar_f: - - def reset(tarinfo): - tarinfo.uid = tarinfo.gid = 0 - 
tarinfo.uname = tarinfo.gname = "root" - return tarinfo - - tar_f.add(str(source_dir), arcname=".", filter=reset) - - for mod in modules: - is_aot = isinstance(mod, executor_factory.AOTExecutorFactoryModule) - if is_aot and str(mod.runtime) == "crt": - crt_template_path = pathlib.Path(get_microtvm_template_projects("crt")) - tar_f.add(get_standalone_crt_dir(), arcname=STANDALONE_CRT_URL) - - # Add template files from CRT template project - for file in [ - "templates/crt_config.h.template", - "templates/platform.c.template", - ]: - tar_f.add( - crt_template_path / pathlib.Path(file), - arcname=f"{CRT_TEMPLATE_FILES_URL}/{pathlib.Path(file).name}", - ) - break - - -_GENERATED_VERSION = 7 - - -def _is_module_names_unique(mods: typing.List[executor_factory.ExecutorFactoryModule]): - """Check if built modules have unique names. - - Parameters - ---------- - mods : List[tvm.relay.backend.executor_factory.ExecutorFactoryModule] - A list of the return value of tvm.relay.build, - which will be exported into Model Library Format. - """ - all_names = [] - for mod in mods: - all_names.append(mod.libmod_name) - - return len(set(all_names)) == len(all_names) - - -def _export_graph_model_library_format( - mods: typing.List[executor_factory.ExecutorFactoryModule], tempdir: pathlib.Path -): - """Export a tvm.relay.build artifact in Model Library Format. - - Parameters - ---------- - mods : List[tvm.relay.backend.executor_factory.ExecutorFactoryModule] - A list of the return value of tvm.relay.build, - which will be exported into Model Library Format. - tempdir : pathlib.Path - Temporary directory to populate with Model Library Format contents. - """ - - assert _is_module_names_unique(mods), "Multiple modules should have unique names." - - metadata = { - "version": _GENERATED_VERSION, - } - metadata["modules"] = {} - for mod in mods: - is_aot = isinstance(mod, executor_factory.AOTExecutorFactoryModule) - executor = ["aot"] if is_aot else ["graph"] - module_name = mod.libmod_name - metadata["modules"][module_name] = { - "model_name": module_name, - "export_datetime": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%SZ"), - "memory": _build_memory_map(mod), - "target": [str(t) for t in mod.target], - "executors": executor, - "style": "full-model", - } - - if is_aot and (str(mod.runtime) == "crt"): - standalone_crt = { - "short_name": "tvm_standalone_crt", - "url": f"{STANDALONE_CRT_URL}", - "url_type": "mlf_path", - "version_spec": f"{tvm.__version__}", - } - external_dependencies = [standalone_crt] - metadata["modules"][module_name]["external_dependencies"] = external_dependencies - - with open(tempdir / METADATA_FILE, "w") as json_f: - json.dump(metadata, json_f, indent=2, sort_keys=True) - - codegen_dir = tempdir / "codegen" - codegen_dir.mkdir() - _populate_codegen_dir(mods, codegen_dir) - - parameters_dir = tempdir / "parameters" - parameters_dir.mkdir() - src_dir = tempdir / "src" - src_dir.mkdir() - graph_config_dir = tempdir / "executor-config" / "graph" - for mod in mods: - if _should_generate_interface_header(mod): - include_path = codegen_dir / "host" / "include" - if not include_path.exists(): - include_path.mkdir() - - devices = mod.get_devices() - pools = _get_pools_from_module(mod) - io_pool_allocations = _get_io_pool_allocation_from_module(mod) - main_func = metadata["modules"][mod.libmod_name]["memory"]["functions"]["main"][0] - workspace_size = int(main_func["workspace_size_bytes"]) - inputs = main_func["inputs"] - outputs = main_func["outputs"] - inputs_sizes = {name: property_map["size"] 
for name, property_map in inputs.items()} - output_sizes = {name: property_map["size"] for name, property_map in outputs.items()} - input_names = list(inputs.keys()) - output_names = list(outputs.keys()) - - generate_c_interface_header( - mod.libmod_name, - input_names, - output_names, - pools, - io_pool_allocations, - devices, - workspace_size, - include_path, - inputs_sizes, - output_sizes, - ) - - is_aot = isinstance(mod, executor_factory.AOTExecutorFactoryModule) - param_filename = parameters_dir / f"{mod.libmod_name}.params" - with open(param_filename, "wb") as f: - f.write(param_dict.save_param_dict(mod.params)) - - with open(src_dir / f"{mod.libmod_name}.relay", "w") as f: - f.write(str(mod.ir_mod)) - - if not is_aot: - if not graph_config_dir.exists(): - graph_config_dir.mkdir(parents=True) - with open(graph_config_dir / f"{mod.libmod_name}.graph", "w") as f: - f.write(mod.get_executor_config()) - - -class NonStaticShapeError(Exception): - """Raised when a shape has elements other than IntImm.""" - - -def _shape_to_size(shape, dtype): - bits_per_item = int( - re.match(r"((float)|(int)|(uint))(?P[0-9]+)", dtype).group("width_bits") - ) - assert bits_per_item is not None, f"don't know how to compute size of type {dtype}" - total_bits = bits_per_item - for s in shape: - total_bits *= s - - return (total_bits + 7) // 8 - - -def _write_tir_and_build_operator_memory_map(src_dir, targets, ir_module_by_target): - def _eval_shape(param_name, buffer_shape): - shape = [] - for x in buffer_shape: - if not isinstance(x, expr.IntImm): - raise NonStaticShapeError( - f"Parameter {param_name} has shape with non-IntImm elements: {buffer_shape}" - ) - shape.append(x.value) - return shape - - memory_map = {} - for target in targets: - # TODO(mbs): The device type is not unique, better would be to use target.kind.name - target_device_type = target.get_target_device_type() - ir_mod = ir_module_by_target[target] - printer = get_global_func("relay.ir.ModelLibraryFormatPrinter")(False, None, False) - with open(src_dir / f"tir-{target_device_type}.txt", "w") as f: - f.write(printer["print"](ir_mod)) - - for v in ir_mod.get_global_vars(): - map_entry = [] - for p, b in ir_mod[v.name_hint].buffer_map.items(): - shape = _eval_shape(p.name, b.shape) - buffer_size_bytes = _shape_to_size(shape, str(b.dtype)) - # NOTE: cannot tell what is an input or output at this point. - map_entry.append( - { - "size_bytes": buffer_size_bytes, - "shape": [int(x) for x in b.shape], - "dtype": b.dtype, - "input_binding": printer["get_var_name"](p), - } - ) - memory_map[v.name_hint] = map_entry - - return memory_map - - -def _export_operator_model_library_format(mod: build_module.OperatorModule, tempdir): - """Export the result of tvm.build() in Model Library Format. - Parameters - ---------- - mod : runtime.Module - The Module returned from tvm.build(). - tempdir : str - Path to the .tar archive to generate. 
- """ - targets = [] - for target in mod.ir_module_by_target.keys(): - if str(target.kind) not in ("llvm", "c"): - raise UnsupportedInModelLibraryFormatError( - f"Operator has non-DSO-exportable target {target!s}, which is not yet supported in " - "Model Library Format" - ) - - targets.append(target) - - src_dir = tempdir / "src" - src_dir.mkdir() - memory_map = _write_tir_and_build_operator_memory_map(src_dir, targets, mod.ir_module_by_target) - - metadata = { - "version": _GENERATED_VERSION, - "model_name": mod.name, - "export_datetime": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%SZ"), - "memory": memory_map, - "target": [str(t) for t in targets], - "executors": [], - "style": "operator", - } - with open(tempdir / METADATA_FILE, "w") as metadata_f: - json.dump(metadata, metadata_f) - - codegen_dir = tempdir / "codegen" - codegen_dir.mkdir() - _populate_codegen_dir(list([mod]), codegen_dir) - - -ExportableModule = typing.Union[ - build_module.OperatorModule, - executor_factory.AOTExecutorFactoryModule, - executor_factory.GraphExecutorFactoryModule, -] - - -def export_model_library_format( - mods: typing.Union[ExportableModule, typing.List[ExportableModule]], - file_name: typing.Union[str, pathlib.Path], -): - """Export the build artifact in Model Library Format. - - This function creates a .tar archive containing the build artifacts in a standardized - layout. It's intended to allow downstream automation to build TVM artifacts against the C - runtime. - - Parameters - ---------- - mod : ExportableModule, List[ExportableModule] - The return value of tvm.build or tvm.relay.build. - file_name : str - Path to the .tar archive to generate. - - Returns - ------- - file_name : str - The path to the generated .tar archive. - """ - modules = mods - if not isinstance(mods, list): - modules = list([mods]) - - operator_module_type = all(isinstance(mod, build_module.OperatorModule) for mod in modules) - graph_module_type = all( - isinstance( - mod, - ( - executor_factory.AOTExecutorFactoryModule, - executor_factory.GraphExecutorFactoryModule, - ), - ) - for mod in modules - ) - - file_name = pathlib.Path(file_name) - tempdir = utils.tempdir() - - if operator_module_type: - if len(modules) != 1: - raise RuntimeError("Multiple operator is not supported.") - _export_operator_model_library_format(modules[0], tempdir.path) - elif graph_module_type: - _export_graph_model_library_format(modules, tempdir.path) - else: - raise NotImplementedError( - f"Don't know how to export module of type {modules[0].__class__!r}" - ) - - _make_tar(tempdir.path, file_name, modules) - - return file_name diff --git a/python/tvm/micro/project.py b/python/tvm/micro/project.py deleted file mode 100644 index 32d2cbf4db71..000000000000 --- a/python/tvm/micro/project.py +++ /dev/null @@ -1,224 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Defines glue wrappers around the Project API which mate to TVM interfaces.""" - -import pathlib -from typing import Union - -from .. import __version__ -from ..contrib import utils -from .build import get_standalone_crt_dir -from .model_library_format import ExportableModule, export_model_library_format -from .project_api import client -from .transport import Transport, TransportTimeouts - - -def add_unspecified_options(options: dict, server_project_options: list) -> dict: - """Adds default value of project template options that are not specified by user.""" - if not options: - options = dict() - for option in server_project_options: - name = option["name"] - if name not in options.keys(): - options[name] = option["default"] - return options - - -class ProjectTransport(Transport): - """A Transport implementation that uses the Project API client.""" - - def __init__(self, api_client, options): - self._api_client = api_client - self._options = options - self._timeouts = None - - def timeouts(self): - assert self._timeouts is not None, "Transport not yet opened" - return self._timeouts - - def open(self): - reply = self._api_client.open_transport(self._options) - self._timeouts = TransportTimeouts(**reply["timeouts"]) - - def close(self): - if not self._api_client.is_shutdown: - self._api_client.close_transport() - self._api_client.shutdown() - - def write(self, data, timeout_sec): - self._api_client.write_transport(data, timeout_sec) - - def read(self, n, timeout_sec): - return self._api_client.read_transport(n, timeout_sec)["data"] - - -class TemplateProjectError(Exception): - """Raised when the Project API server given to GeneratedProject reports is_template=True.""" - - -class GeneratedProject: - """Defines a glue interface to interact with a generated project through the API server.""" - - @classmethod - def from_directory(cls, project_dir: Union[pathlib.Path, str], options: dict): - return cls(client.instantiate_from_dir(project_dir), options) - - def __init__(self, api_client, options): - self._api_client = api_client - self._info = self._api_client.server_info_query(__version__) - if self._info["is_template"]: - raise TemplateProjectError() - self._options = add_unspecified_options(options, self._info["project_options"]) - - def build(self): - self._api_client.build(self._options) - - def flash(self): - self._api_client.flash(self._options) - - def transport(self): - return ProjectTransport(self._api_client, self._options) - - def info(self): - return self._info - - @property - def options(self): - return self._options - - @options.setter - def options(self, options): - self._options = options - - -class NotATemplateProjectError(Exception): - """Raised when the API server given to TemplateProject reports is_template=false.""" - - -class TemplateProject: - """Defines a glue interface to interact with a template project through the API Server.""" - - @classmethod - def from_directory(cls, template_project_dir): - return cls(client.instantiate_from_dir(template_project_dir)) - - def __init__(self, api_client): - self._api_client = api_client - self._info = self._api_client.server_info_query(__version__) - if not self._info["is_template"]: - raise NotATemplateProjectError() - - def _check_project_options(self, options: dict): - """Check if options are valid ProjectOptions""" - available_options = [option["name"] for option in self.info()["project_options"]] - if options and not 
set(options.keys()).issubset(available_options): - raise ValueError( - f"""options:{list(options)} include non valid ProjectOptions. - Here is a list of available options:{list(available_options)}.""" - ) - - def generate_project_from_mlf(self, model_library_format_path, project_dir, options: dict): - """Generate a project from MLF file.""" - self._check_project_options(options) - options = add_unspecified_options(options, self._info["project_options"]) - - self._api_client.generate_project( - model_library_format_path=str(model_library_format_path), - standalone_crt_dir=get_standalone_crt_dir(), - project_dir=project_dir, - options=options, - ) - - return GeneratedProject.from_directory(project_dir, options) - - def info(self): - return self._info - - def generate_project(self, graph_executor_factory, project_dir, options): - """Generate a project given GraphRuntimeFactory.""" - model_library_dir = utils.tempdir() - model_library_format_path = model_library_dir.relpath("model.tar") - export_model_library_format(graph_executor_factory, model_library_format_path) - - return self.generate_project_from_mlf(model_library_format_path, project_dir, options) - - -def generate_project( - template_project_dir: Union[pathlib.Path, str], - module: ExportableModule, - generated_project_dir: Union[pathlib.Path, str], - options: dict = None, -): - """Generate a project for an embedded platform that contains the given model. - - Parameters - ---------- - template_project_path : pathlib.Path or str - Path to a template project containing a microTVM Project API server. - - generated_project_path : pathlib.Path or str - Path to a directory to be created and filled with the built project. - - module : ExportableModule - A runtime.Module exportable as Model Library Format. The value returned from tvm.relay.build - or tvm.build. - - options : dict - If given, Project API options given to the microTVM API server found in both - template_project_path and generated_project_path. - - Returns - ------- - GeneratedProject : - A class that wraps the generated project and which can be used to further interact with it. - """ - template = TemplateProject.from_directory(str(template_project_dir)) - return template.generate_project(module, str(generated_project_dir), options) - - -def generate_project_from_mlf( - template_project_dir: Union[pathlib.Path, str], - project_dir: Union[pathlib.Path, str], - mlf_path: Union[pathlib.Path, str], - options: dict, -): - """Generate a project from a platform template and an existing Model Library Format archive. - - Parameters - ---------- - template_project_path : pathlib.Path or str - Path to a template project containing a microTVM Project API server. - - project_dir : pathlib.Path or str - Path to a directory where the project will be created. - - mlf_path : pathlib.Path or str - Path to the Model Library Format archive that will be used when creating - the new project. The archive file will be copied to project_dir. - - options : dict - Project API options given to the microTVM API server for the specified platform. - - Returns - ------- - GeneratedProject : - A class that wraps the generated project and which can be used to further interact with it. 
- """ - - template = TemplateProject.from_directory(str(template_project_dir)) - return template.generate_project_from_mlf(str(mlf_path), str(project_dir), options) diff --git a/python/tvm/micro/project_api/__init__.py b/python/tvm/micro/project_api/__init__.py deleted file mode 100644 index 9915040a922c..000000000000 --- a/python/tvm/micro/project_api/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""MicroTVM Project API Client and Server""" diff --git a/python/tvm/micro/project_api/client.py b/python/tvm/micro/project_api/client.py deleted file mode 100644 index e6edce94c051..000000000000 --- a/python/tvm/micro/project_api/client.py +++ /dev/null @@ -1,241 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=consider-using-with -""" -Project API client. -""" -import base64 -import io -import json -import logging -import platform -import os -import pathlib -import subprocess -import sys -import typing - -from . 
import server - -_LOG = logging.getLogger(__name__) - - -class ProjectAPIErrorBase(Exception): - """Base class for all Project API errors.""" - - -class ConnectionShutdownError(ProjectAPIErrorBase): - """Raised when a request is made but the connection has been closed.""" - - -class MalformedReplyError(ProjectAPIErrorBase): - """Raised when the server responds with an invalid reply.""" - - -class MismatchedIdError(ProjectAPIErrorBase): - """Raised when the reply ID does not match the request.""" - - -class ProjectAPIServerNotFoundError(ProjectAPIErrorBase): - """Raised when the Project API server can't be found in the repo.""" - - -class UnsupportedProtocolVersionError(ProjectAPIErrorBase): - """Raised when the protocol version returned by the API server is unsupported.""" - - -class RPCError(ProjectAPIErrorBase): - def __init__(self, request, error): - ProjectAPIErrorBase.__init__() - self.request = request - self.error = error - - def __str__(self): - return f"Calling project API method {self.request['method']}:" "\n" f"{self.error}" - - -class ProjectAPIClient: - """A client for the Project API.""" - - def __init__( - self, - read_file: typing.BinaryIO, - write_file: typing.BinaryIO, - testonly_did_write_request: typing.Optional[typing.Callable] = None, - ): - self.read_file = io.TextIOWrapper(read_file, encoding="UTF-8", errors="strict") - self.write_file = io.TextIOWrapper( - write_file, encoding="UTF-8", errors="strict", write_through=True - ) - self.testonly_did_write_request = testonly_did_write_request - self.next_request_id = 1 - - @property - def is_shutdown(self): - return self.read_file.closed - - def shutdown(self): - if self.is_shutdown: # pylint: disable=using-constant-test - return - - self.read_file.close() - self.write_file.close() - - def _request_reply(self, method, params): - if self.is_shutdown: # pylint: disable=using-constant-test - raise ConnectionShutdownError("connection already closed") - - request = { - "jsonrpc": "2.0", - "method": method, - "params": params, - "id": self.next_request_id, - } - self.next_request_id += 1 - - request_str = json.dumps(request) - self.write_file.write(request_str) - _LOG.debug("send -> %s", request_str) - self.write_file.write("\n") - if self.testonly_did_write_request: - self.testonly_did_write_request() # Allow test to assert on server processing. 
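        # A sketch of one exchange on the wire, with hypothetical values; each message is a
        # single JSON object terminated by a newline, mirroring the framing checks below:
        #   -> {"jsonrpc": "2.0", "method": "build", "params": {"options": {}}, "id": 1}
        #   <- {"jsonrpc": "2.0", "id": 1, "result": {}}
        # A failed call carries an "error" object instead of "result"; code -32000 is the
        # generic SERVER_ERROR defined by the server module:
        #   <- {"jsonrpc": "2.0", "id": 1, "error": {"code": -32000, "message": "build failed", "data": null}}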
- reply_line = self.read_file.readline() - _LOG.debug("recv <- %s", reply_line) - if not reply_line: - self.shutdown() - raise ConnectionShutdownError("got EOF reading reply from API server") - - reply = json.loads(reply_line) - - if reply.get("jsonrpc") != "2.0": - raise MalformedReplyError( - f"Server reply should include 'jsonrpc': '2.0'; " - f"saw jsonrpc={reply.get('jsonrpc')!r}" - ) - - if reply["id"] != request["id"]: - raise MismatchedIdError( - f"Reply id ({reply['id']}) does not equal request id ({request['id']}" - ) - - if "error" in reply: - raise server.JSONRPCError.from_json(f"calling method {method}", reply["error"]) - - if "result" not in reply: - raise MalformedReplyError(f"Expected 'result' key in server reply, got {reply!r}") - - return reply["result"] - - def server_info_query(self, tvm_version: str): - reply = self._request_reply("server_info_query", {"tvm_version": tvm_version}) - if reply["protocol_version"] != server.ProjectAPIServer._PROTOCOL_VERSION: - raise UnsupportedProtocolVersionError( - f'microTVM API Server supports protocol version {reply["protocol_version"]}; ' - f"want {server.ProjectAPIServer._PROTOCOL_VERSION}" - ) - - return reply - - def generate_project( - self, - model_library_format_path: str, - standalone_crt_dir: str, - project_dir: str, - options: dict = None, - ): - return self._request_reply( - "generate_project", - { - "model_library_format_path": model_library_format_path, - "standalone_crt_dir": standalone_crt_dir, - "project_dir": project_dir, - "options": (options if options is not None else {}), - }, - ) - - def build(self, options: dict = None): - return self._request_reply("build", {"options": (options if options is not None else {})}) - - def flash(self, options: dict = None): - return self._request_reply("flash", {"options": (options if options is not None else {})}) - - def open_transport(self, options: dict = None): - return self._request_reply( - "open_transport", {"options": (options if options is not None else {})} - ) - - def close_transport(self): - return self._request_reply("close_transport", {}) - - def read_transport(self, n, timeout_sec): - reply = self._request_reply("read_transport", {"n": n, "timeout_sec": timeout_sec}) - reply["data"] = base64.b85decode(reply["data"]) - return reply - - def write_transport(self, data, timeout_sec): - return self._request_reply( - "write_transport", - {"data": str(base64.b85encode(data), "utf-8"), "timeout_sec": timeout_sec}, - ) - - -# NOTE: windows support untested -SERVER_LAUNCH_SCRIPT_FILENAME = ( - f"launch_microtvm_api_server.{'sh' if platform.system() != 'Windows' else '.bat'}" -) - - -SERVER_PYTHON_FILENAME = "microtvm_api_server.py" - - -def instantiate_from_dir(project_dir: typing.Union[pathlib.Path, str], debug: bool = False): - """Launch server located in project_dir, and instantiate a Project API Client - connected to it.""" - proc_args = None - project_dir = pathlib.Path(project_dir) - - python_script = project_dir / SERVER_PYTHON_FILENAME - if python_script.is_file(): - proc_args = [sys.executable, str(python_script)] - - launch_script = project_dir / SERVER_LAUNCH_SCRIPT_FILENAME - if launch_script.is_file(): - proc_args = [str(launch_script), str(python_script)] - - if proc_args is None: - raise ProjectAPIServerNotFoundError( - f"No Project API server found in project directory: {project_dir}" - "\n" - f"Tried: {SERVER_LAUNCH_SCRIPT_FILENAME}, {SERVER_PYTHON_FILENAME}" - ) - - api_server_read_fd, tvm_write_fd = os.pipe() - tvm_read_fd, api_server_write_fd = os.pipe() 
- - proc_args.extend(["--read-fd", str(api_server_read_fd), "--write-fd", str(api_server_write_fd)]) - if debug: - proc_args.append("--debug") - - api_server_proc = subprocess.Popen( # pylint: disable=unused-variable - proc_args, bufsize=0, pass_fds=(api_server_read_fd, api_server_write_fd), cwd=project_dir - ) - os.close(api_server_read_fd) - os.close(api_server_write_fd) - - return ProjectAPIClient( - os.fdopen(tvm_read_fd, "rb", buffering=0), os.fdopen(tvm_write_fd, "wb", buffering=0) - ) diff --git a/python/tvm/micro/project_api/server.py b/python/tvm/micro/project_api/server.py deleted file mode 100644 index 2a1a41cb8a5a..000000000000 --- a/python/tvm/micro/project_api/server.py +++ /dev/null @@ -1,884 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name - -"""Defines a basic Project API server template. - -This file is meant to be imported or copied into Project API servers, so it should not have any -imports or dependencies outside of things strictly required to run the API server. -""" - -import abc -import argparse -import base64 -import collections -import enum -import io -import json -import logging -import os -import pathlib -import re -import select -import sys -import time -import traceback -import typing - - -_LOG = logging.getLogger(__name__) - - -_ProjectOption = collections.namedtuple( - "ProjectOption", ("name", "choices", "default", "type", "required", "optional", "help") -) - - -class ProjectOption(_ProjectOption): - """Class used to keep the metadata associated to project options.""" - - def __new__(cls, name, **kw): - """Override __new__ to force all options except name to be specified as kwargs.""" - assert "name" not in kw - assert ( - "required" in kw or "optional" in kw - ), "at least one of 'required' or 'optional' must be specified." - assert "type" in kw, "'type' field must be specified." - - kw["name"] = name - for param in ["choices", "default", "required", "optional"]: - kw.setdefault(param, None) - - return super().__new__(cls, **kw) - - def replace(self, attributes): - """Update attributes associated to the project option.""" - updated_option = self - return updated_option._replace(**attributes) - - -ServerInfo = collections.namedtuple( - "ServerInfo", ("platform_name", "is_template", "model_library_format_path", "project_options") -) - - -# Timeouts supported by the underlying C++ MicroSession. -# -# session_start_retry_timeout_sec : float -# Number of seconds to wait for the device to send a kSessionStartReply after sending the -# initial session start message. After this time elapses another -# kSessionTerminated-kSessionStartInit train is sent. 0 disables this. -# session_start_timeout_sec : float -# Total number of seconds to wait for the session to be established. 
After this time, the -# client gives up trying to establish a session and raises an exception. -# session_established_timeout_sec : float -# Number of seconds to wait for a reply message after a session has been established. 0 -# disables this. -TransportTimeouts = collections.namedtuple( - "TransportTimeouts", - [ - "session_start_retry_timeout_sec", - "session_start_timeout_sec", - "session_established_timeout_sec", - ], -) - - -class ErrorCode(enum.IntEnum): - """Enumerates error codes which can be returned. Includes JSON-RPC standard and custom codes.""" - - # Custom (in reserved error code space). - SERVER_ERROR = -32000 # A generic error was raised while processing the request. - - # JSON-RPC standard - PARSE_ERROR = -32700 - INVALID_REQUEST = -32600 - METHOD_NOT_FOUND = -32601 - INVALID_PARAMS = -32602 - INTERNAL_ERROR = -32603 - - -class JSONRPCError(Exception): - """An error class with properties that meet the JSON-RPC error spec.""" - - def __init__(self, code, message, data, client_context=None): - Exception.__init__(self) - self.code = code - self.message = message - self.data = data - self.client_context = client_context - - def to_json(self): - return { - "code": self.code, - "message": self.message, - "data": self.data, - } - - def __str__(self): - data_str = "" - if self.data: - if isinstance(self.data, dict) and self.data.get("traceback"): - data_str = f'\n{self.data["traceback"]}' - else: - data_str = f"\n{self.data!r}" - return f"JSON-RPC error # {self.code}: {self.message}" + data_str - - @classmethod - def from_json(cls, client_context, json_error): - """Convert an encapsulated ServerError into JSON-RPC compliant format.""" - found_server_error = False - try: - if ErrorCode(json_error["code"]) == ErrorCode.SERVER_ERROR: - found_server_error = True - except ValueError: - ServerError.from_json(client_context, json_error) - - if found_server_error: - return ServerError.from_json(client_context, json_error) - - return cls( - json_error["code"], - json_error["message"], - json_error.get("data", None), - client_context=client_context, - ) - - -class ServerError(JSONRPCError): - """Superclass for JSON-RPC errors which occur while processing valid requests.""" - - @classmethod - def from_exception(cls, exc, **kw): - to_return = cls(**kw) - to_return.set_traceback(traceback.TracebackException.from_exception(exc).format()) - return to_return - - def __init__(self, message=None, data=None, client_context=None): - if self.__class__ == ServerError: - assert message is not None, "Plain ServerError must have message=" - else: - assert ( - message is None - ), f"ServerError subclasses must not supply message=; got {message!r}" - message = self.__class__.__name__ - - super(ServerError, self).__init__(ErrorCode.SERVER_ERROR, message, data) - self.client_context = client_context - - def __str__(self): - context_str = f"{self.client_context}: " if self.client_context is not None else "" - super_str = super(ServerError, self).__str__() - return context_str + super_str - - def set_traceback(self, traceback): # pylint: disable=redefined-outer-name - """Format a traceback to be embedded in the JSON-RPC format.""" - - if self.data is None: - self.data = {} - - if "traceback" not in self.data: - # NOTE: TVM's FFI layer reorders Python stack traces several times and strips - # intermediary lines that start with "Traceback". This logic adds a comment to the first - # stack frame to explicitly identify the first stack frame line that occurs on the - # server. 
- traceback_list = list(traceback) - - # The traceback list contains one entry per stack frame, and each entry contains 1-2 - # lines: - # File "path/to/file", line 123, in : - # - # We want to place a comment on the first line of the outermost frame to indicate this - # is the server-side stack frame. - first_frame_list = traceback_list[1].split("\n") - self.data["traceback"] = ( - traceback_list[0] - + f"{first_frame_list[0]} # <--- Outermost server-side stack frame\n" - + "\n".join(first_frame_list[1:]) - + "".join(traceback_list[2:]) - ) - - @classmethod - def from_json(cls, client_context, json_error): - assert json_error["code"] == ErrorCode.SERVER_ERROR - - for sub_cls in cls.__subclasses__(): - if sub_cls.__name__ == json_error["message"]: - return sub_cls( - data=json_error.get("data"), - client_context=client_context, - ) - - return cls( - json_error["message"], data=json_error.get("data"), client_context=client_context - ) - - -class TransportClosedError(ServerError): - """Raised when a transport can no longer be used due to underlying I/O problems.""" - - -class IoTimeoutError(ServerError): - """Raised when the I/O operation could not be completed before the timeout. - - Specifically: - - when no data could be read before the timeout - - when some of the write data could be written before the timeout - - Note the asymmetric behavior of read() vs write(), since in one case the total length of the - data to transfer is known. - """ - - -class UnsupportedTVMVersionError(ServerError): - """Raised when the version of TVM supplied to server_info_query is unsupported.""" - - -class ProjectAPIHandler(metaclass=abc.ABCMeta): - """The interface class for all Project API implementations. - - Extend this class in your microtvm_api_server.py and implement each function defined here. - """ - - @abc.abstractmethod - def server_info_query(self, tvm_version: str) -> ServerInfo: - """Initial request issued by TVM to retrieve metadata about this API server and project. - - Should this API server not - - Parameters - ---------- - tvm_version : str - The value of tvm.__version__. - - Returns - ------- - ServerInfo : - A ServerInfo namedtuple containing the metadata needed by TVM. - - Raises - ------ - UnsupportedTVMVersionError : - When tvm_version indicates a known-unsupported version of TVM. - """ - raise NotImplementedError() - - @abc.abstractmethod - def generate_project( - self, - model_library_format_path: pathlib.Path, - standalone_crt_dir: pathlib.Path, - project_dir: pathlib.Path, - options: dict, - ): - """Generate a project from the given artifacts, copying ourselves to that project. - - Parameters - ---------- - model_library_format_path : pathlib.Path - Path to the Model Library Format tar archive. - standalone_crt_dir : pathlib.Path - Path to the root directory of the "standalone_crt" TVM build artifact. This contains the - TVM C runtime. - project_dir : pathlib.Path - Path to a nonexistent directory which should be created and filled with the generated - project. - options : dict - Dict mapping option name to ProjectOption. - """ - raise NotImplementedError() - - @abc.abstractmethod - def build(self, options: dict): - """Build the project, enabling the flash() call to made. - - Parameters - ---------- - options : Dict[str, ProjectOption] - ProjectOption which may influence the build, keyed by option name. - """ - raise NotImplementedError() - - @abc.abstractmethod - def flash(self, options: dict): - """Program the project onto the device. 
- - Parameters - ---------- - options : Dict[str, ProjectOption] - ProjectOption which may influence the programming process, keyed by option name. - """ - raise NotImplementedError() - - @abc.abstractmethod - def open_transport(self, options: dict) -> TransportTimeouts: - """Open resources needed for the transport layer. - - This function might e.g. open files or serial ports needed in write_transport or - read_transport. - - Calling this function enables the write_transport and read_transport calls. If the - transport is not open, this method is a no-op. - - Parameters - ---------- - options : Dict[str, ProjectOption] - ProjectOption which may influence the programming process, keyed by option name. - """ - raise NotImplementedError() - - @abc.abstractmethod - def close_transport(self): - """Close resources needed to operate the transport layer. - - This function might e.g. close files or serial ports needed in write_transport or - read_transport. - - Calling this function disables the write_transport and read_transport calls. If the - transport is not open, this method is a no-op. - """ - raise NotImplementedError() - - @abc.abstractmethod - # pylint: disable=unidiomatic-typecheck - def read_transport(self, n: int, timeout_sec: typing.Union[float, type(None)]) -> bytes: - """Read data from the transport. - - Parameters - ---------- - n : int - The exact number of bytes to read from the transport. - timeout_sec : Union[float, None] - Number of seconds to wait for at least one byte to be written before timing out. If - timeout_sec is 0, write should attempt to service the request in a non-blocking fashion. - If timeout_sec is None, write should block until all `n` bytes of data can be returned. - - Returns - ------- - bytes : - Data read from the channel. Should be exactly `n` bytes long. - - Raises - ------ - TransportClosedError : - When the transport layer determines that the transport can no longer send or receive - data due to an underlying I/O problem (i.e. file descriptor closed, cable removed, etc). - - IoTimeoutError : - When `timeout_sec` elapses without receiving any data. - """ - raise NotImplementedError() - - @abc.abstractmethod - def write_transport(self, data: bytes, timeout_sec: float): - """Write data to the transport. - - This function should either write all bytes in `data` or raise an exception. - - Parameters - ---------- - data : bytes - The data to write over the channel. - timeout_sec : Union[float, None] - Number of seconds to wait for all bytes to be written before timing out. If timeout_sec - is 0, write should attempt to service the request in a non-blocking fashion. If - timeout_sec is None, write should block until it has written all data. - - Raises - ------ - TransportClosedError : - When the transport layer determines that the transport can no longer send or receive - data due to an underlying I/O problem (i.e. file descriptor closed, cable removed, etc). - - IoTimeoutError : - When `timeout_sec` elapses without receiving any data. - """ - raise NotImplementedError() - - -class ProjectAPIServer: - """Base class for Project API Servers. - - This API server implements communication using JSON-RPC 2.0: - https://www.jsonrpc.org/specification - - Suggested use of this class is to import this module or copy this file into Project Generator - implementations, then instantiate it with server.start(). - - This RPC server is single-threaded, blocking, and one-request-at-a-time. Don't get anxious. 
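# ---------------------------------------------------------------------------
# NOTE: a minimal illustrative sketch, not taken from the removed sources,
# of how the ProjectAPIHandler interface above was typically implemented by a
# platform's microtvm_api_server.py. "ExampleHandler", "example_platform",
# and the timeout values are hypothetical; the imported names come from the
# python/tvm/micro/project_api/server.py module deleted by this patch, so the
# sketch only runs against a TVM tree that still ships that module.
# ---------------------------------------------------------------------------
from tvm.micro.project_api import server


class ExampleHandler(server.ProjectAPIHandler):
    """Hypothetical handler for a platform named "example_platform"."""

    def server_info_query(self, tvm_version: str) -> server.ServerInfo:
        return server.ServerInfo(
            platform_name="example_platform",
            is_template=True,
            model_library_format_path=None,
            project_options=server.default_project_options(),
        )

    def generate_project(self, model_library_format_path, standalone_crt_dir, project_dir, options):
        # Create the project directory, then copy the template sources, the
        # standalone CRT, and this API server into it.
        project_dir.mkdir(parents=True)

    def build(self, options):
        pass  # Invoke the platform's build system (make, cmake, west, ...).

    def flash(self, options):
        pass  # Program the built firmware image onto the board.

    def open_transport(self, options) -> server.TransportTimeouts:
        # Open the serial port or pipe used to talk to the device, then report
        # the timeouts the RPC session should use (values here are made up).
        return server.TransportTimeouts(
            session_start_retry_timeout_sec=5.0,
            session_start_timeout_sec=10.0,
            session_established_timeout_sec=30.0,
        )

    def close_transport(self):
        pass

    def read_transport(self, n, timeout_sec):
        raise NotImplementedError()

    def write_transport(self, data, timeout_sec):
        raise NotImplementedError()


if __name__ == "__main__":
    server.main(ExampleHandler())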
- """ - - _PROTOCOL_VERSION = 1 - - def __init__( - self, read_file: typing.BinaryIO, write_file: typing.BinaryIO, handler: ProjectAPIHandler - ): - """Initialize a new ProjectAPIServer. - - Parameters - ---------- - read_file : BinaryIO - A file-like object used to read binary data from the client. - write_file : BinaryIO - A file-like object used to write binary data to the client. - handler : ProjectAPIHandler - A class which extends the abstract class ProjectAPIHandler and implements the server RPC - functions. - """ - self._read_file = io.TextIOWrapper(read_file, encoding="UTF-8", errors="strict") - self._write_file = io.TextIOWrapper( - write_file, encoding="UTF-8", errors="strict", write_through=True - ) - self._handler = handler - - def serve_forever(self): - """Serve requests until no more are available.""" - has_more = True - while has_more: - has_more = self.serve_one_request() - - def serve_one_request(self): - """Read, process, and reply to a single request from read_file. - - When errors occur reading the request line or loading the request into JSON, they are - propagated to the caller (the stream is then likely corrupted and no further requests - should be served. When errors occur past this point, they are caught and send back to the - client. - - Return - ---------- - bool : - True when more data could be read from read_file, False otherwise. - """ - try: - line = self._read_file.readline() - _LOG.debug("read request <- %s", line) - if not line: - return False - - request = json.loads(line) - - except EOFError: - _LOG.error("EOF") - return False - - except Exception: # pylint: disable=broad-except - _LOG.error("Caught error reading request", exc_info=1) - return False - - did_validate = False - try: - self._validate_request(request) - did_validate = True - self._dispatch_request(request) - except JSONRPCError as exc: - if isinstance(exc, ServerError): - exc.set_traceback(traceback.TracebackException.from_exception(exc).format()) - request_id = None if not did_validate else request.get("id") - self._reply_error(request_id, exc) - return did_validate - except Exception as exc: # pylint: disable=broad-except - message = "validating request" - if did_validate: - message = f"calling method {request['method']}" - - exc = ServerError.from_exception(exc, message=message) - request_id = None if not isinstance(request, dict) else request.get("id") - self._reply_error(request_id, exc) - return did_validate - - return True - - VALID_METHOD_RE = re.compile("^[a-zA-Z0-9_]+$") - - def _validate_request(self, request): - if not isinstance(request, dict): - raise JSONRPCError( - ErrorCode.INVALID_REQUEST, f"request: want dict; got {request!r}", None - ) - - jsonrpc = request.get("jsonrpc") - if jsonrpc != "2.0": - raise JSONRPCError( - ErrorCode.INVALID_REQUEST, f'request["jsonrpc"]: want "2.0"; got {jsonrpc!r}', None - ) - - method = request.get("method") - if not isinstance(method, str): - raise JSONRPCError( - ErrorCode.INVALID_REQUEST, f'request["method"]: want str; got {method!r}', None - ) - - if not self.VALID_METHOD_RE.match(method): - raise JSONRPCError( - ErrorCode.INVALID_REQUEST, - f'request["method"]: should match regex {self.VALID_METHOD_RE.pattern}; ' - f"got {method!r}", - None, - ) - - params = request.get("params") - if not isinstance(params, dict): - raise JSONRPCError( - ErrorCode.INVALID_REQUEST, f'request["params"]: want dict; got {type(params)}', None - ) - - request_id = request.get("id") - # pylint: disable=unidiomatic-typecheck - if not isinstance(request_id, 
(str, int, type(None))): - raise JSONRPCError( - ErrorCode.INVALID_REQUEST, - f'request["id"]: want str, number, null; got {request_id!r}', - None, - ) - - def _dispatch_request(self, request): - method = request["method"] - - interface_method = getattr(ProjectAPIHandler, method, None) - if interface_method is None: - raise JSONRPCError( - ErrorCode.METHOD_NOT_FOUND, f'{request["method"]}: no such method', None - ) - - has_preprocessing = True - dispatch_method = getattr(self, f"_dispatch_{method}", None) - if dispatch_method is None: - dispatch_method = getattr(self._handler, method) - has_preprocessing = False - - request_params = request["params"] - params = {} - - for var_name, var_type in typing.get_type_hints(interface_method).items(): - if var_name in ("self", "return"): - continue - - # NOTE: types can only be JSON-compatible types, so var_type is expected to be of type - # 'type'. - if var_name not in request_params: - raise JSONRPCError( - ErrorCode.INVALID_PARAMS, - f'method {request["method"]}: parameter {var_name} not given', - None, - ) - - param = request_params[var_name] - if not has_preprocessing and not isinstance(param, var_type): - raise JSONRPCError( - ErrorCode.INVALID_PARAMS, - f'method {request["method"]}: parameter {var_name}: want {var_type!r}, ' - f"got {type(param)!r}", - None, - ) - - params[var_name] = param - - extra_params = [p for p in request["params"] if p not in params] - if extra_params: - raise JSONRPCError( - ErrorCode.INVALID_PARAMS, - f'{request["method"]}: extra parameters: {", ".join(extra_params)}', - None, - ) - - return_value = dispatch_method(**params) - self._write_reply(request["id"], result=return_value) - - def _write_reply(self, request_id, result=None, error=None): - reply_dict = { - "jsonrpc": "2.0", - "id": request_id, - } - - if error is not None: - assert ( - result is None - ), f"Want either result= or error=, got result={result!r} and error={error!r})" - reply_dict["error"] = error - else: - reply_dict["result"] = result - - reply_str = json.dumps(reply_dict) - _LOG.debug("write reply -> %r", reply_dict) - self._write_file.write(reply_str) - self._write_file.write("\n") - - def _reply_error(self, request_id, exception): - self._write_reply(request_id, error=exception.to_json()) - - def _dispatch_generate_project( - self, model_library_format_path, standalone_crt_dir, project_dir, options - ): - return self._handler.generate_project( - pathlib.Path(model_library_format_path), - pathlib.Path(standalone_crt_dir), - pathlib.Path(project_dir), - options, - ) - - def _dispatch_server_info_query(self, tvm_version): - query_reply = self._handler.server_info_query(tvm_version) - to_return = query_reply._asdict() - if to_return["model_library_format_path"] is not None: - to_return["model_library_format_path"] = str(to_return["model_library_format_path"]) - to_return.setdefault("protocol_version", self._PROTOCOL_VERSION) - to_return["project_options"] = [o._asdict() for o in query_reply.project_options] - return to_return - - def _dispatch_open_transport(self, options): - reply = self._handler.open_transport(options) - return {"timeouts": reply._asdict()} - - def _dispatch_read_transport(self, n, timeout_sec): - reply_data = self._handler.read_transport(n, timeout_sec) - return {"data": str(base64.b85encode(reply_data), "utf-8")} - - def _dispatch_write_transport(self, data, timeout_sec): - self._handler.write_transport(base64.b85decode(data), timeout_sec) - - -def _await_nonblocking_ready(rlist, wlist, timeout_sec=None, end_time=None): - if 
end_time is None: - return True - - if timeout_sec is None: - timeout_sec = max(0, end_time - time.monotonic()) - rlist, wlist, xlist = select.select(rlist, wlist, rlist + wlist, timeout_sec) - if not rlist and not wlist and not xlist: - raise IoTimeoutError() - - return True - - -def read_with_timeout(fd, n, timeout_sec): # pylint: disable=invalid-name - """Read data from a file descriptor, with timeout. - - This function is intended as a helper function for implementations of ProjectAPIHandler - read_transport. Tested on Linux and OS X. Not tested on Windows. - - Parameters - ---------- - fd : int - File descriptor to read from. Must be opened in non-blocking mode (e.g. with O_NONBLOCK) - if timeout_sec is not None. - - n : int - Maximum number of bytes to read. - - timeout_sec : float or None - If not None, maximum number of seconds to wait before raising IoTimeoutError. - - Returns - ------- - bytes : - If at least one byte was received before timeout_sec, returns a bytes object with length - in [1, n]. If timeout_sec is None, returns the equivalent of os.read(fd, n). - - Raises - ------ - IoTimeoutException : - When timeout_sec is not None and that number of seconds elapses before any data is read. - """ - end_time = None if timeout_sec is None else time.monotonic() + timeout_sec - - while True: - _await_nonblocking_ready([fd], [], end_time=end_time) - try: - to_return = os.read(fd, n) - break - except BlockingIOError: - pass - - # When EOF is reached, close the file. - if not to_return: - os.close(fd) - raise TransportClosedError() - - return to_return - - -def write_with_timeout(fd, data, timeout_sec): # pylint: disable=invalid-name - """Write data to a file descriptor, with timeout. - - This function is intended as a helper function for implementations of ProjectAPIHandler - write_transport. Tested on Linux and OS X. Not tested on Windows. - - Parameters - ---------- - fd : int - File descriptor to read from. Must be opened in non-blocking mode (e.g. with O_NONBLOCK) - if timeout_sec is not None. - - data : bytes - Data to write. - - timeout_sec : float or None - If not None, maximum number of seconds to wait before raising IoTimeoutError. - - Returns - ------- - int : - The number of bytes written to the file descriptor, if any bytes were written. A value - in [1, len(data)]. If timeout_sec is None, returns the equivalent of os.write(fd, data). - - Raises - ------ - IoTimeoutException : - When timeout_sec is not None and that number of seconds elapses before any data is read. - """ - end_time = None if timeout_sec is None else time.monotonic() + timeout_sec - - num_written = 0 - while data: - try: - _await_nonblocking_ready([], [fd], end_time=end_time) - except IoTimeoutError as exc: - if num_written: - return num_written - - raise exc - - num_written_this_cycle = os.write(fd, data) - - if not num_written_this_cycle: - os.close(fd) - raise base.TransportClosedError() - - data = data[num_written_this_cycle:] - num_written += num_written_this_cycle - - return num_written - - -def default_project_options(**kw) -> typing.List[ProjectOption]: - """Get default Project Options - - Attributes of any default option can be updated. Here is an example - when attribute `optional` from `verbose` option needs to be updates: - - default_project_options(verbose={"optional": ["build"]}) - - This will update the `optional` attribute of `verbose` ProjectOption - to be `["build"]`. - - Returns - ------- - options: List[ProjectOption] - A list of default ProjectOption with modifications. 
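# ---------------------------------------------------------------------------
# NOTE: an illustrative sketch, not taken from the removed sources, of how the
# read_with_timeout/write_with_timeout helpers above were intended to back a
# handler's read_transport/write_transport when the device is reachable
# through a plain file descriptor. The FdTransportHooks class and its _fd
# attribute are hypothetical; only the two helpers come from the deleted
# python/tvm/micro/project_api/server.py module.
# ---------------------------------------------------------------------------
import fcntl
import os

from tvm.micro.project_api.server import read_with_timeout, write_with_timeout


class FdTransportHooks:
    """Hypothetical mix-in supplying transport hooks for a ProjectAPIHandler."""

    _fd = None  # set by open_transport() in the concrete handler

    def _make_nonblocking(self):
        # The helpers require the descriptor to be non-blocking whenever a
        # finite timeout is used.
        flags = fcntl.fcntl(self._fd, fcntl.F_GETFL)
        fcntl.fcntl(self._fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)

    def read_transport(self, n, timeout_sec):
        return read_with_timeout(self._fd, n, timeout_sec)

    def write_transport(self, data, timeout_sec):
        write_with_timeout(self._fd, data, timeout_sec)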
- """ - options = [ - ProjectOption( - "verbose", - optional=["generate_project"], - type="bool", - default=False, - help="Run build with verbose output.", - ), - ProjectOption( - "project_type", - required=["generate_project"], - type="str", - help="Type of project to generate.", - ), - ProjectOption( - "board", - required=["generate_project"], - type="str", - help="Name of the board to build for.", - ), - ProjectOption( - "cmsis_path", - optional=["generate_project"], - type="str", - default=None, - help="Path to the CMSIS directory.", - ), - ProjectOption( - "warning_as_error", - optional=["generate_project"], - type="bool", - default=False, - help="Treat warnings as errors and raise an Exception.", - ), - ProjectOption( - "compile_definitions", - optional=["generate_project"], - type="str", - default=None, - help="Extra definitions added project compile.", - ), - ProjectOption( - "extra_files_tar", - optional=["generate_project"], - type="str", - default=None, - help="If given, during generate_project, " - "uncompress the tarball at this path into the project dir.", - ), - ] - for name, config in kw.items(): - option_found = False - for ind, option in enumerate(options): - if option.name == name: - options[ind] = option.replace(config) - option_found = True - break - if not option_found: - raise ValueError("Option {} was not found in default ProjectOptions.".format(name)) - - return options - - -def main(handler: ProjectAPIHandler, argv: typing.List[str] = None): - """Start a Project API server. - - Parameters - ---------- - argv : list[str] - Command-line parameters to this program. If not given, sys.argv is used. - handler : ProjectAPIHandler - Handler class that implements the API server RPC calls. - """ - if argv is None: - argv = sys.argv[1:] - - parser = argparse.ArgumentParser(description="Generic TVM Project API server entry point") - parser.add_argument( - "--read-fd", - type=int, - required=True, - help="Numeric file descriptor where RPC requests should be read.", - ) - parser.add_argument( - "--write-fd", - type=int, - required=True, - help="Numeric file descriptor where RPC replies should be written.", - ) - parser.add_argument( - "--debug", action="store_true", help="When given, configure logging at DEBUG level." - ) - args = parser.parse_args() - - logging.basicConfig(level="DEBUG" if args.debug else "INFO", stream=sys.stderr) - - read_file = os.fdopen(args.read_fd, "rb", buffering=0) - write_file = os.fdopen(args.write_fd, "wb", buffering=0) - - server = ProjectAPIServer(read_file, write_file, handler) - server.serve_forever() diff --git a/python/tvm/micro/session.py b/python/tvm/micro/session.py deleted file mode 100644 index dacff9aa6d80..000000000000 --- a/python/tvm/micro/session.py +++ /dev/null @@ -1,349 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Defines a top-level glue class that operates the Transport and Flasher classes.""" - -import json -import logging -import sys -import os -import pathlib -import shutil -from typing import Union - -from tvm.runtime.executor.aot_executor import AotModule -from ..error import register_error -from .._ffi import get_global_func, register_func -from ..contrib import graph_executor -from ..contrib import utils -from ..contrib.debugger import debug_executor -from ..rpc import RPCSession -from . import project -from .transport import IoTimeoutError -from .transport import TransportLogger - -try: - from .base import _rpc_connect -except ImportError: - raise ImportError("micro tvm is not enabled. Set USE_MICRO to ON in config.cmake") - - -@register_error -class SessionTerminatedError(Exception): - """Raised when a transport read operation discovers that the remote session is terminated.""" - - -class Session: - """MicroTVM Device Session - - Parameters - ---------- - config : dict - configuration for this session (as generated by - `tvm.micro.device.host.default_config()`, for example) - - Example - -------- - .. code-block:: python - - c_mod = ... # some module generated with "c" as the target - dev_config = micro.device.arm.stm32f746xx.default_config('127.0.0.1', 6666) - with tvm.micro.Session(dev_config) as sess: - micro_mod = sess.create_micro_mod(c_mod) - """ - - def __init__( - self, - transport_context_manager=None, - session_name="micro-rpc", - timeout_override=None, - ): - """Configure a new session. - - Parameters - ---------- - transport_context_manager : ContextManager[transport.Transport] - If given, `flasher` and `binary` should not be given. On entry, this context manager - should establish a transport between this TVM instance and the device. - session_name : str - Name of the session, used for debugging. - timeout_override : TransportTimeouts - If given, TransportTimeouts that govern the way Receive() behaves. If not given, this is - determined by calling has_flow_control() on the transport. - """ - self.transport_context_manager = transport_context_manager - self.session_name = session_name - self.timeout_override = timeout_override - - self._rpc = None - self._graph_executor = None - self._enable_rpc_logger = False - - self._exit_called = False - - def get_system_lib(self): - return self._rpc.get_function("runtime.SystemLib")() - - def create_aot_executor(self): - return self._rpc.get_function("tvm.aot_executor.create")( - self.get_system_lib(), self.device, "default" - ) - - def _wrap_transport_read(self, n, timeout_microsec): - try: - return self.transport.read( - n, float(timeout_microsec) / 1e6 if timeout_microsec is not None else None - ) - except IoTimeoutError: - return bytes([]) - - def _wrap_transport_write(self, data, timeout_microsec): - self.transport.write( - data, float(timeout_microsec) / 1e6 if timeout_microsec is not None else None - ) - - return len(data) # TODO(areusch): delete - - def __enter__(self): - """Initialize this session and establish an RPC session with the on-device RPC server. - - Returns - ------- - Session : - Returns self. 
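# ---------------------------------------------------------------------------
# NOTE: an illustrative sketch, not taken from the removed sources, of the
# host-driven flow this Session class supported, pieced together from the APIs
# in this file and in python/tvm/micro/testing/evaluation.py below. `lowered`
# (a tvm.relay.build result), the "zephyr"/"qemu_x86" platform and board
# choice, and the output path are assumptions; the code only runs on a TVM
# tree that still includes the microTVM packages removed by this patch.
# ---------------------------------------------------------------------------
import tvm.micro
from tvm.micro.session import Session, create_local_graph_executor

template = tvm.micro.get_microtvm_template_projects("zephyr")
project = tvm.micro.generate_project(
    template,
    lowered,  # module factory produced earlier by tvm.relay.build(...)
    "./generated-project",
    {"board": "qemu_x86", "project_type": "host_driven"},
)
project.build()
project.flash()

# Open an RPC session over the project's transport and drive the model from
# the host with a local graph executor bound to the remote device.
with Session(project.transport()) as session:
    graph_mod = create_local_graph_executor(
        lowered.get_graph_json(), session.get_system_lib(), session.device
    )
    graph_mod.run()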
- """ - self.transport = TransportLogger( - self.session_name, self.transport_context_manager, level=logging.DEBUG - ).__enter__() - - try: - timeouts = self.timeout_override - if timeouts is None: - timeouts = self.transport.timeouts() - - self._rpc = RPCSession( - _rpc_connect( - self.session_name, - self._wrap_transport_write, - self._wrap_transport_read, - int(timeouts.session_start_retry_timeout_sec * 1e6), - int(timeouts.session_start_timeout_sec * 1e6), - int(timeouts.session_established_timeout_sec * 1e6), - self._cleanup, - self._enable_rpc_logger, - ) - ) - self.device = self._rpc.cpu(0) - return self - - except: - self.transport.__exit__(*sys.exc_info()) - raise - - def __exit__(self, exc_type, exc_value, exc_traceback): - """Tear down this session and associated RPC session resources.""" - if not self._exit_called: - self._exit_called = True - self.transport.__exit__(exc_type, exc_value, exc_traceback) - shutdown_func = self._rpc._sess.get_function("CloseRPCConnection") - shutdown_func() - - def _cleanup(self): - self.__exit__(None, None, None) - - -def lookup_remote_linked_param(mod, storage_id, template_tensor, device): - """Lookup a parameter that has been pre-linked into a remote (i.e. over RPC) Module. - - This function signature matches the signature built by - - Parameters - ---------- - mod : tvm.runtime.Module - The remote Module containing the pre-linked parameters. - storage_id : int - An integer identifying the pre-linked paramter to find - template_tensor : DLTensor - A DLTensor containing metadata that should be filled-in to the returned NDArray. This - function should mostly not inspect this, and just pass it along to - NDArrayFromRemoteOpaqueHandle. - device : Device - The remote CPU device to be used with the returned NDArray. - - Returns - ------- - tvm.nd.NDArray : - NDArray containing the pre-linked parameter. - """ - try: - lookup_linked_param = mod.get_function("_lookup_linked_param") - except AttributeError: - return None - - remote_data = lookup_linked_param(storage_id) - if remote_data is None: - return None - - return get_global_func("tvm.rpc.NDArrayFromRemoteOpaqueHandle")( - mod, remote_data, template_tensor, device, None - ) - - -def create_local_graph_executor(graph_json_str, mod, device): - """Create a local graph executor driving execution on the remote CPU device given. - - Parameters - ---------- - graph_json_str : str - A string containing the graph representation. - - mod : tvm.runtime.Module - The remote module containing functions in graph_json_str. - - device : tvm.runtime.Device - The remote CPU execution device. - - Returns - ------- - tvm.contrib.GraphExecutor : - A local graph executor instance that executes on the remote device. - """ - device_type_id = [device.device_type, device.device_id] - fcreate = get_global_func("tvm.graph_executor.create") - return graph_executor.GraphModule( - fcreate(graph_json_str, mod, lookup_remote_linked_param, *device_type_id) - ) - - -def create_local_debug_executor(graph_json_str, mod, device, dump_root=None): - """Create a local debug runtime driving execution on the remote CPU device given. - - Parameters - ---------- - graph_json_str : str - A string containing the graph representation. - - mod : tvm.runtime.Module - The remote module containing functions in graph_json_str. - - device : tvm.runtime.Device - The remote CPU execution device. - - dump_root : Optional[str] - If given, passed as dump_root= to GraphModuleDebug. 
- - Returns - ------- - tvm.contrib.GraphExecutor : - A local graph executor instance that executes on the remote device. - """ - device_type_id = [device.device_type, device.device_id] - fcreate = get_global_func("tvm.graph_executor_debug.create") - return debug_executor.GraphModuleDebug( - fcreate(graph_json_str, mod, lookup_remote_linked_param, *device_type_id), - [device], - graph_json_str, - dump_root=dump_root, - ) - - -def create_local_aot_executor(session: Session): - """Create a local AoT executor driving execution on the remote CPU device given. - - Parameters - ---------- - session : Session - A microTVM device session. - - Returns - ------- - tvm.runtime.executor.aot_executor.AotModule : - A local AoT executor instance that executes on the remote device. - """ - return AotModule(session.create_aot_executor()) - - -@register_func("tvm.micro.compile_and_create_micro_session") -def compile_and_create_micro_session( - mod_src_bytes: bytes, - template_project_dir: str, - project_options: dict = None, - project_dir: Union[os.PathLike, str] = None, - use_existing: bool = False, -): - """Compile the given libraries and sources into a MicroBinary, then invoke create_micro_session. - - Parameters - ---------- - mod_src_bytes : bytes - The content of a tarfile which contains the TVM-generated sources which together form the - SystemLib. This tar is expected to be created by export_library. The tar will be extracted - into a directory and the sources compiled into a MicroLibrary using the Compiler. - - template_project_dir: str - The path to a template microTVM Project API project which is used to generate the embedded - project that is built and flashed onto the target device. - - project_options: dict - Options for the microTVM API Server contained in template_project_dir. - - project_dir: Union[os.PathLike, str] - if use_existing is False: The path to save the generated microTVM Project. - if use_existing is True: The path to a generated microTVM Project for debugging. - - use_existing: bool - skips the project generation and opens transport to the project at the project_dir address. - """ - - if use_existing: - project_dir = pathlib.Path(project_dir) - assert project_dir.is_dir(), f"{project_dir} does not exist." - build_dir = project_dir / "generated-project" / "build" - shutil.rmtree(build_dir) - generated_project = project.GeneratedProject.from_directory( - project_dir / "generated-project", - options=json.loads(project_options), - ) - else: - if project_dir: - temp_dir = utils.tempdir(custom_path=project_dir, keep_for_debug=True) - else: - temp_dir = utils.tempdir() - - model_library_format_path = temp_dir / "model.tar.gz" - with open(model_library_format_path, "wb") as mlf_f: - mlf_f.write(mod_src_bytes) - - try: - template_project = project.TemplateProject.from_directory(template_project_dir) - generated_project = template_project.generate_project_from_mlf( - model_library_format_path, - str(temp_dir / "generated-project"), - options=json.loads(project_options), - ) - except Exception as exception: - logging.error("Project Generate Error: %s", str(exception)) - raise exception - - generated_project.build() - generated_project.flash() - transport = generated_project.transport() - - rpc_session = Session(transport_context_manager=transport) - # RPC exit is called by cleanup function. 
- rpc_session.__enter__() - return rpc_session._rpc._sess diff --git a/python/tvm/micro/testing/__init__.py b/python/tvm/micro/testing/__init__.py deleted file mode 100644 index 0dc24102cb89..000000000000 --- a/python/tvm/micro/testing/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Allows the tools specified below to be imported directly from tvm.micro.testing""" -from .evaluation import tune_model, create_aot_session, predict_labels_aot -from .utils import get_supported_boards, get_target diff --git a/python/tvm/micro/testing/aot_test_utils.py b/python/tvm/micro/testing/aot_test_utils.py deleted file mode 100644 index 991a3f0ddb8e..000000000000 --- a/python/tvm/micro/testing/aot_test_utils.py +++ /dev/null @@ -1,120 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -This file provides utilities for running AOT tests, especially for Corstone. 
- -""" - -import logging -import itertools -import shutil - -import pytest - -import tvm -from tvm.testing.aot import AOTTestRunner - -pytest.importorskip("tvm.micro") - -_LOG = logging.getLogger(__name__) - - -AOT_DEFAULT_RUNNER = AOTTestRunner() - -# AOT Test Runner using the Arm® Corstoneâ„¢-300 Reference Systems -# see: https://developer.arm.com/ip-products/subsystem/corstone/corstone-300 -AOT_CORSTONE300_RUNNER = AOTTestRunner( - makefile="corstone300", - prologue=""" - UartStdOutInit(); - """, - includes=["uart_stdout.h"], - pass_config={ - "relay.ext.cmsisnn.options": { - "mcpu": "cortex-m55", - } - }, -) - -AOT_USMP_CORSTONE300_RUNNER = AOTTestRunner( - makefile="corstone300", - prologue=""" - UartStdOutInit(); - """, - includes=["uart_stdout.h"], - pass_config={ - "relay.ext.cmsisnn.options": { - "mcpu": "cortex-m55", - }, - "tir.usmp.enable": True, - }, -) - -AOT_APROFILE_AEM_RUNNER = AOTTestRunner( - makefile="aprofile_aem", - includes=[], - pass_config={ - "tir.usmp.enable": False, - # AOT test infra generates 'fake' tensor inputs which fails asserts - "tir.disable_assert": True, - }, -) - - -def parametrize_aot_options(test): - """Parametrize over valid option combinations""" - - requires_arm_eabi = pytest.mark.skipif( - shutil.which("arm-none-eabi-gcc") is None, reason="ARM embedded toolchain unavailable" - ) - - interface_api = ["packed", "c"] - use_unpacked_api = [True, False] - test_runner = [AOT_DEFAULT_RUNNER, AOT_CORSTONE300_RUNNER] - - all_combinations = itertools.product(interface_api, use_unpacked_api, test_runner) - - # Filter out packed operators with c interface - valid_combinations = filter( - lambda parameters: not (parameters[0] == "c" and not parameters[1]), - all_combinations, - ) - - # Only use reference system for C interface and unpacked API calls - valid_combinations = filter( - lambda parameters: not ( - parameters[2] == AOT_CORSTONE300_RUNNER - and (parameters[0] == "packed" or not parameters[1]) - ), - valid_combinations, - ) - - # Skip reference system tests if running in i386 container - marked_combinations = map( - lambda parameters: pytest.param(*parameters, marks=[requires_arm_eabi]) - if parameters[2] == AOT_CORSTONE300_RUNNER - else parameters, - valid_combinations, - ) - - func = pytest.mark.parametrize( - ["interface_api", "use_unpacked_api", "test_runner"], - marked_combinations, - )(test) - - return tvm.testing.skip_if_32bit(reason="Reference system unavailable in i386 container")(func) diff --git a/python/tvm/micro/testing/evaluation.py b/python/tvm/micro/testing/evaluation.py deleted file mode 100644 index c16b97f61df3..000000000000 --- a/python/tvm/micro/testing/evaluation.py +++ /dev/null @@ -1,174 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -""" -Provides high-level functions for instantiating and timing AOT models. Used -by autotuning tests in tests/micro, and may be used for more performance -tests in the future. - -""" - -import logging -from io import StringIO -from pathlib import Path -from contextlib import ExitStack -import tempfile -import shutil - -import tvm -from tvm.relay.op.contrib import cmsisnn - - -def tune_model( - platform, - board, - target, - mod, - params, - num_trials, - tuner_cls=tvm.autotvm.tuner.GATuner, - project_options=None, -): - """Autotunes a model with microTVM and returns a StringIO with the tuning logs""" - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - tasks = tvm.autotvm.task.extract_from_program(mod["main"], {}, target) - assert len(tasks) > 0 - assert isinstance(params, dict) - - project_options = { - "board": board, - "project_type": "host_driven", - **(project_options or {}), - } - - module_loader = tvm.micro.AutoTvmModuleLoader( - template_project_dir=tvm.micro.get_microtvm_template_projects(platform), - project_options=project_options, - ) - - builder = tvm.autotvm.LocalBuilder( - n_parallel=1, - build_kwargs={"build_option": {"tir.disable_vectorize": True}}, - do_fork=False, - build_func=tvm.micro.autotvm_build_func, - runtime=tvm.relay.backend.Runtime("crt", {"system-lib": True}), - ) - runner = tvm.autotvm.LocalRunner(number=1, repeat=1, timeout=100, module_loader=module_loader) - measure_option = tvm.autotvm.measure_option(builder=builder, runner=runner) - - results = StringIO() - for task in tasks: - tuner = tuner_cls(task) - - tuner.tune( - n_trial=num_trials, - measure_option=measure_option, - callbacks=[ - tvm.autotvm.callback.log_to_file(results), - tvm.autotvm.callback.progress_bar(num_trials, si_prefix="M"), - ], - si_prefix="M", - ) - # Note that we might not find a working schedule at all, in which case - # tuner.best_flops would equal zero. This is not good, but checking for - # this case will happen elsewhere. 
- - return results - - -def create_aot_session( - platform, - board, - target, - mod, - params, - build_dir=Path(tempfile.mkdtemp()), - tune_logs=None, - timeout_override=None, - use_cmsis_nn=False, - project_options=None, - use_existing=False, -): - """AOT-compiles and uploads a model to a microcontroller, and returns the RPC session""" - - executor = tvm.relay.backend.Executor("aot") - crt_runtime = tvm.relay.backend.Runtime("crt", {"system-lib": True}) - - with ExitStack() as stack: - config = {"tir.disable_vectorize": True} - if use_cmsis_nn: - config["relay.ext.cmsisnn.options"] = {"mcpu": target.mcpu} - stack.enter_context(tvm.transform.PassContext(opt_level=3, config=config)) - if use_cmsis_nn: - mod = cmsisnn.partition_for_cmsisnn(mod, params, mcpu=target.mcpu) - if tune_logs is not None: - stack.enter_context(tvm.autotvm.apply_history_best(tune_logs)) - - lowered = tvm.relay.build( - mod, - target=target, - params=params, - runtime=crt_runtime, - executor=executor, - ) - parameter_size = len(tvm.runtime.save_param_dict(lowered.get_params())) - print(f"Model parameter size: {parameter_size}") - - project_options = { - "board": board, - "project_type": "host_driven", - # {} shouldn't be the default value for project options ({} - # is mutable), so we use this workaround - **(project_options or {}), - } - - if use_existing: - shutil.rmtree(build_dir / "project" / "build") - project = tvm.micro.GeneratedProject.from_directory( - build_dir / "project", - options=project_options, - ) - - else: - project = tvm.micro.generate_project( - str(tvm.micro.get_microtvm_template_projects(platform)), - lowered, - build_dir / "project", - project_options, - ) - - project.build() - project.flash() - return tvm.micro.Session(project.transport(), timeout_override=timeout_override) - - -def predict_labels_aot(session, aot_executor, input_data, runs_per_sample=1): - """Predicts labels for each sample in input_data using host-driven AOT. - Returns an iterator of (label, runtime) tuples. This function can only - be used with models for which the output is the confidence for each class.""" - - assert aot_executor.get_num_inputs() == 1 - assert aot_executor.get_num_outputs() == 1 - assert runs_per_sample > 0 - - for counter, sample in enumerate(input_data): - logging.info("Evaluating sample %d", counter) - aot_executor.get_input(0).copyfrom(sample) - result = aot_executor.module.time_evaluator("run", session.device, number=runs_per_sample)() - predicted_label = aot_executor.get_output(0).numpy().argmax() - runtime = result.mean - yield predicted_label, runtime diff --git a/python/tvm/micro/testing/pytest_plugin.py b/python/tvm/micro/testing/pytest_plugin.py deleted file mode 100644 index 3a828ea3a01e..000000000000 --- a/python/tvm/micro/testing/pytest_plugin.py +++ /dev/null @@ -1,150 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -# pylint: disable=invalid-name,redefined-outer-name -""" microTVM testing fixtures used to deduce testing argument - values from testing parameters """ - -import pathlib -import os -import datetime -import pytest - -from tvm.contrib.utils import tempdir - -from .utils import get_supported_platforms, get_supported_boards - - -def pytest_addoption(parser): - """Adds more pytest arguments""" - parser.addoption( - "--platform", - choices=get_supported_platforms(), - help=("microTVM platform for tests."), - ) - parser.addoption( - "--board", - choices=list(get_supported_boards("zephyr").keys()) - + list(get_supported_boards("arduino").keys()), - help=( - "microTVM boards for tests. Board refers to instances" - "of microcontrollers/emulators defined in a platform." - ), - ) - parser.addoption( - "--test-build-only", - action="store_true", - default=False, - help="Only run tests that don't require physical hardware.", - ) - parser.addoption( - "--microtvm-debug", - action="store_true", - default=False, - help=( - "If set true, it will keep the project directory for debugging." - "Also, it will enable debug level logging in project generation." - ), - ) - parser.addoption( - "--serial-number", - default=None, - help=( - "Board serial number. This is used to run test on a " - "specific board when multiple boards with the same type exist." - ), - ) - - -def pytest_generate_tests(metafunc): - """Hooks into pytest to add platform and board fixtures to tests that - require them. To make sure that "platform" and "board" are treated as - parameters for the appropriate tests (and included in the test names), - we add them as function level parametrizations. This prevents data - from being overwritten in Junit XML files if multiple platforms - or boards are tested.""" - - for argument in ["platform", "board"]: - if argument in metafunc.fixturenames: - value = metafunc.config.getoption(f"--{argument}", default=None) - - if not value: - raise ValueError( - f"Test {metafunc.function.__name__} in module {metafunc.module.__name__} " - f"requires a --{argument} argument, but none was given." 
- ) - - metafunc.parametrize(argument, [metafunc.config.getoption(f"--{argument}")]) - - -@pytest.fixture(scope="session") -def microtvm_debug(request): - return request.config.getoption("--microtvm-debug") - - -def pytest_collection_modifyitems(config, items): - if config.getoption("--test-build-only"): - skip_hardware_tests = pytest.mark.skip(reason="--test-build-only was passed") - for item in items: - if "requires_hardware" in item.keywords: - item.add_marker(skip_hardware_tests) - - -@pytest.fixture -def workspace_dir(request, board, microtvm_debug): - """Creates workspace directory for each test.""" - parent_dir = pathlib.Path(os.path.dirname(request.module.__file__)) - board_workspace = ( - parent_dir / f"workspace_{board}" / datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") - ) - board_workspace_base = str(board_workspace) - number = 1 - while board_workspace.exists(): - board_workspace = pathlib.Path(board_workspace_base + f"-{number}") - number += 1 - - if not os.path.exists(board_workspace.parent): - os.makedirs(board_workspace.parent) - - keep_for_debug = microtvm_debug if microtvm_debug else None - test_temp_dir = tempdir(custom_path=board_workspace, keep_for_debug=keep_for_debug) - return test_temp_dir - - -@pytest.fixture(autouse=True) -def skip_by_board(request, board): - """Skip test if board is in the list.""" - if request.node.get_closest_marker("skip_boards"): - if board in request.node.get_closest_marker("skip_boards").args[0]: - pytest.skip("skipped on this board: {}".format(board)) - - -def pytest_configure(config): - config.addinivalue_line( - "markers", - "skip_boards(board): skip test for the given board", - ) - - -@pytest.fixture -def serial_number(request): - serial_number = request.config.getoption("--serial-number") - if serial_number: - serial_number_splitted = serial_number.split(",") - if len(serial_number_splitted) > 1: - return serial_number_splitted - return serial_number diff --git a/python/tvm/micro/testing/utils.py b/python/tvm/micro/testing/utils.py deleted file mode 100644 index 755a85839d02..000000000000 --- a/python/tvm/micro/testing/utils.py +++ /dev/null @@ -1,185 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Defines the test methods used with microTVM.""" - -import io -from functools import lru_cache -import json -import logging -from pathlib import Path -import tarfile -import time -from typing import Union -import numpy as np - -import tvm -from tvm import relay -from tvm.micro.project_api.server import IoTimeoutError - -# Timeout in seconds for AOT transport. 
-TIMEOUT_SEC = 10 - - -@lru_cache(maxsize=None) -def get_supported_platforms(): - return ["arduino", "zephyr"] - - -@lru_cache(maxsize=None) -def get_supported_boards(platform: str): - template = Path(tvm.micro.get_microtvm_template_projects(platform)) - with open(template / "boards.json") as f: - return json.load(f) - - -def get_target(platform: str, board: str = None) -> tvm.target.Target: - """Intentionally simple function for making Targets for microcontrollers. - If you need more complex arguments, one should call target.micro directly. Note - that almost all, but not all, supported microcontrollers are Arm-based.""" - if platform == "crt": - return tvm.target.target.micro("host") - - if not board: - raise ValueError(f"`board` type is required for {platform} platform.") - - model = get_supported_boards(platform)[board]["model"] - return tvm.target.target.micro(model, options=["-device=arm_cpu"]) - - -def check_tune_log(log_path: Union[Path, str]): - """Read the tuning log and check each result.""" - with open(log_path, "r") as f: - lines = f.readlines() - - for line in lines: - if len(line) > 0: - tune_result = json.loads(line) - assert tune_result["result"][0][0] < 1000000000.0 - - -def aot_transport_init_wait(transport): - """Send init message to microTVM device until it receives wakeup sequence.""" - while True: - try: - aot_transport_find_message(transport, "wakeup", timeout_sec=TIMEOUT_SEC) - break - except IoTimeoutError: - transport.write(b"init%", timeout_sec=TIMEOUT_SEC) - - -def aot_transport_find_message(transport, expression: str, timeout_sec: int) -> str: - """Read transport message until it finds the expression.""" - timeout = timeout_sec - start_time = time.monotonic() - while True: - data = _read_line(transport, timeout) - logging.debug("new line: %s", data) - if expression in data: - return data - timeout = max(0, timeout_sec - (time.monotonic() - start_time)) - - -def _read_line(transport, timeout_sec: int) -> str: - data = bytearray() - while True: - new_data = transport.read(1, timeout_sec=timeout_sec) - logging.debug("read data: %s", new_data) - for item in new_data: - data.append(item) - if str(chr(item)) == "\n": - return data.decode(encoding="utf-8") - - -def mlf_extract_workspace_size_bytes(mlf_tar_path: Union[Path, str]) -> int: - """Extract an MLF archive file and read workspace size from metadata file.""" - - workspace_size = 0 - with tarfile.open(mlf_tar_path, "r:*") as tar_file: - tar_members = [tar_info.name for tar_info in tar_file.getmembers()] - assert "./metadata.json" in tar_members - with tar_file.extractfile("./metadata.json") as f: - metadata = json.load(f) - for mod_name in metadata["modules"].keys(): - workspace_size += metadata["modules"][mod_name]["memory"]["functions"]["main"][0][ - "workspace_size_bytes" - ] - return workspace_size - - -def get_conv2d_relay_module(): - """Generate a conv2d Relay module for testing.""" - data_shape = (1, 3, 64, 64) - weight_shape = (8, 3, 5, 5) - data = relay.var("data", relay.TensorType(data_shape, "int8")) - weight = relay.var("weight", relay.TensorType(weight_shape, "int8")) - y = relay.nn.conv2d( - data, - weight, - padding=(2, 2), - channels=8, - kernel_size=(5, 5), - data_layout="NCHW", - kernel_layout="OIHW", - out_dtype="int32", - ) - f = relay.Function([data, weight], y) - mod = tvm.IRModule.from_expr(f) - mod = relay.transform.InferType()(mod) - return mod - - -def _npy_dtype_to_ctype(data: np.ndarray) -> str: - if data.dtype == "int8": - return "int8_t" - elif data.dtype == "int32": - return 
"int32_t" - elif data.dtype == "uint8": - return "uint8_t" - elif data.dtype == "float32": - return "float" - else: - raise ValueError(f"Data type {data.dtype} not expected.") - - -def create_header_file( - tensor_name: str, npy_data: np.array, output_path: str, tar_file: tarfile.TarFile -): - """ - This method generates a header file containing the data contained in the numpy array provided - and adds the header file to a tar file. - It is used to capture the tensor data (for both inputs and output). - """ - header_file = io.StringIO() - header_file.write("#include \n") - header_file.write("#include \n") - header_file.write("#include \n") - header_file.write(f"const size_t {tensor_name}_len = {npy_data.size};\n") - header_file.write(f"{_npy_dtype_to_ctype(npy_data)} {tensor_name}[] =") - - header_file.write("{") - for i in np.ndindex(npy_data.shape): - header_file.write(f"{npy_data[i]}, ") - header_file.write("};\n\n") - - header_file_bytes = bytes(header_file.getvalue(), "utf-8") - raw_path = Path(output_path) / f"{tensor_name}.h" - tar_info = tarfile.TarInfo(name=str(raw_path)) - tar_info.size = len(header_file_bytes) - tar_info.mode = 0o644 - tar_info.type = tarfile.REGTYPE - tar_file.addfile(tar_info, io.BytesIO(header_file_bytes)) diff --git a/python/tvm/micro/transport.py b/python/tvm/micro/transport.py deleted file mode 100644 index 8e95ff7ea77a..000000000000 --- a/python/tvm/micro/transport.py +++ /dev/null @@ -1,278 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Defines abstractions and implementations of the RPC transport used with micro TVM.""" - -import abc -import logging -import string -import typing - -from .project_api.server import IoTimeoutError, TransportTimeouts -from .project_api.server import TransportClosedError - - -_ = TransportClosedError # work around pylint unused-import error - - -_LOG = logging.getLogger(__name__) - - -def debug_transport_timeouts(session_start_retry_timeout_sec=0): - return TransportTimeouts( - session_start_retry_timeout_sec=session_start_retry_timeout_sec, - session_start_timeout_sec=0, - session_established_timeout_sec=0, - ) - - -class Transport(metaclass=abc.ABCMeta): - """The abstract Transport class used for micro TVM.""" - - def __enter__(self): - self.open() - return self - - def __exit__(self, exc_type, exc_value, exc_traceback): - self.close() - - @abc.abstractmethod - def timeouts(self): - """Return TransportTimeouts suitable for use with this transport. - - See the TransportTimeouts documentation in python/tvm/micro/session.py. 
- """ - raise NotImplementedError() - - @abc.abstractmethod - def open(self): - """Open any resources needed to send and receive RPC protocol data for a single session.""" - raise NotImplementedError() - - @abc.abstractmethod - def close(self): - """Release resources associated with this transport.""" - raise NotImplementedError() - - @abc.abstractmethod - def read(self, n, timeout_sec): - """Read up to n bytes from the transport. - - Parameters - ---------- - n : int - Maximum number of bytes to read from the transport. - timeout_sec : Union[float, None] - Number of seconds to wait for all `n` bytes to be received before timing out. The - transport can wait additional time to account for transport latency or bandwidth - limitations based on the selected configuration and number of bytes being received. If - timeout_sec is 0, read should attempt to service the request in a non-blocking fashion. - If timeout_sec is None, read should block until at least 1 byte of data can be returned. - - Returns - ------- - bytes : - Data read from the channel. Less than `n` bytes may be returned, but 0 bytes should - never be returned. If returning less than `n` bytes, the full timeout_sec, plus any - internally-added timeout, should be waited. If a timeout or transport error occurs, - an exception should be raised rather than simply returning empty bytes. - - - Raises - ------ - TransportClosedError : - When the transport layer determines that the transport can no longer send or receive - data due to an underlying I/O problem (i.e. file descriptor closed, cable removed, etc). - - IoTimeoutError : - When `timeout_sec` elapses without receiving any data. - """ - raise NotImplementedError() - - @abc.abstractmethod - def write(self, data, timeout_sec): - """Write data to the transport channel. - - Parameters - ---------- - data : bytes - The data to write over the channel. - timeout_sec : Union[float, None] - Number of seconds to wait for at least one byte to be written before timing out. The - transport can wait additional time to account for transport latency or bandwidth - limitations based on the selected configuration and number of bytes being received. If - timeout_sec is 0, write should attempt to service the request in a non-blocking fashion. - If timeout_sec is None, write should block until at least 1 byte of data can be - returned. - - Returns - ------- - int : - The number of bytes written to the underlying channel. This can be less than the length - of `data`, but cannot be 0 (raise an exception instead). - - Raises - ------ - TransportClosedError : - When the transport layer determines that the transport can no longer send or receive - data due to an underlying I/O problem (i.e. file descriptor closed, cable removed, etc). - - IoTimeoutError : - When `timeout_sec` elapses without receiving any data. - """ - raise NotImplementedError() - - -class TransportLogger(Transport): - """Wraps a Transport implementation and logs traffic to the Python logging infrastructure.""" - - def __init__(self, name, child, logger=None, level=logging.INFO): - self.name = name - self.child = child - self.logger = logger or _LOG - self.level = level - - # Construct PRINTABLE to exclude whitespace from string.printable. 
- PRINTABLE = string.digits + string.ascii_letters + string.punctuation - - @classmethod - def _to_hex(cls, data): - lines = [] - if not data: - lines.append("") - return lines - - for i in range(0, (len(data) + 15) // 16): - chunk = data[i * 16 : (i + 1) * 16] - hex_chunk = " ".join(f"{c:02x}" for c in chunk) - ascii_chunk = "".join((chr(c) if chr(c) in cls.PRINTABLE else ".") for c in chunk) - lines.append(f"{i * 16:04x} {hex_chunk:47} {ascii_chunk}") - - if len(lines) == 1: - lines[0] = lines[0][6:] - - return lines - - def timeouts(self): - return self.child.timeouts() - - def open(self): - self.logger.log(self.level, "%s: opening transport", self.name) - self.child.open() - - def close(self): - self.logger.log(self.level, "%s: closing transport", self.name) - return self.child.close() - - def read(self, n, timeout_sec): - timeout_str = f"{timeout_sec:5.2f}s" if timeout_sec is not None else " None " - try: - data = self.child.read(n, timeout_sec) - except IoTimeoutError: - self.logger.log( - self.level, - "%s: read {%s} %4d B -> [IoTimeoutError %s]", - self.name, - timeout_str, - n, - timeout_str, - ) - raise - except Exception as err: - self.logger.log( - self.level, - "%s: read {%s} %4d B -> [err: %s]", - self.name, - timeout_str, - n, - err.__class__.__name__, - exc_info=1, - ) - raise err - - hex_lines = self._to_hex(data) - if len(hex_lines) > 1: - self.logger.log( - self.level, - "%s: read {%s} %4d B -> [%3d B]:\n%s", - self.name, - timeout_str, - n, - len(data), - "\n".join(hex_lines), - ) - else: - self.logger.log( - self.level, - "%s: read {%s} %4d B -> [%3d B]: %s", - self.name, - timeout_str, - n, - len(data), - hex_lines[0], - ) - - return data - - def write(self, data, timeout_sec): - timeout_str = f"{timeout_sec:5.2f}s" if timeout_sec is not None else " None " - try: - self.child.write(data, timeout_sec) - except IoTimeoutError: - self.logger.log( - self.level, - "%s: write {%s} <- [%3d B]: [IoTimeoutError %s]", - self.name, - timeout_str, - len(data), - timeout_str, - ) - raise - except Exception as err: - self.logger.log( - self.level, - "%s: write {%s} <- [%3d B]: [err: %s]", - self.name, - timeout_str, - len(data), - err.__class__.__name__, - exc_info=1, - ) - raise err - - hex_lines = self._to_hex(data) - if len(hex_lines) > 1: - self.logger.log( - self.level, - "%s: write {%s} <- [%3d B]:\n%s", - self.name, - timeout_str, - len(data), - "\n".join(hex_lines), - ) - else: - self.logger.log( - self.level, - "%s: write {%s} <- [%3d B]: %s", - self.name, - timeout_str, - len(data), - hex_lines[0], - ) - - -TransportContextManager = typing.ContextManager[Transport] diff --git a/python/tvm/relay/backend/contrib/ethosu/__init__.py b/python/tvm/relay/backend/contrib/ethosu/__init__.py deleted file mode 100644 index be77a81e4eb5..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Arm(R) Ethos(TM)-U NPU codegen modules for Relay.""" -from . import util -from . import legalize -from . import preprocess -from . import codegen -from . import vela_api -from . import tir_to_cs_translator -from . import softmax_rewriter diff --git a/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py b/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py deleted file mode 100644 index 2057790b51cb..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""FFI APIs for relay transformation passes.""" -import tvm._ffi # type: ignore - -tvm._ffi._init_api("relay.ext.ethos-u", __name__) -tvm._ffi._init_api("tir.contrib.ethos-u", __name__) diff --git a/python/tvm/relay/backend/contrib/ethosu/codegen.py b/python/tvm/relay/backend/contrib/ethosu/codegen.py deleted file mode 100644 index 04b40a9e64ea..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/codegen.py +++ /dev/null @@ -1,751 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
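Note: _init_api above binds every global function registered under the given prefix onto the Python module. The same registry can be exercised directly; the snippet below is a sketch that assumes a working TVM installation, and the key "demo.add_one" is hypothetical.

    import tvm

    @tvm.register_func("demo.add_one")
    def add_one(x):
        return x + 1

    # Look the function back up through the global registry, the same mechanism
    # _init_api uses to expose the "relay.ext.ethos-u" passes.
    f = tvm.get_global_func("demo.add_one")
    assert f(41) == 42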
-"""Codegen for Arm(R) Ethos(TM)-U NPU""" -from collections import defaultdict -from typing import List, Callable - -from ethosu.vela import api as vapi -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu.tir.compiler import LowerToTIR -from tvm.relay.backend.contrib.ethosu.tir.scheduler import copy_constants -from tvm.contrib.ethosu.cascader import ( - cascade, - EthosuDeviceConfig, - CascaderOptions, - MemoryRegion, - extract_memory_info, -) -from tvm.relay.backend.contrib.ethosu.legalize import LegalizeEthosU -from tvm.relay.backend.contrib.ethosu import tir_to_cs_translator, util, vela_api -from tvm.relay.expr_functor import ExprMutator, ExprVisitor, Call -from tvm.relay import expr as _expr - -# pylint: disable=unused-import -from tvm.relay.backend.contrib.ethosu.op import op_attrs -from tvm.relay.backend.contrib.ethosu import op - -from . import _ffi_api - - -class OptimizeLUTs(ExprMutator): - """A pass to merge an identity operator with a LUT based activation function with - a preceding operator provided that operator can do a table lookup for the activation - in the hardware""" - - def __init__(self): - super().__init__() - self.lut_ops = { - "contrib.ethosu.conv2d": op.ethosu_conv2d, - "contrib.ethosu.depthwise_conv2d": op.ethosu_depthwise_conv2d, - "contrib.ethosu.pooling": op.ethosu_pooling, - "contrib.ethosu.binary_elementwise": op.ethosu_binary_elementwise, - } - - def create_op_with_lut(self, call): - """Extract the parameters and attributes from the NPU operator and create - a new operator with LUT. - - Parameters - ---------- - call : tvm.relay.expr.Call - The current call node being visited. - - Returns - ------- - tvm.relay.expr.Call - The new operator with LUT. - """ - identity = call - ethosu_op = call.args[0] - lut = identity.args[1] - activation = identity.attrs.activation - - new_attrs = dict(ethosu_op.attrs) - new_attrs["activation"] = activation - - # Assume that LUT is always the last argument - new_args = ethosu_op.args[:-1] + [lut] - assert ethosu_op.op.name in self.lut_ops.keys() - - return self.lut_ops[ethosu_op.op.name](*new_args, **new_attrs) - - def visit_call(self, call: tvm.relay.expr.Call) -> tvm.relay.expr.Call: - """Recursively visit call nodes in the input graph and if an ethosu.identity - operator with LUT is found and the preceding operator has a LUT attribute, create - a new NPU operator. - - Parameters - ---------- - call : tvm.relay.expr.Call - The current call node being visited. - - Returns - ------- - tvm.relay.expr.Call - The input call node in the case the current call node does - not refer to an Op. Else, a new call node with a new operator. - """ - new_call = call - lut_activations = ["TANH", "LUT", "SIGMOID"] - - if isinstance(call.op, tvm.ir.Op) and isinstance(call.args[0], tvm.relay.expr.Call): - producer_op = call.args[0] - # Check if the producer can do a LUT operation - if ( - producer_op.op.name in self.lut_ops.keys() - and call.op.name == "contrib.ethosu.identity" - and call.attrs.activation in lut_activations - ): - # Check the producer doesn't already have a LUT - has_lut = producer_op.attrs.activation in lut_activations - if not has_lut: - new_call = self.create_op_with_lut(call) - - new_call = super().visit_call(new_call) - - return new_call - - -@util.create_npu_function_pass(opt_level=1) -class LUTsOptimizer: - """Register LUTsOptimizer as a relay pass.""" - - def transform_npu_function(self, _, func: relay.Function) -> relay.Function: - """Visit relay nodes in the given NPU function. 
- - Parameters - ---------- - func : tvm.relay.function.Function - The function to apply the optimization pass for multiple LUTs to. - - Returns - ------- - mod : tvm.IRModule - New module with optimized LUTs. - """ - return OptimizeLUTs().visit(func) - - def __call__(self, *args, **kwargs): - pass - - -class AnalyzeConsumers(ExprVisitor): - """Traverses the graph to determine consumers that are NPU operations and - which have restrictions to use NHCWB16 layout. The result is maintained in - `npu_consumers` and `restrictions`. - - Attributes - ---------- - npu_consumers : Dict[tvm.relay.expr.Call, List[bool]] - Mapping from NPU operation to list of boolean values that represent - whether or not each consumer is an NPU operation. - restrictions : Dict[tvm.relay.expr.Call, List[bool]] - Mapping from NPU operation to list of boolean values that represent - whether or not operation has restrictions to use NHCWB16 layout. - optimize_ops : Dict[str, Callable] - A map from NPU operation name to function that creates NPU operation. - """ - - def __init__(self, optimize_ops): - self.npu_consumers = defaultdict(list) - self.restrictions = defaultdict(list) - self.optimize_ops = optimize_ops - super().__init__() - - def visit_call(self, call: relay.Call): - is_npu_consumer = call.op.name in self.optimize_ops - args = [] - - # Expand tuples - for arg in call.args: - if isinstance(arg, relay.Tuple): - args.extend(arg.fields) - else: - args.append(arg) - - for arg in args: - if isinstance(arg, relay.Call) and arg.op.name in self.optimize_ops: - self.npu_consumers[arg].append(is_npu_consumer) - # ReduceSum requires NHWC input in case input tensor has type int32 or - # accelerator is Ethos_U65_512 - # https://review.mlplatform.org/plugins/gitiles/ml/ethos-u/ethos-u-vela/+/refs/tags/3.7.0/ethosu/vela/graph_optimiser_util.py#126 - has_restrictions = ( - call.op.name == "contrib.ethosu.pooling" - and call.attrs["pooling_type"] == "SUM" - and ( - arg.checked_type.dtype == "int32" - or vela_api.get_accelerator_config() == vapi.NpuAccelerator.Ethos_U65_512 - ) - ) - self.restrictions[arg].append(has_restrictions) - - super().visit_call(call) - - -class LayoutOptimization(ExprMutator): - """A pass to optimize the layout of NPU operations by converting to brick format (NHCWB16). - This pass traverses the graph and attempts to alter the input/output layouts when an NPU - operation is visited. Whether or not the input/output layout can be altered for a given NPU - operation depends on the following: - - Check alter input layout: For each argument, if the producer is also an NPU operation and - its output is altered to brick format and there are no restrictions, then the input layout - with respect to the current argument is altered to brick format. - - Check alter output layout: If all consumers (child nodes) are an NPU operation and - there are no restrictions, then the output layout is altered to brick format. - - Note - ---- - In order for this pass to be run, the consumers of each NPU operation must first be analyzed - by the `AnalyzeConsumers` pass, since Relay doesn't keep a reference to child nodes. - - Attributes - ---------- - npu_consumers : Dict[tvm.relay.expr.Call, List[bool]] - A map from current call to a list boolean values that state whether or not each consumer - is an NPU operation. - restrictions : Dict[tvm.relay.expr.Call, List[bool]] - A map from current call to a list boolean values that state - whether or not operation has restrictions to use NHCWB16 layout. 
- optimize_ops : Dict[str, Callable] - A map from NPU operation name to function that creates NPU operation. - """ - - def __init__(self, npu_consumers, restrictions, optimize_ops): - self.npu_consumers = npu_consumers - self.restrictions = restrictions - self.optimize_ops = optimize_ops - super().__init__() - - def alter_ethosu_op_layout(self, call: tvm.relay.expr.Call) -> tvm.relay.expr.Call: - """Alter the layouts of given NPU operation to brick format if possible. - - Parameters - ---------- - call : tvm.relay.expr.Call - The call pointing to an NPU operation that will be checked if - the layout needs altering. - - Returns - ------- - new_call : tvm.relay.expr.Call - New call with altered layouts. - """ - - def are_all_consumers_npu(call): - """ - Check whether or not each consumer is an NPU operation. - Parameters - ---------- - call : tvm.relay.expr.Call - The call pointing to an NPU operation. - - Returns - ------- - all_consumers_npu : bool - Whether each consumer is an NPU operation. - """ - consumers = self.npu_consumers[call] - return consumers and all(consumers) - - def check_restrictions(call): - """ - Check if there are any restrictions for call to use NHCWB16 layout. - Parameters - ---------- - call : tvm.relay.expr.Call - The call pointing to an NPU operation. - - Returns - ------- - any_restrictions : bool - Whether there are restrictions. - """ - restrictions = self.restrictions[call] - return restrictions and any(restrictions) - - assert isinstance(call.attrs, tvm.ir.Attrs), ( - f"The attributes for operator '{call.op.name}' could not be " - "found. Did you register the relay.attrs.EthosuAttrs " - "object in python api?" - ) - - new_attrs = dict(call.attrs) - - # Check if we can rewrite the input layouts - input_count = 0 - for arg in call.args: - input_count += 1 - if arg not in self.npu_consumers: - continue - parent_has_brick_output = are_all_consumers_npu(arg) - parent_has_restrictions = check_restrictions(arg) - if parent_has_brick_output and not parent_has_restrictions: - layout_string = "ifm_layout" if input_count <= 1 else f"ifm{input_count}_layout" - new_attrs[layout_string] = "NHCWB16" - - # Check if we can rewrite the output layouts - has_brick_output = are_all_consumers_npu(call) - has_restrictions = check_restrictions(call) - if has_brick_output and not has_restrictions: - new_attrs["ofm_layout"] = "NHCWB16" - - name = call.op.name - return self.optimize_ops[name](*call.args, **new_attrs) - - def visit_call(self, call: tvm.relay.expr.Call) -> tvm.relay.expr.Call: - """Recursively visit call nodes in the input graph and alter the - layout of an op if needed. - - Parameters - ---------- - call : tvm.relay.expr.Call - The current call node being visited. - - Returns - ------- - tvm.relay.expr.Call - The input call node in the case the current call node does - not refer to an Op. Else, a new call node with altered Op - attributes. - """ - if isinstance(call.op, tvm.ir.Op) and call.op.name in self.optimize_ops: - call = self.alter_ethosu_op_layout(call) - return super().visit_call(call) - - -@util.create_npu_function_pass(opt_level=1) -class LayoutOptimizer: - """Register LayoutOptimizer as a Relay pass.""" - - def transform_npu_function(self, _, func: relay.Function) -> relay.Function: - """A pass to optimize the layout of NPU operations. 
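Note: the layout decision reduces to a small boolean rule over the maps produced by AnalyzeConsumers. A plain-Python restatement of that rule, with hypothetical per-consumer flags:

    def use_brick_layout(consumers_are_npu, restrictions):
        # NHCWB16 is used only when every recorded consumer is an NPU op and no
        # consumer imposes an NHWC restriction (e.g. the int32 ReduceSum case above).
        return bool(consumers_are_npu) and all(consumers_are_npu) and not any(restrictions)

    assert use_brick_layout([True, True], [False, False]) is True
    assert use_brick_layout([True, False], [False, False]) is False  # non-NPU consumer
    assert use_brick_layout([True, True], [False, True]) is False    # restricted consumer
    assert use_brick_layout([], []) is False                         # no consumers recorded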
If both the - producer and consumer of a tensor are NPU operators, then the - layout is converted from NHWC to NHCWB16 as this is the layout NPU - uses internally.""" - - optimize_ops = { - "contrib.ethosu.conv2d": op.ethosu_conv2d, - "contrib.ethosu.depthwise_conv2d": op.ethosu_depthwise_conv2d, - "contrib.ethosu.pooling": op.ethosu_pooling, - "contrib.ethosu.binary_elementwise": op.ethosu_binary_elementwise, - "contrib.ethosu.unary_elementwise": op.ethosu_unary_elementwise, - } - - analyze = AnalyzeConsumers(optimize_ops) - analyze.visit(func) - return LayoutOptimization(analyze.npu_consumers, analyze.restrictions, optimize_ops).visit( - func - ) - - def __call__(self, *args, **kwargs): - pass - - -class PadsWithMultipleConsumersReplicator(ExprMutator): - """A pass to handle the situation when nn.pad operator has - more than one qnn.conv2d consumer. - - pad - / \ - Conv2D Conv2D - - In this case, because of the peculiarities of pattern parsing, - conv2d does not get into the composite for the NPU. - Therefore, pads are added so that each has only one consumer. - """ - - def __init__(self): - super().__init__() - # a set to record hashes of an pads which already have one qnn.conv2d consumer - self.hashes = set() - - def visit_call(self, call: tvm.relay.expr.Call) -> tvm.relay.expr.Call: - if ( - isinstance(call.op, tvm.ir.Op) - and isinstance(call.args[0], Call) - and isinstance(call.args[0].op, tvm.ir.Op) - and call.op == relay.op.get("qnn.conv2d") - and call.args[0].op == relay.op.get("nn.pad") - ): - if tvm.ir.structural_hash(call.args[0]) not in self.hashes: - # add the hash of nn.pad to set - self.hashes.add(tvm.ir.structural_hash(call.args[0])) - else: - # if this pad already has a conv2d consumer, duplicate the pad - # and make it an input for current conv2d - used_pad = self.visit(call.args[0]) - used_pad_args = [self.visit(arg) for arg in used_pad.args] - new_pad = Call( - used_pad.op, used_pad_args, used_pad.attrs, used_pad.type_args, used_pad.span - ) - new_conv2d_args = [] - for i, arg in enumerate(call.args): - if i == 0: - new_conv2d_args.append(self.visit(new_pad)) - else: - new_conv2d_args.append(self.visit(arg)) - new_conv2d_op = self.visit(call.op) - expr__ = _expr.CallWithFields( - call, - new_conv2d_op, - new_conv2d_args, - call.attrs, - call.type_args, - None, - call.span, - ) - return expr__ - - new_args = [self.visit(arg) for arg in call.args] - new_op = self.visit(call.op) - expr__ = _expr.CallWithFields( - call, new_op, new_args, call.attrs, call.type_args, None, call.span - ) - return expr__ - - -def replicate_pads(mod): - """Traverses the Relay graph to replicate nn.pad operators if thay have - multiple qnn.conv2d consumers. That making remove the situation when - e.g. pad+conv2d corresponds qnn_conv2d_pattern, but can not be grouped - because several conv2d use the same pad operation. - - Parameters - ---------- - tvm.ir.IRModule - The IRModule that gets generated from a relay frontend. - - Returns - ------- - tvm.ir.IRModule - The IRModule without nn.pad operators with multiple consumers. - """ - replicator = PadsWithMultipleConsumersReplicator() - for global_var, func in mod.functions.items(): - func = replicator.visit(func) - mod.update_func(global_var, func) - return mod - - -class AnalyzeConcatArgs(ExprVisitor): - """Traverses the graph to determine which arguments were passed into the - concatenation operation and how many times they are used. 
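Note: the usage count that later drives copy insertion can be sketched with a plain Counter over hypothetical concatenate argument lists (all names below are placeholders):

    from collections import Counter

    # Each inner list is the flattened tuple fed to one concatenate call.
    concat_args = [["a", "b"], ["b", "c"], ["b", "d"]]
    usage = Counter(arg for args in concat_args for arg in args)
    assert usage["b"] == 3  # an argument used n times receives n - 1 identity copies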
The result is - maintained in `args_usage` and is a dictionary where the key is the concatenation argument and - the value is the number of uses of this argument. - - Attributes - ---------- - args_usage : Dict[tvm.relay.expr.Call, int] - Mapping from concatenation arguments to count their usage as concatenate arguments. - """ - - def __init__(self): - self.args_usage = defaultdict(int) - super().__init__() - - def visit_call(self, call: relay.Call): - args = [] - - # Expand tuples - for arg in call.args: - if isinstance(arg, relay.Tuple): - args.extend(arg.fields) - else: - args.append(arg) - - if isinstance(call.op, tvm.ir.Op) and call.op.name == "concatenate": - for arg in args: - if isinstance(arg, relay.Call): - self.args_usage[arg] += 1 - - super().visit_call(call) - - -class ConcatArgsCopier(ExprMutator): - """A pass for copying concatenation arguments that are used in multiple concatenation - operations. For a concatenation argument that is used n times, n - 1 copy operations - will be created. - - Attributes - ---------- - args_usage : Dict[tvm.relay.expr.Call, int] - Mapping from concatenation arguments to count their usage as concatenate arguments. - """ - - def __init__(self, args_usage): - super().__init__() - self.args_usage = args_usage - - def visit_call(self, call: tvm.relay.expr.Call) -> tvm.relay.expr.Call: - if isinstance(call.op, tvm.ir.Op) and call.op == relay.op.get("concatenate"): - args = [] - - # Expand tuples - for arg in call.args: - if isinstance(arg, relay.Tuple): - args.extend(arg.fields) - else: - args.append(arg) - new_args = [] - for arg in args: - visited = self.visit(arg) - if self.args_usage[arg] > 1: - # Add copy operation - lut = relay.const([], "int8") - new_op = op.ethosu_identity(visited, lut) - new_args.append(new_op) - self.args_usage[arg] -= 1 - else: - new_args.append(visited) - - new_args = [relay.Tuple(new_args)] - else: - new_args = [self.visit(arg) for arg in call.args] - new_op = self.visit(call.op) - new_call = _expr.CallWithFields( - call, new_op, new_args, call.attrs, call.type_args, None, call.span - ) - return new_call - - -@util.create_npu_function_pass(opt_level=1) -class CopyReusedConcatBuffers: - """Register CopyReusedConcatBuffers as a Relay pass.""" - - def transform_npu_function(self, _, func: relay.Function) -> relay.Function: - """A pass to copy concatenation arguments which are used more than once in - concatenation operation. This is the preparation for the next RemoveConcatenates - pass to prevent a situation where an argument used in multiple concatenations - will be written to only one resulting buffer.""" - - analyze = AnalyzeConcatArgs() - analyze.visit(func) - - return ConcatArgsCopier(analyze.args_usage).visit(func) - - def __call__(self, *args, **kwargs): - pass - - -def IdentityOptimizer(): # pylint: disable=invalid-name - """Pass that removes redundant identities - - Return - ------ - Pass - The module pass. - """ - return _ffi_api.IdentityOptimizer() - - -def OutlineCompilerFunctions(compiler_name): # pylint: disable=invalid-name - """Pass that outlines functions given a named Compiler attribute. - - Parameters - ---------- - compiler_name - The name of the compiler to look for and outline. - - Return - ------ - Pass - The module pass. - """ - return _ffi_api.OutlineCompilerFunctions(compiler_name) - - -@tvm._ffi.register_func("relay.ext.ethos-u.constant_updater") -def constant_updater(expr, symbol): # pylint: disable=unused-argument - """ - The constant updater process happen after lowering in the core compiler. 
- For the NPU, we dont want the build process to extract constants to be loaded in - the runtime as we are embedding them inside the C runtime.Module. - """ - return dict() - - -def _create_cascader( - options: CascaderOptions, - io_region: MemoryRegion, - constant_region: MemoryRegion, - working_regions: List[MemoryRegion], - device_config: EthosuDeviceConfig, -) -> Callable: - def _cascader(te_graph, const_dict, sch): - cascade( - sch, - te_graph, - const_dict, - options, - io_region, - constant_region, - working_regions, - device_config, - ) - - return _cascader - - -def _ethos_u55_cascader(sram, enable_striping) -> Callable: - # TODO(ekalda): Extract the flash info from ConstantPools once it is implemented - flash = MemoryRegion(name="FLASH", size=10**7, read_bandwidth=4, write_bandwidth=4) - - device_config = EthosuDeviceConfig(util.get_accelerator_config()) - cascader_options = CascaderOptions( - cascade_region=sram, - max_proposals=64, - stripe_factors=5, - max_plan_size=10, - always_copy_size=1024, - max_open_plans=8, - max_closed_plans=32, - enable_striping=enable_striping, - ) - return _create_cascader( - options=cascader_options, - io_region=sram, - constant_region=flash, - working_regions=[sram], - device_config=device_config, - ) - - -def _calculate_memory_pressure(mod: tvm.ir.IRModule) -> int: - """ - Calculates a worst-case estimate of the memory consumed at the callsite of - each microNPU function. This value can be used as a hint to guide the cascader, - indicating how aggressively it will need to optimize the input module to fit - into the memory that remains in the memory workspace. - - Parameters - ---------- - mod : tvm.ir.IRModule - The input module - - Returns - ------- - int - Memory pressure value for the module. - """ - memory_pressure = 0 - - @util.create_npu_function_pass(opt_level=1) - class CalculateMemoryPressure: - """ - Traverse the module and get total memory used by external NPU functions. - """ - - def transform_npu_function(self, _, func: relay.Function) -> relay.Function: - nonlocal memory_pressure - max_val = max(func.attrs["used_memory"]) - memory_pressure += max_val - return func - - CalculateMemoryPressure()(mod) # pylint: disable=not-callable - - io_used_memory = 0 - if not tvm.tir.usmp.utils.use_workspace_io_is_enabled(): - io_used_memory = int(mod["main"].attrs["io_used_memory"]) - - return memory_pressure - io_used_memory - - -@tvm._ffi.register_func("relay.ext.ethos-u.relay_to_tir") -def relay_to_tir(mod: tvm.ir.IRModule) -> tvm.ir.IRModule: - """ - This is the hook for python-based lowering of a Relay module which lowers NPU - external functions to TIR. - - Parameters - ---------- - mod : tvm.ir.IRModule - This is the Relay module. - - Returns - ------- - mod : tvm.ir.IRModule - The Relay module with scheduled NPU external functions. 
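Note: _calculate_memory_pressure above boils down to simple arithmetic over the "used_memory" annotations. A minimal restatement of that arithmetic with made-up numbers:

    def estimate_memory_pressure(per_func_used_memory, io_used_memory, workspace_io_enabled):
        # Each external NPU function contributes the peak of its "used_memory" annotation.
        pressure = sum(max(usage) for usage in per_func_used_memory)
        # I/O buffers are discounted unless they are placed in the workspace pools.
        if not workspace_io_enabled:
            pressure -= io_used_memory
        return pressure

    assert estimate_memory_pressure([[256, 1024], [512]], 128, False) == 1408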
- """ - mod = OutlineCompilerFunctions("ethos-u")(mod) - mod = LegalizeEthosU()(mod) - mod = CopyReusedConcatBuffers()(mod) - mod = LUTsOptimizer()(mod) - mod = relay.transform.InferType()(mod) - mod = IdentityOptimizer()(mod) - mod = LayoutOptimizer()(mod) - mod = relay.transform.InferType()(mod) - - device_contexts = { - gv: "ethos-u" for gv, _ in filter(lambda x: util.is_npu_func(x[1]), mod.functions.items()) - } - mod = mod.with_attr("device_contexts", device_contexts) - - # Use the cascader if it is enabled for the U55 accelerator, otherwise use copy_constants - # scheduler - if util.is_cascader_enabled(): - if util.get_accelerator_config() == "ethos-u65-256": - raise ValueError("Cascading is not supported for the U65 accelerator") - - workspace_memory_pools = mod.attrs["workspace_memory_pools"] - - if not workspace_memory_pools: - raise ValueError("Workspace memory pool needs to be provided for the U55 cascader") - if len(workspace_memory_pools.pools) != 1: - raise ValueError("Exactly one workspace pool needs to be provided for the U55 cascader") - - memory_pressure = _calculate_memory_pressure(mod) - sram = extract_memory_info(workspace_memory_pools.pools[0], memory_pressure) - tir_mod = LowerToTIR(_ethos_u55_cascader(sram, util.is_striping_enabled()))(mod) - else: - scheduler = None if util.is_copying_constants_disabled() else copy_constants() - tir_mod = LowerToTIR(scheduler)(mod) - - return tir_mod - - -@tvm._ffi.register_func("relay.ext.ethos-u.primfunc_to_artifact") -def primfunc_to_artifact(primfunc: tvm.tir.PrimFunc) -> util.CompilationArtifact: - """ - This is the hook for python-based lowering of TIR PrimFunc - that has undergone unified optimization to compilation - artifact destined for the microNPU. - - Parameters - ---------- - primfunc : tir.PrimFunc - TIR PrimFunc that has undergone unified optimizations - - Returns - ------- - CompilationArtifact - This is a structure that holds the binary artifacts - for the microNPU - """ - symbol = str(primfunc.attrs["global_symbol"]) - const_dict = primfunc.attrs["ethos-u.constants"] - tir_mod = tvm.IRModule() - tir_mod[symbol] = primfunc - - const_dict_np = dict() - for buffer_var in const_dict.keys(): - const_dict_np[buffer_var] = const_dict[buffer_var].numpy() - - cmms, encoded_constants, base_addresses = tir_to_cs_translator.translate(tir_mod, const_dict_np) - return util.CompilationArtifact(symbol, cmms, encoded_constants, base_addresses) diff --git a/python/tvm/relay/backend/contrib/ethosu/legalize.py b/python/tvm/relay/backend/contrib/ethosu/legalize.py deleted file mode 100644 index 199193f75939..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/legalize.py +++ /dev/null @@ -1,1790 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# pylint: disable=invalid-name, unused-argument, import-outside-toplevel -# pylint: disable=no-value-for-parameter, use-list-literal -"""A set of passes to legalize some of operations for the NPU""" -from typing import List, Type, Callable -import math - -import numpy as np # type: ignore -from ethosu.vela import scaling, fp_math - -import tvm # type: ignore -from tvm import relay -from tvm.relay.dataflow_pattern import DFPatternCallback # type: ignore -from tvm.relay.dataflow_pattern import wildcard -from tvm.relay.dataflow_pattern import is_op -from tvm.relay.dataflow_pattern import rewrite -from tvm.relay.dataflow_pattern import CallPattern -from tvm.relay.backend.contrib.ethosu import op as ethosu_ops # type: ignore -from tvm.relay.backend.contrib.ethosu import vela_api -from tvm.relay.backend.contrib.ethosu import util -from tvm.relay.backend.contrib.ethosu.softmax_rewriter import SoftmaxRewriter -from tvm.relay.op.contrib import ethosu as ethosu_patterns # type: ignore - - -class SplitRewriter(DFPatternCallback): - """This rewriting converts split operations into a sequence of - strided_slice operations, because codegen is going to be based - on strided_slices that will define the slice of the tensor that - will be fed to the consumer. - """ - - def __init__(self): - super().__init__(require_type=True) - self.split_in = wildcard() - self.pattern = is_op("split")(self.split_in) - - @staticmethod - def get_section_begin_coords(split: tvm.relay.Expr) -> List[int]: - """Currently, the split operator takes an array of indices or an integer - indicating the number of splits. However, its an array of indices could - represent both cases, therefore this function just make it an array of - indices where each index represent the co-ordinate of beginning of each - section -- defines as section begins. - - Parameters - ---------- - split : tvm.relay.Expr - The Relay Call expression for a split operator - - Returns - ------- - section_begins : List[int] - A list containing integers corresponding to section - begins - """ - indices_or_sections = split.attrs.indices_or_sections - input_shape = split.args[0].checked_type.shape - split_axis = split.attrs.axis - - if isinstance(indices_or_sections, tvm.ir.container.Array): - # 0 is the beginning of the first section. - return [0] + list(indices_or_sections) - split_axis_len = input_shape[split_axis].value - section_length = split_axis_len // indices_or_sections - return list(range(0, split_axis_len, section_length)) - - def callback( - self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map - ) -> tvm.relay.Expr: - split_input = post.args[0] - split_begins = list() - split_ends = list() - section_begins_in_split_axis = self.get_section_begin_coords(post) - for split_cord in section_begins_in_split_axis: - # first begin is [0, 0, ... , 0] - begin_shape = [0 for i in range(len(split_input.checked_type.shape))] - begin_shape[post.attrs.axis] = split_cord - split_begins.append(begin_shape) - - end_shape = list(split_input.checked_type.shape) - # Only the split axis coordinate changes - end_shape[post.attrs.axis] = split_cord - split_ends.append(end_shape) - - # Coordinates needs to be shifted left because beginning - # of the next section is the end of the previous - split_ends = split_ends[1:] - # Last section end is the shape of the tensor itself. 
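Note: get_section_begin_coords above normalises both forms of split's indices_or_sections argument. The standalone sketch below reproduces that normalisation and the begin/end pairing used to build the strided slices:

    def split_sections(indices_or_sections, axis_len):
        # Either an explicit list of section begins, or an even split into N parts.
        if isinstance(indices_or_sections, (list, tuple)):
            begins = [0] + list(indices_or_sections)
        else:
            step = axis_len // indices_or_sections
            begins = list(range(0, axis_len, step))
        # Each section ends where the next one begins; the last ends at the axis length.
        ends = begins[1:] + [axis_len]
        return list(zip(begins, ends))

    assert split_sections([2, 5], 8) == [(0, 2), (2, 5), (5, 8)]
    assert split_sections(4, 8) == [(0, 2), (2, 4), (4, 6), (6, 8)]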
- split_ends.append(list(split_input.checked_type.shape)) - - strided_slices = list() - for sb, se in zip(split_begins, split_ends): - strided_slices.append(relay.strided_slice(split_input, sb, se)) - - return relay.Tuple(strided_slices) - - -class PartitionedSplitRewriter(DFPatternCallback): - """This pass brings the split out of the partitioned function""" - - def __init__(self): - super().__init__(require_type=True, rewrite_once=True) - self.pattern = ( - wildcard().has_attr({"Composite": ethosu_patterns.SplitParams.composite_name}) - )(wildcard()) - - def callback( - self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map - ) -> tvm.relay.Expr: - split_input = post.args[0] - split_params = ethosu_patterns.SplitParams(post.op.body) - indices_or_sections = split_params.indices_or_sections - axis = split_params.axis - return relay.op.split(split_input, indices_or_sections, axis=axis).astuple() - - -def get_lut_from_func( - ifm_scale: float, - ifm_zp: int, - ofm_scale: float, - ofm_zp: int, - func: Callable[[float], float], - dtype, -) -> List[int]: - """Calculates the values of the lookup table based on the calculation function""" - - assert dtype in ["int8", "int16"] - - if dtype == "int8": - lut_values = list() - qmin, qmax = np.iinfo(dtype).min, np.iinfo(dtype).max - for x in range(qmin, qmax + 1): - x_real = ifm_scale * (x - ifm_zp) - out_real = func(x_real) - lut_result = int(util.round_away_zero(ofm_zp + out_real / ofm_scale)) - lut_result = min(qmax, max(qmin, lut_result)) - lut_values.append(lut_result) - - return lut_values - else: - # dtype == "int16" - table_min = np.iinfo(np.int16).min - table_max = np.iinfo(np.int16).max - - input_min = ifm_scale * (table_min - ifm_zp) - input_max = ifm_scale * (table_max - ifm_zp) - - output_min = ofm_scale * (table_min - ofm_zp) - output_max = ofm_scale * (table_max - ofm_zp) - # Create 16 bit lut following the reference - nbr_steps = 512 - step = (input_max - input_min) / nbr_steps - half_step = step / 2 - output_scaling_inv = (table_max - table_min + 1) / (output_max - output_min) - - values = [] - for i in range(nbr_steps): - val = func(input_min + i * step) - val_midpoint = func(input_min + i * step + half_step) - val_next = func(input_min + (i + 1) * step) - - sample_val = util.round_away_zero(val * output_scaling_inv) - midpoint_interp_val = util.round_away_zero( - (val_next * output_scaling_inv + util.round_away_zero(val * output_scaling_inv)) / 2 - ) - midpoint_val = util.round_away_zero(val_midpoint * output_scaling_inv) - midpoint_err = midpoint_interp_val - midpoint_val - bias = util.round_away_zero(midpoint_err / 2) - - lut_result = min(max(sample_val - bias, table_min), table_max) - values.append(lut_result) - - val = util.round_away_zero(func(input_max) * output_scaling_inv) - lut_result = min(max(val, table_min), table_max) - values.append(lut_result) - # Convert to hardware 16bit lut with base and slope - lut = [0] * nbr_steps - for i in range(nbr_steps): - slope = (int(values[i + 1]) - int(values[i])) << 16 - base = int(values[i]) - lut[i] = slope + base - - return lut - - -class LutActivationRewriter(DFPatternCallback): - """A class to create an identity operator with the LUT""" - - def __init__( - self, - params_class: Type, - activation_type: str, - calc_func: Callable[[float], float], - ): - super().__init__(require_type=True, rewrite_once=True) - self.params_class = params_class - self.pattern = (wildcard().has_attr({"Composite": params_class.composite_name}))(wildcard()) - self.activation_type 
= activation_type - self.calc_func = calc_func - - def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map): - params = self.params_class(post.op.body) - params.ifm.tensor = post.args[0] - - input_scale = float(params.ifm.q_params.scale_f32) - input_zp = int(params.ifm.q_params.zero_point) - output_scale = float(params.ofm.q_params.scale_f32) - output_zp = int(params.ofm.q_params.zero_point) - - # Validation function from pattern matching checks that the input type can be int8 or int16 - ifm_dtype = params.ifm.dtype - if ifm_dtype == "int8": - lut_values = get_lut_from_func( - input_scale, input_zp, output_scale, output_zp, self.calc_func, ifm_dtype - ) - lut = relay.const(lut_values, dtype=ifm_dtype) - - # We baked the requantization into the LUT, so we don't requantize the identity operator - identity = ethosu_ops.ethosu_identity( - ifm=params.ifm.tensor, - lut=lut, - ifm_scale=input_scale, - ifm_zero_point=input_zp, - ofm_scale=input_scale, - ofm_zero_point=input_zp, - activation=self.activation_type, - ) - - else: - # ifm_dtype == "int16" - lut = get_lut_from_func( - input_scale, input_zp, output_scale, output_zp, self.calc_func, ifm_dtype - ) - lut = relay.const(lut, dtype="int32") - identity = ethosu_ops.ethosu_identity( - ifm=params.ifm.tensor, - lut=lut, - ifm_scale=input_scale, - ifm_zero_point=0, - ofm_scale=output_scale, - ofm_zero_point=0, - activation=self.activation_type, - ) - - return identity - - -class TanhRewriter(LutActivationRewriter): - """This pass adds tanh as a LUT to the identity operator""" - - def __init__(self): - super().__init__( - params_class=ethosu_patterns.TanhParams, activation_type="TANH", calc_func=math.tanh - ) - - -class TanhFixedPointRewriter(LutActivationRewriter): - """This pass adds tanh with fixed point as a LUT to the identity operator""" - - def __init__(self): - super().__init__( - params_class=ethosu_patterns.TanhFixedPointParams, - activation_type="TANH", - calc_func=math.tanh, - ) - - -def sigmoid_calc_func(x: float) -> float: - """Function to calculate the values for sigmoid""" - # These limits are inherited from TFLite - upper_limit = 8.0 - lower_limit = -8.0 - - if x <= lower_limit: - y = 0.0 - elif x >= upper_limit: - y = 1.0 - else: - y = 1 / (1 + math.exp(-x)) - return y - - -class SigmoidRewriter(LutActivationRewriter): - """This pass adds sigmoid as a LUT for identity op""" - - def __init__(self): - super().__init__( - params_class=ethosu_patterns.SigmoidParams, - activation_type="SIGMOID", - calc_func=sigmoid_calc_func, - ) - - -def leaky_relu_calc_func(x: float, alpha: float) -> float: - """Function to calculate the values for leaky relu.""" - return x if x >= 0 else x * alpha - - -class LeakyReLURewriter(DFPatternCallback): - """This pass adds leaky relu as a LUT for identity op.""" - - def __init__(self): - super().__init__(require_type=True, rewrite_once=True) - self.params_class = ethosu_patterns.LeakyReLUParams - self.pattern = wildcard().has_attr({"Composite": self.params_class.composite_name})( - wildcard() - ) - - def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map): - params = self.params_class(post.op.body) - params.ifm.tensor = post.args[0] - - input_scale = np.double(float(params.ifm.q_params.scale_f32)) - input_zp = int(params.ifm.q_params.zero_point) - output_scale = np.double(float(params.ofm.q_params.scale_f32)) - output_zp = int(params.ofm.q_params.zero_point) - - alpha = params.alpha - - # The calculation of the LUT values is similar 
to that in Vela - # convert_lrelu_to_lut(op, arch) - # (https://review.mlplatform.org/plugins/gitiles/ml/ethos-u/ethos-u-vela/+/refs/tags/3.2.0/ethosu/vela/tflite_graph_optimiser.py#864) # pylint: disable=line-too-long - alpha_scalar = 1 - alpha_scale, alpha_shift = scaling.elementwise_mul_scale(input_scale, alpha, output_scale) - identity_scale, identity_shift = scaling.elementwise_mul_scale(input_scale, 1, output_scale) - - dtype = params.ifm.dtype - qmin, qmax = np.iinfo(dtype).min, np.iinfo(dtype).max - - def calculate_lut_value(i): - zp_shift = ( - fp_math.multiply_by_quantized_multiplier( - alpha_scalar * (i - input_zp), alpha_scale, alpha_shift - ) - if i < input_zp - else fp_math.multiply_by_quantized_multiplier( - i - input_zp, identity_scale, identity_shift - ) - ) - - return min(qmax, max(qmin, output_zp + zp_shift)) - - values = list(map(calculate_lut_value, range(qmin, qmax + 1))) - lut = relay.const(values, dtype=dtype) - - # We baked the requantization into the LUT, so we don't requantize the identity operator - identity = ethosu_ops.ethosu_identity( - ifm=params.ifm.tensor, - lut=lut, - ifm_scale=input_scale, - ifm_zero_point=input_zp, - ofm_scale=input_scale, - ofm_zero_point=input_zp, - activation="LUT", - ) - - return identity - - -class HardSwishRewriter(DFPatternCallback): - """Convert ethosu.hard_swish composite function to add operation with LUT.""" - - def __init__(self): - super().__init__(require_type=True, rewrite_once=True) - self.params_class = ethosu_patterns.HardSwishParams - self.pattern = wildcard().has_attr({"Composite": self.params_class.composite_name})( - wildcard() - ) - - def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map): - params = self.params_class(post.op.body) - params.ifm.tensor = post.args[0] - - # The calculation of the LUT values is similar to that in Vela - # convert_hardswish_to_lut(op, arch, nng) - # (https://review.mlplatform.org/plugins/gitiles/ml/ethos-u/ethos-u-vela/+/refs/tags/3.2.0/ethosu/vela/tflite_graph_optimiser.py#719) # pylint: disable=line-too-long - input_scale = np.double(params.ifm.q_params.scale_f32) - input_zp = int(params.ifm.q_params.zero_point) - hires_input_scale = (1 / 128) * input_scale - - output_scale = np.double(params.ofm.q_params.scale_f32) - output_zp = int(params.ofm.q_params.zero_point) - output_scale, output_shift = scaling.quantise_scale(hires_input_scale / output_scale) - output_scale_16 = fp_math.downscale_multiplier_int32_to_int16(output_scale) - output_shift = 31 - output_shift - output_shift = -output_shift if output_shift < 0 else 0 - - dtype = params.ifm.dtype - qmin, qmax = np.iinfo(dtype).min, np.iinfo(dtype).max - - def calculate_relu_multiplier(inp, input_scale): - rmultiplier = np.double(3 / 32768) - rscale, rshift = scaling.quantise_scale(input_scale / rmultiplier) - rscale_16 = fp_math.downscale_multiplier_int32_to_int16(rscale) - - rvalue = np.int16(inp) - if rshift < 31: - rvalue = fp_math.shift_left16(rvalue, 30 - rshift) - rvalue = fp_math.saturating_rounding_mul16(rvalue, rscale_16) - rvalue = fp_math.shift_left16(rvalue, 1) - elif rshift > 31: - rvalue = fp_math.saturating_rounding_mul16(rvalue, rscale_16) - rvalue = fp_math.rounding_divide_by_pot(rvalue, rshift - 31) - else: - rvalue = fp_math.saturating_rounding_mul16(rvalue, rscale_16) - - rvalue = (rvalue + (1 << 15)) >> 1 - return rvalue - - def calculate_lut_values(i): - hires_input_value = (i - input_zp) * 128 - preshift_input_value = fp_math.saturating_rounding_mul16( - 
hires_input_value, output_scale_16 - ) - relu_value = calculate_relu_multiplier(hires_input_value, hires_input_scale) - lut_result = fp_math.saturating_mul16(relu_value, preshift_input_value) - lut_result = fp_math.rounding_divide_by_pot(lut_result, output_shift) + output_zp - return min(qmax, max(qmin, lut_result)) - - values = list(map(calculate_lut_values, range(-128, 128))) - lut = relay.const(values, dtype=dtype) - - # We baked the requantization into the LUT, so we don't requantize the identity operator - identity = ethosu_ops.ethosu_identity( - ifm=params.ifm.tensor, - lut=lut, - ifm_scale=input_scale, - ifm_zero_point=input_zp, - ofm_scale=input_scale, - ofm_zero_point=input_zp, - activation="LUT", - ) - - return identity - - -class Conv2DRewriter(DFPatternCallback): - """Convert conv2d related composite functions into ethosu_conv2d operators""" - - def __init__(self): - super().__init__(require_type=True) - self.pattern = (wildcard().has_attr({"Composite": "ethos-u.qnn_conv2d"}))(wildcard()) - - def callback( - self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map - ) -> tvm.relay.Expr: - params = ethosu_patterns.QnnConv2DParams(post.op.body) - params.ifm.tensor = post.args[0] - channels_map = { - "NHWC": 3, - } - kernel_size_map = { - "HWIO": params.weights.shape[0:2], - "OHWI": params.weights.shape[1:3], - "HWOI": params.weights.shape[0:2], - } - activation_map = {"clip": "CLIP"} - weight_to_ohwi_transform_map = {"HWIO": [3, 0, 1, 2]} - weights_values = params.weights.values - weights_values_ohwi = np.transpose( - weights_values, weight_to_ohwi_transform_map[str(params.weights.layout)] - ) - if params.activation: - activation = activation_map[params.activation.op.name] - clip_min = int(params.activation.attrs.a_min) - clip_max = int(params.activation.attrs.a_max) - else: - activation = "NONE" - clip_min = 0 - clip_max = 0 - scale_bias = vela_api.pack_biases( - biases=params.biases.tensor.data.asnumpy(), - ifm_scale=params.ifm.q_params.scale_f32, - ifm_dtype=np.dtype(params.ifm.dtype), - weight_scales=params.weights.q_params.scale_f32, - ofm_scale=params.ofm.q_params.scale_f32, - is_activation_tanh_or_sigmoid=activation in ["TANH", "SIGMOID"], - ) - ethosu_conv2d = ethosu_ops.ethosu_conv2d( - ifm=post.args[0], - weight=relay.const(weights_values_ohwi, params.weights.values.dtype), - scale_bias=relay.const(scale_bias, "uint8"), - lut=relay.const([], dtype="int8"), - ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=int(params.ifm.q_params.zero_point), - weight_zero_point=int(params.weights.q_params.zero_point), - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=int(params.ofm.q_params.zero_point), - kernel_shape=kernel_size_map[str(params.weights.layout)], - ofm_channels=params.ofm.shape[channels_map[str(params.ofm.layout)]], - strides=params.strides, - padding=params.padding, - dilation=params.dilation, - activation=activation, - clip_min=clip_min, - clip_max=clip_max, - upscale="NONE", - ifm_layout=str(params.ifm.layout), - ofm_layout=str(params.ofm.layout), - ) - return ethosu_conv2d - - -class Conv2DTransposeRewriter(DFPatternCallback): - """Convert conv2d_transpose related composite functions into - ethosu_conv2d_transpose operators.""" - - def __init__(self): - super().__init__(require_type=True) - self.pattern = (wildcard().has_attr({"Composite": "ethos-u.qnn_conv2d_transpose"}))( - wildcard() - ) - - def callback( - self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map - ) -> tvm.relay.Expr: 
- params = ethosu_patterns.QnnConv2DTransposeParams(post.op.body) - params.ifm.tensor = post.args[0] - - ofm_shape = params.ofm.shape - legalize_padding = params.legalize_padding - - weight_to_ohwi_transform_map = {"IOHW": [1, 2, 3, 0]} - weights_values = params.weights.values - weights_values_ohwi = np.transpose( - weights_values, weight_to_ohwi_transform_map[str(params.weights.layout)] - ) - weights_values_ohwi = np.flip(weights_values_ohwi, (1, 2)) - weights = relay.const(weights_values_ohwi, dtype=params.weights.values.dtype) - - bias_values = ( - params.biases.tensor.data.asnumpy() - if params.biases - else np.zeros((params.ifm.shape[-1])) - ) - scale_bias = vela_api.pack_biases( - biases=bias_values, - ifm_scale=params.ifm.q_params.scale_f32, - ifm_dtype=np.dtype(params.ifm.dtype), - weight_scales=params.weights.q_params.scale_f32, - ofm_scale=params.ofm.q_params.scale_f32, - is_activation_tanh_or_sigmoid=False, - ) - - reduced_op = ethosu_ops.ethosu_conv2d( - ifm=post.args[0], - weight=weights, - scale_bias=relay.const(scale_bias, "uint8"), - lut=relay.const([], dtype="int8"), - ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=int(params.ifm.q_params.zero_point), - weight_zero_point=int(params.weights.q_params.zero_point), - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=int(params.ofm.q_params.zero_point), - kernel_shape=params.kernel_shape, - ofm_channels=int(ofm_shape[-1]), - strides=(1, 1), - padding=legalize_padding, - dilation=params.dilation, - ifm_layout=str(params.ifm.layout), - ofm_layout=str(params.ofm.layout), - upscale="ZEROS", - ) - - # Remove additional padding by 'cropping' back to expected size - return relay.strided_slice(reduced_op, (0, 0, 0, 0), ofm_shape) - - -class DepthwiseConv2DRewriter(DFPatternCallback): - """Convert ethosu.qnn_depthwise_conv2d composite functions to ethosu_depthwise_conv2d - operators""" - - def __init__(self): - super().__init__(require_type=True) - self.pattern = ( - wildcard().has_attr( - {"Composite": ethosu_patterns.QnnDepthwiseConv2DParams.composite_name} - ) - )(wildcard()) - - def callback( - self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map - ) -> tvm.relay.Expr: - params = ethosu_patterns.QnnDepthwiseConv2DParams(post.op.body) - params.ifm.tensor = post.args[0] - channels_map = { - "NHWC": 3, - } - kernel_shape_map = { - "HWOI": params.weights.shape[0:2], - } - - weights_values = params.weights.values - weights_values_ohwi = np.moveaxis(weights_values, [0, 1, 2, 3], [1, 2, 0, 3]) - - activation = "NONE" - # Activations requiring LUT is currently not supported, so setting it to an empty list - lut = relay.const([], "int8") - clip_min = 0 - clip_max = 0 - if params.activation: - activation = ethosu_patterns.QnnDepthwiseConv2DParams.activation_map[ - params.activation.op.name - ] - if activation == "CLIP": - clip_min = int(params.activation.attrs.a_min) - clip_max = int(params.activation.attrs.a_max) - scale_bias = vela_api.pack_biases( - biases=params.biases.tensor.data.asnumpy(), - ifm_scale=params.ifm.q_params.scale_f32, - ifm_dtype=np.dtype(params.ifm.dtype), - weight_scales=params.weights.q_params.scale_f32, - ofm_scale=params.ofm.q_params.scale_f32, - is_activation_tanh_or_sigmoid=activation in ["TANH", "SIGMOID"], - ) - - ethosu_depthwise_conv2d = ethosu_ops.ethosu_depthwise_conv2d( - post.args[0], # IFM - relay.const(weights_values_ohwi, params.weights.values.dtype), - relay.const(scale_bias, "uint8"), - lut, - float(params.ifm.q_params.scale_f32), - 
int(params.ifm.q_params.zero_point), - int(params.weights.q_params.zero_point), - float(params.ofm.q_params.scale_f32), - int(params.ofm.q_params.zero_point), - kernel_shape_map[str(params.weights.layout)], - params.ofm.shape[channels_map[str(params.ofm.layout)]], - strides=params.strides, - padding=params.padding, - dilation=params.dilation, - activation=activation, - clip_min=clip_min, - clip_max=clip_max, - upscale="NONE", - ifm_layout=str(params.ifm.layout), - ofm_layout=str(params.ofm.layout), - ofm_dtype=str(params.ofm.dtype), - ) - return ethosu_depthwise_conv2d - - -class PoolingRewriter(DFPatternCallback): - """Convert ethosu.avgpool2d and ethosu.maxpool2d composite functions to - ethosu_pooling operators""" - - def __init__( - self, - params_class: Type, - pattern: CallPattern, - ): - super().__init__(require_type=True) - self.params_class = params_class - self.pattern = pattern - - def callback( - self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map - ) -> tvm.relay.Expr: - params = self.params_class(post.op.body) - params.ifm.tensor = post.args[0] - channels_map = { - "NHWC": 3, - } - - activation_map = {"clip": "CLIP"} - if params.activation: - activation = activation_map[params.activation.op.name] - clip_min = int(params.activation.attrs.a_min) - clip_max = int(params.activation.attrs.a_max) - else: - activation = "NONE" - clip_min = 0 - clip_max = 0 - - # Activations requiring LUT is currently not supported, so setting it to an empty list - lut = relay.const([], dtype="int8") - - # If ethosu.avgpool2d has strides which are not supported by the NPU, convert - # ethosu.avgpool2d composite functions to ethosu_pooling operator with stride=[1, 1]. - # Since the spatial dimensions of ifm and the pooling kernel coincide and the padding - # is [0, 0, 0, 0], the application of the pooling kernel will be done only once, - # which will give us the desired output - strides = params.strides - if params.strides[0] > 3 or params.strides[1] > 3: - strides = [1, 1] - - return ethosu_ops.ethosu_pooling( - ifm=post.args[0], - lut=lut, - pooling_type=params.pooling_type, - ifm_scale=params.ifm.q_params.scale_f32, - ifm_zero_point=params.ifm.q_params.zero_point, - ofm_scale=params.ofm.q_params.scale_f32, - ofm_zero_point=params.ofm.q_params.zero_point, - pool_shape=params.pool_shape, - ofm_channels=params.ofm.shape[channels_map[str(params.ofm.layout)]], - ofm_dtype=params.ofm.dtype, - strides=strides, - padding=params.padding, - activation=activation, - clip_min=clip_min, - clip_max=clip_max, - upscale="NONE", - ifm_layout=str(params.ifm.layout), - ofm_layout=str(params.ofm.layout), - ) - - -class MaxPoolingRewriter(PoolingRewriter): - def __init__(self): - super().__init__( - params_class=ethosu_patterns.MaxPool2DParams, - pattern=( - wildcard().has_attr({"Composite": ethosu_patterns.MaxPool2DParams.composite_name}) - )(wildcard()), - ) - - -class AvgPoolingRewriter(PoolingRewriter): - def __init__(self): - super().__init__( - params_class=ethosu_patterns.AvgPool2DParams, - pattern=( - wildcard().has_attr({"Composite": ethosu_patterns.AvgPool2DParams.composite_name}) - )(wildcard()), - ) - - -class BinaryElementwiseRewriter(DFPatternCallback): - """Convert ethosu binary elementwise composite functions to - ethosu_binary_elementwise operators""" - - def __init__( - self, - params_class: Type, - pattern: CallPattern, - ): - super().__init__(require_type=True) - self.params_class = params_class - self.pattern = pattern - - @staticmethod - def reshape_input( - inputs: 
List["TensorParams"], - ) -> List[tvm.relay.Expr]: - """Reshape the inputs so that the following binary elementwise - operator receives 4-dimensional inputs. - - Parameters - ---------- - inputs: List[TensorParams] - The inputs to reshape. - - Returns - ------- - reshaped_inputs: List[tvm.relay.Expr] - The new reshaped inputs. - """ - reshaped_inputs = [] - for i in inputs: - in_shape = i.shape - if len(in_shape) < 4: - pad_size = 4 - len(in_shape) - new_shape = ([1] * pad_size) + in_shape - new_call = relay.reshape(i.tensor, new_shape) - reshaped_inputs.append(new_call) - else: - reshaped_inputs.append(i.tensor) - return reshaped_inputs - - @staticmethod - def reshape_output(output: tvm.relay.Expr, ifm_input_shape: List[int]) -> tvm.relay.Expr: - """Reshape the output back to the original dimensionality. - Since the NPU must have the brodcastable tensor as the - second operand, the original shape of the first ifm must - be the output shape. - - Parameters - ---------- - output: tvm.relay.Expr - The output to reshape. - - ifm_input_shape: List[int] - The shape of the non-reshaped ifm tensor. - - Returns - ------- - reshaped_output: tvm.relay.Expr - The reshaped output expression. - """ - if len(ifm_input_shape) == 4: - return output - reshaped_output = relay.reshape(output, ifm_input_shape) - return reshaped_output - - def callback( - self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map - ) -> tvm.relay.Expr: - params = self.params_class(post.op.body) - params.ifm.tensor = post.args[1] if params.reversed_operands else post.args[0] - params.ifm2.tensor = post.args[0] if params.reversed_operands else post.args[1] - - activation_map = {"clip": "CLIP"} - if params.activation: - activation = activation_map[params.activation.op.name] - clip_min = int(params.activation.attrs.a_min) - clip_max = int(params.activation.attrs.a_max) - else: - activation = "NONE" - clip_min = 0 - clip_max = 0 - - # We don't yet support activation functions that need to get legalized to LUTs. 
- lut = relay.const([], dtype="int8") - - inputs = [params.ifm, params.ifm2] - inputs = self.reshape_input(inputs) - - ethosu_binary_elementwise = ethosu_ops.ethosu_binary_elementwise( - ifm=inputs[0], - ifm2=inputs[1], - lut=lut, - operator_type=params.operator_type, - ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=int(params.ifm.q_params.zero_point), - ifm2_scale=float(params.ifm2.q_params.scale_f32), - ifm2_zero_point=int(params.ifm2.q_params.zero_point), - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=int(params.ofm.q_params.zero_point), - ifm_channels=params.ifm.shape[-1] if params.ifm.shape else 1, - ifm2_channels=params.ifm2.shape[-1] if params.ifm2.shape else 1, - reversed_operands=params.reversed_operands, - ofm_dtype=params.ofm.dtype, - activation=activation, - clip_min=clip_min, - clip_max=clip_max, - ifm_layout=str(params.ifm.layout), - ifm2_layout=str(params.ifm2.layout), - ofm_layout=str(params.ofm.layout), - ) - output = self.reshape_output(ethosu_binary_elementwise, params.ifm.shape) - return output - - -class AddRewriter(BinaryElementwiseRewriter): - def __init__(self): - super().__init__( - params_class=ethosu_patterns.AddParams, - pattern=(wildcard().has_attr({"Composite": ethosu_patterns.AddParams.composite_name}))( - wildcard(), wildcard() - ), - ) - - -class SubRewriter(BinaryElementwiseRewriter): - def __init__(self): - super().__init__( - params_class=ethosu_patterns.SubParams, - pattern=(wildcard().has_attr({"Composite": ethosu_patterns.SubParams.composite_name}))( - wildcard(), wildcard() - ), - ) - - -class MulRewriter(BinaryElementwiseRewriter): - def __init__(self): - super().__init__( - params_class=ethosu_patterns.MulParams, - pattern=(wildcard().has_attr({"Composite": ethosu_patterns.MulParams.composite_name}))( - wildcard(), wildcard() - ), - ) - - -class MinRewriter(BinaryElementwiseRewriter): - def __init__(self): - super().__init__( - params_class=ethosu_patterns.MinParams, - pattern=(wildcard().has_attr({"Composite": ethosu_patterns.MinParams.composite_name}))( - wildcard(), wildcard() - ), - ) - - -class MaxRewriter(BinaryElementwiseRewriter): - def __init__(self): - super().__init__( - params_class=ethosu_patterns.MaxParams, - pattern=(wildcard().has_attr({"Composite": ethosu_patterns.MaxParams.composite_name}))( - wildcard(), wildcard() - ), - ) - - -class ShlRewriter(BinaryElementwiseRewriter): - def __init__(self): - super().__init__( - params_class=ethosu_patterns.ShlParams, - pattern=(wildcard().has_attr({"Composite": ethosu_patterns.ShlParams.composite_name}))( - wildcard(), wildcard() - ), - ) - - -class StridedSliceRewriter(DFPatternCallback): - """This pass brings the strided slice out of the partitioned function""" - - def __init__(self): - super().__init__(require_type=True, rewrite_once=True) - self.pattern = ( - wildcard().has_attr({"Composite": ethosu_patterns.StridedSliceParams.composite_name}) - )(wildcard()) - - def callback( - self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map - ) -> tvm.relay.Expr: - - slice_input = post.args[0] - - # TODO(lhutton1) For an unknown reason compilation will fail for strides of 4 - # dimensions, so we cannot use params.strides as this will sometimes give - # strides as [1, 1, 1, 1]. Since we only support strides of 1, hardcoding this - # value for now. 
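Note: the small rewriters above all share one idiom, a DFPatternCallback whose pattern is a Composite-tagged call and whose callback emits the replacement expression. A toy callback showing the same mechanics on a stock operator (the add-to-multiply rewrite is arbitrary):

    import tvm
    from tvm import relay
    from tvm.relay.dataflow_pattern import DFPatternCallback, is_op, wildcard, rewrite

    class AddSelfToMultiply(DFPatternCallback):
        # Rewrite add(x, x) into multiply(x, 2).
        def __init__(self):
            super().__init__(rewrite_once=True)
            self.x = wildcard()
            self.pattern = is_op("add")(self.x, self.x)

        def callback(self, pre, post, node_map):
            x = node_map[self.x][0]
            return relay.multiply(x, relay.const(2.0))

    x = relay.var("x", shape=(4,), dtype="float32")
    out = rewrite(AddSelfToMultiply(), relay.add(x, x))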
-        strides = [1]
-
-        params = ethosu_patterns.StridedSliceParams(post.op.body)
-        strided_slice = relay.op.strided_slice(
-            slice_input,
-            params.begin,
-            params.end,
-            strides=strides,
-            axes=params.axes,
-            slice_mode=params.slice_mode,
-        )
-        return strided_slice
-
-
-class ReshapeRewriter(DFPatternCallback):
-    """This pass brings the reshape out of the partitioned function"""
-
-    def __init__(self):
-        super().__init__(require_type=True, rewrite_once=True)
-        self.pattern = (
-            wildcard().has_attr({"Composite": ethosu_patterns.ReshapeParams.composite_name})
-        )(wildcard())
-
-    def callback(
-        self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
-    ) -> tvm.relay.Expr:
-        reshape_input = post.args[0]
-        reshape_params = ethosu_patterns.ReshapeParams(post.op.body)
-        new_shape = reshape_params.new_shape
-        return relay.op.reshape(reshape_input, newshape=new_shape)
-
-
-class NoOpRewriter(DFPatternCallback):
-    """This pass adds an identity operator to reshape and strided slice to avoid a no-op
-    without a consumer"""
-
-    def __init__(self):
-        super().__init__(require_type=True, rewrite_once=True)
-        self.reshape = is_op("reshape")(wildcard())
-        self.strided_slice = is_op("strided_slice")(wildcard())
-        self.pattern = self.reshape | self.strided_slice
-
-    def callback(
-        self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
-    ) -> tvm.relay.Expr:
-        if pre.checked_type.dtype == "int32":
-            return post
-        return ethosu_ops.ethosu_identity(ifm=post, lut=relay.const([], dtype="int8"))
-
-
-class UnaryElementwiseRewriter(DFPatternCallback):
-    """
-    Convert ethosu unary elementwise composite functions to
-    ethosu_unary_elementwise operators
-    """
-
-    def __init__(self, params_class: Type, pattern: CallPattern):
-        super().__init__(require_type=True)
-        self.params_class = params_class
-        self.pattern = pattern
-
-    def callback(
-        self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
-    ) -> tvm.relay.Expr:
-        params = self.params_class(post.op.body)
-        params.ifm.tensor = post.args[0]
-
-        activation_map = {"clip": "CLIP"}
-        if params.activation:
-            activation = activation_map[params.activation.op.name]
-            clip_min = int(params.activation.attrs.a_min)
-            clip_max = int(params.activation.attrs.a_max)
-        else:
-            activation = "NONE"
-            clip_min = 0
-            clip_max = 0
-
-        # We don't yet support activation functions that use LUT.
- lut = relay.const([], dtype="int8") - - unary_input_shape = params.ifm.shape - # If the input tensor is not 4D, enter reshapes before and after the unary operator - if len(params.ifm.shape) == 4: - unary_input = params.ifm.tensor - else: - pad_size = 4 - len(unary_input_shape) - unary_input_shape = ([1] * pad_size) + unary_input_shape - unary_input = relay.op.reshape(params.ifm.tensor, newshape=unary_input_shape) - - ethosu_unary_elementwise = ethosu_ops.ethosu_unary_elementwise( - ifm=unary_input, - lut=lut, - operator_type=params.operator_type, - ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=int(params.ifm.q_params.zero_point), - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=int(params.ofm.q_params.zero_point), - ofm_channels=unary_input_shape[3], - activation=activation, - clip_min=clip_min, - clip_max=clip_max, - ifm_layout=str(params.ifm.layout), - ofm_layout=str(params.ofm.layout), - ) - if len(params.ifm.shape) == 4: - op = ethosu_unary_elementwise - else: - op = relay.op.reshape(ethosu_unary_elementwise, newshape=params.ifm.shape) - return op - - -class AbsRewriter(UnaryElementwiseRewriter): - def __init__(self): - super().__init__( - params_class=ethosu_patterns.AbsParams, - pattern=(wildcard().has_attr({"Composite": ethosu_patterns.AbsParams.composite_name}))( - wildcard() - ), - ) - - -class MeanRewriter(DFPatternCallback): - """Convert ethosu.mean composite functions to an equivalent legalization: - - Case 1 (ifm qparams == ofm qparams): ethosu_pooling - - Case 2 (else): ethosu_depthwise_conv2d - """ - - def __init__(self): - super().__init__(require_type=True) - self.pattern = ( - wildcard().has_attr({"Composite": ethosu_patterns.MeanParams.composite_name}) - )(wildcard()) - - def callback( - self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map - ) -> tvm.relay.Expr: - params = ethosu_patterns.MeanParams(post.op.body) - params.ifm.tensor = post.args[0] - - ifm_shape = params.ifm.shape - ofm_shape = params.ofm.shape - lut = relay.const([], "int8") - axis = params.axis - reduced_op = params.ifm.tensor - - # Enforce 4d input - if len(ifm_shape) < 4: - axis = [x + 1 for x in axis] - if len(ifm_shape) == 3: - ifm_shape = [1, params.height, params.width, ifm_shape[2]] - else: - ifm_shape = [1, params.height, params.width, 1] - reduced_op = relay.reshape(reduced_op, ifm_shape) - - filter_height = ifm_shape[1] if 1 in axis else 1 - filter_width = ifm_shape[2] if 2 in axis else 1 - in_channels = out_channels = ifm_shape[-1] - - # If the height is greater than max kernel height, reshape the input - # from [filter_height, filter_width] to [1, (filter_height*filter_width)] - # only in the case the axis is [1, 2]. 
- if axis == [1, 2] and filter_height > 64: - ifm_shape = (ifm_shape[0], 1, filter_height * filter_width, in_channels) - filter_width = filter_height * filter_width - filter_height = 1 - reduced_op = relay.reshape(reduced_op, ifm_shape) - - if ( - params.ifm.q_params.scale_f32 == params.ofm.q_params.scale_f32 - and params.ifm.q_params.zero_point == params.ofm.q_params.zero_point - ): - reduced_op = ethosu_ops.ethosu_pooling( - ifm=reduced_op, - lut=lut, - pooling_type="AVG", - ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=0, - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=0, - pool_shape=(filter_height, filter_width), - ofm_channels=out_channels, - ofm_dtype=params.ofm.dtype, - rounding_mode="TRUNCATE", - ) - else: - weight_scale = 1 / (filter_height * filter_width) - weight_values = np.ones([out_channels, filter_height, filter_width, 1]) - bias = -1 * int(params.ifm.q_params.zero_point) * filter_height * filter_width - - scale_bias = vela_api.pack_biases( - biases=np.ones([ifm_shape[-1]]) * bias, - ifm_scale=params.ifm.q_params.scale_f32, - ifm_dtype=np.dtype(params.ifm.dtype), - weight_scales=np.array([weight_scale], dtype=np.float), - ofm_scale=params.ofm.q_params.scale_f32, - is_activation_tanh_or_sigmoid=False, - ) - reduced_op = ethosu_ops.ethosu_depthwise_conv2d( - ifm=reduced_op, - weight=relay.const(weight_values, params.ifm.dtype), - scale_bias=relay.const(scale_bias, "uint8"), - lut=lut, - ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=0, - weight_zero_point=0, - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=int(params.ofm.q_params.zero_point), - kernel_shape=(filter_height, filter_width), - ofm_channels=out_channels, - rounding_mode="NATURAL", - ofm_dtype=params.ofm.dtype, - ) - - # Reshape to original ofm shape - if len(ofm_shape) < 4: - reduced_op = relay.reshape(reduced_op, ofm_shape) - - return reduced_op - - -class SumRewriter(DFPatternCallback): - """ - Convert ethosu.sum composite functions to pooling operations - """ - - def __init__(self): - super().__init__(require_type=True) - self.pattern = ( - wildcard().has_attr({"Composite": ethosu_patterns.SumParams.composite_name}) - )(wildcard()) - - def callback( - self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map - ) -> tvm.relay.Expr: - - params = ethosu_patterns.SumParams(post.op.body) - - ifm_shape = params.ifm.shape - ofm_shape = params.ofm.shape - lut = relay.const([], "int8") - reduced_op = post.args[0] - - # Enforce 4d input - if len(ifm_shape) == 3: - ifm_shape = [1, params.height, params.width, ifm_shape[2]] - reduced_op = relay.reshape(reduced_op, ifm_shape) - - activation_map = {"clip": "CLIP"} - if params.activation: - activation = activation_map[params.activation.op.name] - clip_min = int(params.activation.attrs.a_min) - clip_max = int(params.activation.attrs.a_max) - else: - activation = "NONE" - clip_min = 0 - clip_max = 0 - - reduced_op = ethosu_ops.ethosu_pooling( - ifm=reduced_op, - lut=lut, - pooling_type="SUM", - ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=int(params.ifm.q_params.zero_point), - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=0, - pool_shape=(1, 1), - ofm_channels=1, - ofm_dtype="int32", - activation=activation, - clip_min=clip_min, - clip_max=clip_max, - ifm_layout=params.ifm.layout, - ofm_layout=params.ofm.layout, - rounding_mode="NATURAL", - ) - - # Convert tensor dtype from int32 to int8 - scalar_tensor = relay.const(np.ones([1, 1, 1, 1], 
dtype="int32"), dtype="int32")
-        reduced_op = ethosu_ops.ethosu_binary_elementwise(
-            ifm=reduced_op,
-            ifm2=scalar_tensor,
-            lut=lut,
-            operator_type="MUL",
-            ifm_scale=0.0,
-            ifm_zero_point=0,
-            ifm2_scale=0.0,
-            ifm2_zero_point=0,
-            ofm_scale=0.0,
-            ofm_zero_point=int(params.ofm.q_params.zero_point),
-            ifm_channels=1,
-            ifm2_channels=1,
-            reversed_operands=False,
-            ofm_dtype="int8",
-        )
-
-        # Reshape to original ofm shape
-        if len(ofm_shape) < 4:
-            reduced_op = relay.reshape(reduced_op, ofm_shape)
-
-        return reduced_op
-
-
-class ConcatRewriter(DFPatternCallback):
-    """The newer versions of TFLite converters return a concatenate operator that concatenates
-    tensors with the same QNN params (if the QNN params of the tensors were initially different,
-    the converter adds a requantize node), so this rewriter replaces the QNN concatenate with
-    a "normal" concatenate"""

-    def __init__(self):
-        super().__init__(require_type=True, rewrite_once=True)
-        self.pattern = (
-            wildcard().has_attr({"Composite": ethosu_patterns.ConcatParams.composite_name})
-        )(None)
-
-    def callback(
-        self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
-    ) -> tvm.relay.Expr:
-        # Find the tensors that are inputs to the concat and the scales and zero points
-        concat_args = list()
-        for arg in post.args:
-            if isinstance(arg, (tvm.relay.expr.Call, tvm.relay.expr.TupleGetItem)):
-                concat_args.append(arg)
-
-        axis = post.op.body.attrs.axis
-        concat = relay.op.concatenate(relay.Tuple(concat_args), axis=axis)
-        return concat
-
-
-class RequantizeRewriter(DFPatternCallback):
-    """Convert ethos-u.requantize composite function to an identity operation."""
-
-    def __init__(self):
-        super().__init__(require_type=True)
-        self.pattern = (
-            wildcard().has_attr({"Composite": ethosu_patterns.RequantizeParams.composite_name})
-        )(wildcard())
-
-    def callback(
-        self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
-    ) -> tvm.relay.Expr:
-        params = ethosu_patterns.RequantizeParams(post.op.body)
-        params.ifm.tensor = post.args[0]
-
-        lut = relay.const([], "int8")
-
-        return ethosu_ops.ethosu_identity(
-            ifm=params.ifm.tensor,
-            lut=lut,
-            ifm_scale=float(params.ifm.q_params.scale_f32),
-            ifm_zero_point=int(params.ifm.q_params.zero_point),
-            ofm_scale=float(params.ofm.q_params.scale_f32),
-            ofm_zero_point=int(params.ofm.q_params.zero_point),
-            rounding_mode="NATURAL",
-        )
-
-
-class Resize2dRewriter(DFPatternCallback):
-    """
-    Convert ethos-u.resize2d composite function to an equivalent operation that
-    performs the relevant upsampling operation.
-
-    Case 1: No upsampling (upscale factor of 1):
-        Identity.
-    Case 2: Nearest neighbor upsampling:
-        1x1 pooling with 2x2 nearest neighbor upsampling.
-    Case 3: Bilinear upsampling:
-        2x2 average pool with 2x2 nearest neighbor upsampling.
-    """
-
-    def __init__(self):
-        super().__init__(require_type=True)
-        self.pattern = (
-            wildcard().has_attr({"Composite": ethosu_patterns.Resize2dParams.composite_name})
-        )(wildcard())
-
-    def callback(
-        self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
-    ) -> tvm.relay.Expr:
-        params = ethosu_patterns.Resize2dParams(post.op.body)
-        params.ifm.tensor = post.args[0]
-
-        lut = relay.const([], "int8")
-        ifm_shape = params.ifm.shape
-        in_channels = ifm_shape[-1]
-        reduced_op = params.ifm.tensor
-        current_size = np.array(ifm_shape[1:3])
-        output_size = np.array(params.size)
-
-        if (current_size == output_size).all():
-            return ethosu_ops.ethosu_identity(
-                reduced_op,
-                lut,
-                ifm_scale=float(params.ifm.q_params.scale_f32),
-                ifm_zero_point=int(params.ifm.q_params.zero_point),
-                ofm_scale=float(params.ofm.q_params.scale_f32),
-                ofm_zero_point=int(params.ofm.q_params.zero_point),
-            )
-
-        padding = [0, 0, 0, 0]
-        rounding_mode = "TFL"
-        pool_shape = [1, 1]
-        if params.method == "linear":
-            pool_shape = [2, 2]
-            rounding_mode = "NATURAL"
-            if params.coordinate_transformation_mode == "asymmetric":
-                # Use SAME padding.
-                ypad = Resize2dRewriter.get_required_padding(ifm_shape[1])
-                xpad = Resize2dRewriter.get_required_padding(ifm_shape[2])
-                padding = [ypad // 2, xpad // 2, (ypad + 1) // 2, (xpad + 1) // 2]
-
-        return ethosu_ops.ethosu_pooling(
-            ifm=reduced_op,
-            lut=lut,
-            pooling_type="AVG",
-            ifm_scale=float(params.ifm.q_params.scale_f32),
-            ifm_zero_point=int(params.ifm.q_params.zero_point),
-            ofm_scale=float(params.ofm.q_params.scale_f32),
-            ofm_zero_point=int(params.ofm.q_params.zero_point),
-            pool_shape=pool_shape,
-            ofm_channels=in_channels,
-            ofm_dtype=params.ofm.dtype,
-            strides=[1, 1],
-            padding=padding,
-            upscale="NEAREST",
-            rounding_mode=rounding_mode,
-        )
-
-    @staticmethod
-    def get_required_padding(input_size: int, pool_size: int = 2) -> int:
-        """Gets the amount of padding required to achieve
-        'SAME' padding for a given axis."""
-        needed_input = (input_size - 1) + pool_size
-        total_padding = max(0, needed_input - input_size)
-        return total_padding
-
-
-class ExpandDimsRewriter(DFPatternCallback):
-    """Legalize expand dims to a reshape operator."""
-
-    def __init__(self):
-        super().__init__(require_type=True, rewrite_once=True)
-        self.pattern = (
-            wildcard().has_attr({"Composite": ethosu_patterns.ExpandDimsParams.composite_name})
-        )(None)
-
-    def callback(
-        self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
-    ) -> tvm.relay.Expr:
-        params = ethosu_patterns.ExpandDimsParams(post.op.body)
-        return relay.op.reshape(post.args[0], newshape=params.output.shape)
-
-
-class SqueezeRewriter(DFPatternCallback):
-    """Legalize squeeze to a reshape operator."""
-
-    def __init__(self):
-        super().__init__(require_type=True, rewrite_once=True)
-        self.pattern = (
-            wildcard().has_attr({"Composite": ethosu_patterns.SqueezeParams.composite_name})
-        )(None)
-
-    def callback(
-        self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
-    ) -> tvm.relay.Expr:
-        params = ethosu_patterns.SqueezeParams(post.op.body)
-        return relay.op.reshape(post.args[0], newshape=params.output.shape)
-
-
-class FullyConnectedRewriter(DFPatternCallback):
-    """Legalize Fully Connected (with bias and clip) to an NPU operator"""
-
-    def __init__(self):
-        super().__init__(require_type=True)
-        self.pattern = (
-            wildcard().has_attr({"Composite": ethosu_patterns.FullyConnectedParams.composite_name})
-        )(wildcard())
-
-    def callback(self,
pre, post, node_map): - params = ethosu_patterns.FullyConnectedParams(post.op.body) - params.ifm.tensor = post.args[0] - - # IFM reshapes - ifm = post.args[0] - if len(params.ifm.shape) != 4 or not params.ifm.shape[1] == params.ifm.shape[2] == 1: - ifm = relay.reshape(ifm, (1, 1, 1, params.ifm.shape[-1])) - - # Weight transformations - weights_values = params.weights.values - weights_values_ohwi = np.expand_dims(weights_values, axis=(1, 2)) - if params.activation: - activation = "CLIP" - clip_min = int(params.activation.attrs.a_min) - clip_max = int(params.activation.attrs.a_max) - else: - activation = "NONE" - clip_min = 0 - clip_max = 0 - bias_values = ( - params.biases.tensor.data.asnumpy() - if params.biases - else np.zeros((params.ofm.shape[-1])) - ) - scale_bias = vela_api.pack_biases( - biases=bias_values, - ifm_scale=params.ifm.q_params.scale_f32, - ifm_dtype=np.dtype(params.ifm.dtype), - weight_scales=params.weights.q_params.scale_f32, - ofm_scale=params.ofm.q_params.scale_f32, - is_activation_tanh_or_sigmoid=False, - ) - ethosu_fc = ethosu_ops.ethosu_conv2d( - ifm=ifm, - weight=relay.const(weights_values_ohwi, params.weights.values.dtype), - scale_bias=relay.const(scale_bias, "uint8"), - lut=relay.const([], dtype="int8"), - ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=int(params.ifm.q_params.zero_point), - weight_zero_point=int(params.weights.q_params.zero_point), - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=int(params.ofm.q_params.zero_point), - kernel_shape=[1, 1], - ofm_channels=params.weights.shape[0], - strides=(1, 1), - padding=(0, 0, 0, 0), - dilation=(1, 1), - activation=activation, - clip_min=clip_min, - clip_max=clip_max, - upscale="NONE", - ifm_layout="NHWC", - ofm_layout="NHWC", - ) - - if len(params.ofm.shape) != 4 or not params.ofm.shape[1] == params.ofm.shape[2] == 1: - ethosu_fc = relay.reshape(ethosu_fc, params.ofm.shape) - return ethosu_fc - - -class MatrixMultiplicationRewriter(DFPatternCallback): - """Legalize matrix multiplication with two tensors into sequence of NPU operators""" - - def __init__( - self, - params_class: Type, - pattern: CallPattern, - ): - super().__init__(require_type=True) - self.pattern = pattern - self.params_class = params_class - - def callback(self, pre, post, node_map): - params = self.params_class(post.op.body) - ifm = post.args[0] - ifm2 = post.args[1] - lut = relay.const([], dtype=params.ifm.dtype) - activation_map = {"clip": "CLIP"} - if params.activation: - activation = activation_map[params.activation.op.name] - clip_min = int(params.activation.attrs.a_min) - clip_max = int(params.activation.attrs.a_max) - else: - activation = "NONE" - clip_min = 0 - clip_max = 0 - - # Reshape ifm to NHWC - ifm = relay.reshape(ifm, (1, 1, *params.ifm.shape)) - # Split the second matrix to get columns - columns = list(relay.op.split(ifm2, params.ofm.shape[-1], axis=0)) - - res_columns = [] - for column in columns: - ifm2 = relay.reshape(column, (1, 1, 1, params.ifm.shape[-1])) - # Multiplying the first matrix by a column - ethosu_binary_elementwise = ethosu_ops.ethosu_binary_elementwise( - ifm=ifm, - ifm2=ifm2, - lut=lut, - operator_type="MUL", - ifm_zero_point=int(params.ifm.q_params.zero_point), - ifm_scale=0.0, - ifm2_zero_point=int(params.weights.q_params.zero_point), - ifm2_scale=0.0, - ofm_scale=0.0, - ofm_zero_point=0, - ifm_channels=params.ifm.shape[-1], - ifm2_channels=params.ifm.shape[-1], - reversed_operands=False, - ofm_dtype="int32", - ) - - # Use reduce sum to get result column - 
reduce_sum = ethosu_ops.ethosu_pooling( - ifm=ethosu_binary_elementwise, - lut=lut, - pooling_type="SUM", - ifm_zero_point=0, - ifm_scale=float(params.weights.q_params.scale_f32) - * float(params.ifm.q_params.scale_f32), - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=0, - pool_shape=(1, 1), - ofm_channels=1, - ofm_dtype="int32", - activation=activation, - clip_min=clip_min, - clip_max=clip_max, - rounding_mode="NATURAL", - ) - - # Convert tensor dtype from int32 to output dtype - scalar_tensor = relay.const(np.ones([1, 1, 1, 1], dtype="int32"), dtype="int32") - reduce_sum = ethosu_ops.ethosu_binary_elementwise( - ifm=reduce_sum, - ifm2=scalar_tensor, - lut=lut, - operator_type="MUL", - ifm_scale=0.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=0.0, - ofm_zero_point=int(params.ofm.q_params.zero_point), - ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype=params.ofm.dtype, - ) - - res_columns.append(reduce_sum) - - # Concatenate result columns - concat = relay.op.concatenate(relay.Tuple(res_columns), axis=3) - return relay.reshape(concat, params.ofm.shape) - - -class MatMulRewriter(MatrixMultiplicationRewriter): - """Convert ethos-u.matmul composite function to sequence of NPU operators""" - - def __init__(self): - super().__init__( - params_class=ethosu_patterns.MatMulParams, - pattern=( - wildcard().has_attr({"Composite": ethosu_patterns.MatMulParams.composite_name}) - )(wildcard(), wildcard()), - ) - - -class MatMulFixedPointRewriter(MatrixMultiplicationRewriter): - """Convert ethos-u.matmul_fixed_point composite function to sequence of NPU operators""" - - def __init__(self): - super().__init__( - params_class=ethosu_patterns.MatMulFixedPointParams, - pattern=( - wildcard().has_attr( - {"Composite": ethosu_patterns.MatMulFixedPointParams.composite_name} - ) - )(wildcard(), wildcard()), - ) - - -class PadRewriter(DFPatternCallback): - """Convert ethos-u.pad2d composite function to ethosu_depthwise_conv2d - operator""" - - def __init__(self): - super().__init__(require_type=True) - self.pattern = ( - wildcard().has_attr({"Composite": ethosu_patterns.PadParams.composite_name}) - )(wildcard()) - - def callback( - self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map - ) -> tvm.relay.Expr: - params = ethosu_patterns.PadParams(post.op.body) - params.ifm.tensor = post.args[0] - channels_map = { - "NHWC": 3, - } - w_h, w_w = (1, 1) - # OHWI format for the ethosu_depthwise_conv2d kernel weights - weight_shape = (params.ifm.shape[-1], w_h, w_w, 1) - weights = relay.const(np.full(weight_shape, 1), params.ifm.dtype) - scale_bias = vela_api.pack_biases( - biases=np.zeros(params.ifm.shape[-1]), - ifm_scale=params.ifm.q_params.scale_f32, - ifm_dtype=np.dtype(params.ifm.dtype), - weight_scales=np.array(1.0, dtype=np.float32), - ofm_scale=params.ofm.q_params.scale_f32, - is_activation_tanh_or_sigmoid=False, - ) - - return ethosu_ops.ethosu_depthwise_conv2d( - ifm=post.args[0], - weight=weights, - scale_bias=relay.const(scale_bias, "uint8"), - lut=relay.const([], "int8"), - ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=int(params.ifm.q_params.zero_point.item()), - weight_zero_point=0, - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=int(params.ofm.q_params.zero_point.item()), - kernel_shape=(w_h, w_w), - ofm_channels=params.ofm.shape[channels_map[str(params.ofm.layout)]], - strides=(1, 1), - padding=params.padding, - dilation=(1, 1), - activation="NONE", - clip_min=0, - 
clip_max=0, - upscale="NONE", - ifm_layout=str(params.ifm.layout), - ofm_layout=str(params.ofm.layout), - ofm_dtype=str(params.ofm.dtype), - ) - - -class ChannelPadRewriter(DFPatternCallback): - """Convert ethos-u.channel-pad composite function to the Relay concatenate operation""" - - def __init__(self): - super().__init__(require_type=True) - self.pattern = ( - wildcard().has_attr({"Composite": ethosu_patterns.ChannelPadParams.composite_name}) - )(wildcard()) - - def callback( - self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map - ) -> tvm.relay.Expr: - params = ethosu_patterns.ChannelPadParams(post.op.body) - params.ifm.tensor = post.args[0] - - concat_args = list() - lut = relay.const([], dtype="int8") - # pad channels before - if params.ch_padding[0] > 0: - shape1 = list(params.ifm.shape) - shape1[3] = params.ch_padding[0].value - pad_channels = relay.Constant( - tvm.nd.array( - np.full( - shape=shape1, - fill_value=int(params.ifm.q_params.zero_point), - dtype=params.ifm.dtype, - ) - ) - ) - identity1 = ethosu_ops.ethosu_identity( - ifm=pad_channels, - lut=lut, - ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=int(params.ifm.q_params.zero_point), - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=int(params.ofm.q_params.zero_point), - ) - concat_args.append(identity1) - - identity2 = ethosu_ops.ethosu_identity( - ifm=params.ifm.tensor, - lut=lut, - ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=int(params.ifm.q_params.zero_point), - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=int(params.ofm.q_params.zero_point), - ) - concat_args.append(identity2) - - # pad channels after - if params.ch_padding[1] > 0: - shape3 = list(params.ifm.shape) - shape3[3] = params.ch_padding[1].value - pad_channels3 = relay.Constant( - tvm.nd.array( - np.full( - shape=shape3, - fill_value=int(params.ifm.q_params.zero_point), - dtype=params.ifm.dtype, - ) - ) - ) - identity3 = ethosu_ops.ethosu_identity( - ifm=pad_channels3, - lut=lut, - ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=int(params.ifm.q_params.zero_point), - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=int(params.ofm.q_params.zero_point), - ) - concat_args.append(identity3) - - return relay.op.concatenate(relay.Tuple(concat_args), axis=3) - - -@util.create_npu_function_pass(opt_level=1) -class LegalizeEthosU: - """This is the pass to call graph-rewrites to perform graph transformation - in a way such that the operations are replaced with hardware/codegen supported - operations. - """ - - def transform_npu_function(self, _, func: relay.Function) -> relay.Function: - """This is the method that replaces the operations with hardware/codegen supported - operations. 
-        """
-        rewriters = [
-            PartitionedSplitRewriter(),
-            FullyConnectedRewriter(),
-            MatMulRewriter(),
-            MatMulFixedPointRewriter(),
-            SplitRewriter(),
-            ChannelPadRewriter(),
-            Conv2DRewriter(),
-            Conv2DTransposeRewriter(),
-            DepthwiseConv2DRewriter(),
-            MaxPoolingRewriter(),
-            AvgPoolingRewriter(),
-            PadRewriter(),
-            AddRewriter(),
-            SubRewriter(),
-            MulRewriter(),
-            MinRewriter(),
-            MaxRewriter(),
-            ShlRewriter(),
-            AbsRewriter(),
-            TanhRewriter(),
-            TanhFixedPointRewriter(),
-            HardSwishRewriter(),
-            LeakyReLURewriter(),
-            MeanRewriter(),
-            SumRewriter(),
-            SoftmaxRewriter(),
-            ConcatRewriter(),
-            SigmoidRewriter(),
-            RequantizeRewriter(),
-            Resize2dRewriter(),
-            ExpandDimsRewriter(),
-            SqueezeRewriter(),
-            ReshapeRewriter(),
-            StridedSliceRewriter(),
-            NoOpRewriter(),
-        ]
-        for rewriter in rewriters:
-            func = rewrite(rewriter, func)
-
-        return func
-
-    def __call__(self, *args, **kwargs):
-        # pylint is unable to figure out that the decorated
-        # class is callable, so this stub is added to
-        # suppress the warning.
-        pass
diff --git a/python/tvm/relay/backend/contrib/ethosu/op/__init__.py b/python/tvm/relay/backend/contrib/ethosu/op/__init__.py
deleted file mode 100644
index 8d51c8a5abea..000000000000
--- a/python/tvm/relay/backend/contrib/ethosu/op/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"Relay operators for the Arm(R) Ethos(TM)-U NPU"
-
-from .convolution import ethosu_conv2d
-from .depthwise import ethosu_depthwise_conv2d
-from .pooling import ethosu_pooling
-from .binary_elementwise import ethosu_binary_elementwise
-from .identity import ethosu_identity
-from .unary_elementwise import ethosu_unary_elementwise
diff --git a/python/tvm/relay/backend/contrib/ethosu/op/binary_elementwise.py b/python/tvm/relay/backend/contrib/ethosu/op/binary_elementwise.py
deleted file mode 100644
index f4022d8619a2..000000000000
--- a/python/tvm/relay/backend/contrib/ethosu/op/binary_elementwise.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
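[Editor's note, illustrative only and not part of the diff: every rewriter removed in the legalize.py hunk above follows the same DFPatternCallback recipe, namely match a partitioned function by its "Composite" attribute and emit the replacement NPU operator from callback(). A minimal sketch of that recipe is shown below; the composite name "example.composite" and the pass-through replacement are invented purely for illustration.]

from tvm.relay.dataflow_pattern import DFPatternCallback, rewrite, wildcard


class ExampleCompositeRewriter(DFPatternCallback):
    """Replace calls to a hypothetical composite function with their first argument."""

    def __init__(self):
        super().__init__(require_type=True)
        # Match a call to any function carrying this "Composite" attribute, taking one argument.
        self.pattern = wildcard().has_attr({"Composite": "example.composite"})(wildcard())

    def callback(self, pre, post, node_map):
        # A real rewriter would construct the corresponding ethosu_* operator here.
        return post.args[0]


# Applied the same way LegalizeEthosU applies each rewriter above:
# func = rewrite(ExampleCompositeRewriter(), func)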
-# pylint: disable=unused-argument -"""Relay operators for binary elementwise operators for Arm(R) Ethos(TM)-U NPU""" -from typing import Optional -import tvm -from tvm.relay.op import _make -from tvm.topi.generic import schedule_injective -from tvm.relay.op.op import OpStrategy -from tvm.relay.op import strategy as _strategy - -from ..te import binary_elementwise_compute - - -def _extract_ethosu_binary_elementwise_params(attrs, args): - """Get the parameters necessary to construct a ethosu_binary_elementwise compute TE - from a ethosu_binary_elementwise Relay call.""" - ifm = args[0] - ifm2 = args[1] - lut = args[2] - operator_type = attrs.operator_type - ifm_scale = attrs.ifm_scale - ifm_zero_point = attrs.ifm_zero_point - ifm2_scale = attrs.ifm2_scale - ifm2_zero_point = attrs.ifm2_zero_point - ofm_scale = attrs.ofm_scale - ofm_zero_point = attrs.ofm_zero_point - ifm_channels = attrs.ifm_channels - ifm2_channels = attrs.ifm2_channels - reversed_operands = attrs.reversed_operands - activation = attrs.activation - clip_min = attrs.clip_min - clip_max = attrs.clip_max - rounding_mode = attrs.rounding_mode - ifm_layout = attrs.ifm_layout - ifm2_layout = attrs.ifm2_layout - ofm_layout = attrs.ofm_layout - ofm_dtype = attrs.ofm_dtype - use_rescale = attrs.use_rescale - rescale_scale = attrs.rescale_scale - rescale_shift = attrs.rescale_shift - - return ( - ifm, - ifm2, - lut, - operator_type, - ifm_scale, - ifm_zero_point, - ifm2_scale, - ifm2_zero_point, - ofm_scale, - ofm_zero_point, - ifm_channels, - ifm2_channels, - reversed_operands, - activation, - clip_min, - clip_max, - rounding_mode, - ifm_layout, - ifm2_layout, - ofm_layout, - ofm_dtype, - use_rescale, - rescale_scale, - rescale_shift, - ) - - -@tvm.ir.register_op_attr("contrib.ethosu.binary_elementwise", "FTVMCompute") -def create_ethosu_binary_elementwise_compute(attrs, args, out_type): - """Create an ethosu_binary_elementwise compute op.""" - params = _extract_ethosu_binary_elementwise_params(attrs, args) - op = binary_elementwise_compute(*params) - return [op] - - -@tvm.ir.register_op_attr("contrib.ethosu.binary_elementwise", "FTVMStrategy") -def binary_elementwise_strategy_ethosu(attrs, inputs, out_type, target): - strategy = OpStrategy() - strategy.add_implementation( - create_ethosu_binary_elementwise_compute, - _strategy.wrap_topi_schedule(schedule_injective), - name="ethosu_binary_elementwise", - ) - return strategy - - -def ethosu_binary_elementwise( - ifm: tvm.relay.Expr, - ifm2: tvm.relay.Expr, - lut: tvm.relay.Expr, - operator_type: str, - ifm_scale: float, - ifm_zero_point: int, - ifm2_scale: float, - ifm2_zero_point: int, - ofm_scale: float, - ofm_zero_point: int, - ifm_channels: int, - ifm2_channels: int, - reversed_operands: bool, - ofm_dtype: str, - activation: Optional[str] = "NONE", - clip_min: Optional[int] = 0, - clip_max: Optional[int] = 0, - rounding_mode: Optional[str] = "TFL", - ifm_layout: Optional[str] = "NHWC", - ifm2_layout: Optional[str] = "NHWC", - ofm_layout: Optional[str] = "NHWC", - use_rescale: Optional[bool] = False, - rescale_scale: Optional[int] = 0, - rescale_shift: Optional[int] = 0, -) -> tvm.relay.Call: - """This is a quantized binary elementwise operation as supported by - the NPU. It accepts either NHWC or NHCWB16 format - for the input data. - - Parameters - ---------- - ifm : tvm.relay.Expr - The Input Feature Map tensor (IFM). - ifm2 : tvm.relay.Expr - The Input Feature Map tensor 2 (IFM2). - lut : tvm.relay.Expr - The look-up table of values to use if activation = "LUT". 
-    operator_type: str
-        The type of the binary elementwise operator.
-            "ADD"
-            "SUB"
-            "MUL"
-            "MIN"
-            "MAX"
-            "SHR"
-            "SHL"
-    ifm_scale : float
-        The quantization scale for the Input Feature Map tensor.
-    ifm_zero_point : int
-        The quantization zero point for the Input Feature Map tensor.
-    ifm2_scale : float
-        The quantization scale for the Input Feature Map tensor 2.
-    ifm2_zero_point : int
-        The quantization zero point for the Input Feature Map tensor 2.
-    ofm_scale : float
-        The quantization scale for the Output Feature Map tensor.
-    ofm_zero_point : int
-        The quantization zero point for the Output Feature Map tensor.
-    ifm_channels : int
-        The number of the Input Feature Map channels.
-    ifm2_channels : int
-        The number of the Input Feature Map 2 channels.
-    reversed_operands : bool
-        True if IFM2 is the first operand and IFM is the second operand.
-    ofm_dtype: str
-        The Output Feature Map tensor type.
-        MUL, ADD, SUB {IFM}->{OFM}:
-          {uint8, int8, int32} -> {uint8, int8, int32}, any pairing
-        MAX, MIN:
-          IFM and OFM must be of the same type, one of:
-          {int8, uint8}
-        SHR {IFM}->{OFM}:
-          {int32}->{int8, uint8, int32}, any pairing
-        SHL:
-          {int32}->{int32} only
-    activation : str, optional
-        The activation function to use.
-            "NONE" - no activation function.
-            "CLIP" - clip the output between clip_min and clip_max.
-            "TANH" - tanh activation function.
-            "SIGMOID" - sigmoid activation function.
-            "LUT" - use a look-up table to perform the activation function.
-        Available activations for activation type:
-            {int8, uint8}: "NONE", "CLIP", "TANH", "SIGMOID", "LUT"
-            {int32}: "NONE"
-    clip_min : int, optional
-        The minimum clipping value if activation = "CLIP".
-    clip_max : int, optional
-        The maximum clipping value if activation = "CLIP".
-    rounding_mode : str, optional
-        The rounding mode to apply to the Output Feature Map tensor.
-            "TFL" - Tensorflow Lite rounding scheme.
-            "TRUNCATE" - Truncate towards zero.
-            "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity.
-    ifm_layout : str, optional
-        The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16".
-    ifm2_layout : str, optional
-        The layout of the Input Feature Map tensor 2. Can be "NHWC" or "NHCWB16".
-    ofm_layout : str, optional
-        The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16".
-    use_rescale : bool, optional
-        Use explicit scaling if True.
-    rescale_scale : int, optional
-        Scale value for rescale. For 32-bit operations scale is not applied but shift is.
-    rescale_shift : int, optional
-        Shift value for rescale.
-
-    Returns
-    -------
-    out : tvm.relay.Call
-        A call to the ethosu_binary_elementwise op.
-    """
-    return _make.ethosu_binary_elementwise(
-        ifm,
-        ifm2,
-        lut,
-        operator_type,
-        ifm_scale,
-        ifm_zero_point,
-        ifm2_scale,
-        ifm2_zero_point,
-        ofm_scale,
-        ofm_zero_point,
-        ifm_channels,
-        ifm2_channels,
-        reversed_operands,
-        activation,
-        clip_min,
-        clip_max,
-        rounding_mode,
-        ifm_layout,
-        ifm2_layout,
-        ofm_layout,
-        ofm_dtype,
-        use_rescale,
-        rescale_scale,
-        rescale_shift,
-    )
diff --git a/python/tvm/relay/backend/contrib/ethosu/op/convolution.py b/python/tvm/relay/backend/contrib/ethosu/op/convolution.py
deleted file mode 100644
index 3e1cf847b0e6..000000000000
--- a/python/tvm/relay/backend/contrib/ethosu/op/convolution.py
+++ /dev/null
@@ -1,213 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=unused-argument -"""Relay operators for convolutions for Arm(R) Ethos(TM)-U NPU""" -from typing import Tuple - -import tvm # type: ignore -from tvm.relay.op import _make # type: ignore -from tvm.topi.generic import schedule_injective # type: ignore -from tvm.relay.op.op import OpStrategy # type: ignore -from tvm.relay.op import strategy as _strategy - -from ..te import conv2d_compute - - -def _extract_ethosu_conv2d_params(attrs, args): - """Get the parameters necessary to construct a compute TE - from a ethosu_conv2d Relay call.""" - ifm = args[0] - weight = args[1] - scale_bias = args[2] - lut = args[3] - ifm_scale = attrs.ifm_scale - ifm_zero_point = attrs.ifm_zero_point - weight_zero_point = attrs.weight_zero_point - ofm_scale = attrs.ofm_scale - ofm_zero_point = attrs.ofm_zero_point - strides = attrs.strides - padding = attrs.padding - dilation = attrs.dilation - activation = attrs.activation - clip_min = attrs.clip_min - clip_max = attrs.clip_max - rounding_mode = attrs.rounding_mode - upscale = attrs.upscale - ifm_layout = attrs.ifm_layout - ofm_layout = attrs.ofm_layout - - return ( - ifm, - weight, - scale_bias, - lut, - ifm_scale, - ifm_zero_point, - weight_zero_point, - ofm_scale, - ofm_zero_point, - strides, - padding, - dilation, - activation, - clip_min, - clip_max, - rounding_mode, - upscale, - ifm_layout, - ofm_layout, - ) - - -@tvm.ir.register_op_attr("contrib.ethosu.conv2d", "FTVMCompute") -def create_ethosu_conv2d_compute(attrs, args, out_type): - """Create an ethosu_conv2d compute op.""" - params = _extract_ethosu_conv2d_params(attrs, args) - op = conv2d_compute(*params) - return [op] - - -@tvm.ir.register_op_attr("contrib.ethosu.conv2d", "FTVMStrategy") -def conv2d_strategy_ethosu(attrs, inputs, out_type, target): - strategy = OpStrategy() - strategy.add_implementation( - create_ethosu_conv2d_compute, - _strategy.wrap_topi_schedule(schedule_injective), - name="ethosu_conv2d", - ) - return strategy - - -def ethosu_conv2d( - ifm: tvm.relay.Expr, - weight: tvm.relay.Expr, - scale_bias: tvm.relay.Expr, - lut: tvm.relay.Expr, - ifm_scale: float, - ifm_zero_point: int, - weight_zero_point: int, - ofm_scale: float, - ofm_zero_point: int, - kernel_shape: Tuple[int, int], - ofm_channels: int, - strides: Tuple[int, int] = (1, 1), - padding: Tuple[int, int, int, int] = (0, 0, 0, 0), - dilation: Tuple[int, int] = (1, 1), - activation: str = "NONE", - clip_min: int = 0, - clip_max: int = 0, - rounding_mode: str = "TFL", - upscale: str = "NONE", - ifm_layout: str = "NHWC", - ofm_layout: str = "NHWC", -) -> tvm.relay.Call: - """This is a quantized 2D convolution operation as supported by - the NPU. It accepts either NHWC or NHCWB16 format - for the input data and OHWI format for the kernel weights. 
-
-    Reference: https://developer.arm.com/documentation/102420/0200/
-
-    Note that the per-channel weight scale and bias tensor must be
-    packed together into a combined tensor of uint80s. This is represented
-    in TVM by a (channels, 10) tensor of type uint8. For more detail,
-    refer to the Technical Reference Manual linked above.
-
-    Parameters
-    ----------
-    ifm : tvm.relay.Expr
-        The Input Feature Map tensor (IFM).
-    weight : tvm.relay.Expr
-        The weight tensor.
-    scale_bias : tvm.relay.Expr
-        The packed per-channel weight scale and bias tensor.
-    lut : tvm.relay.Expr
-        The look-up table of values to use if activation = "LUT".
-    ifm_scale : float
-        The quantization scale for the Input Feature Map tensor.
-    ifm_zero_point : int
-        The quantization zero point for the Input Feature Map tensor.
-    weight_zero_point : int
-        The quantization zero point for the weight tensor.
-    ofm_scale : float
-        The quantization scale for the Output Feature Map tensor.
-    ofm_zero_point : int
-        The quantization zero point for the Output Feature Map tensor.
-    kernel_shape : tuple of int
-        The 2 dimensional kernel shape as (kernel_height, kernel_width).
-    ofm_channels : int
-        The number of the Output Feature Map channels.
-    strides : tuple of int, optional
-        The 2 dimensional strides as (stride_height, stride_width).
-    padding : tuple of int, optional
-        The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right).
-    dilation : tuple of int, optional
-        The 2 dimensional dilation as (dilation_height, dilation_width).
-    activation : str, optional
-        The activation function to use.
-            "NONE" - no activation function.
-            "CLIP" - clip the output between clip_min and clip_max.
-            "TANH" - tanh activation function.
-            "SIGMOID" - sigmoid activation function.
-            "LUT" - use a look-up table to perform the activation function.
-    clip_min : int, optional
-        The minimum clipping value if activation = "CLIP".
-    clip_max : int, optional
-        The maximum clipping value if activation = "CLIP".
-    rounding_mode : str, optional
-        The rounding mode to apply to the Output Feature Map tensor.
-            "TFL" - Tensorflow Lite rounding scheme.
-            "TRUNCATE" - Truncate towards zero.
-            "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity.
-    upscale : str, optional
-        The 2x2 upscaling mode to apply to the Input Feature Map tensor.
-            "NONE" - no upscaling.
-            "NEAREST" - upscale using nearest neighbour.
-            "ZEROS" - upscale using zeros.
-    ifm_layout : str, optional
-        The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16".
-    ofm_layout : str, optional
-        The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16".
-
-    Returns
-    -------
-    tvm.relay.Call
-        A call to the ethosu_conv2d op.
-
-    """
-    return _make.ethosu_conv2d(
-        ifm,
-        weight,
-        scale_bias,
-        lut,
-        ifm_scale,
-        ifm_zero_point,
-        weight_zero_point,
-        ofm_scale,
-        ofm_zero_point,
-        kernel_shape,
-        ofm_channels,
-        strides,
-        padding,
-        dilation,
-        activation,
-        clip_min,
-        clip_max,
-        rounding_mode,
-        upscale,
-        ifm_layout,
-        ofm_layout,
-    )
diff --git a/python/tvm/relay/backend/contrib/ethosu/op/depthwise.py b/python/tvm/relay/backend/contrib/ethosu/op/depthwise.py
deleted file mode 100644
index 3df3e2d81303..000000000000
--- a/python/tvm/relay/backend/contrib/ethosu/op/depthwise.py
+++ /dev/null
@@ -1,221 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=unused-argument -"""Relay operator for depthwise convolution for Arm(R) Ethos(TM)-U NPU""" - -from typing import Tuple - -import tvm -from tvm.relay.op import _make -from tvm.topi.generic import schedule_injective -from tvm.relay.op.op import OpStrategy -from tvm.relay.op import strategy as _strategy - -from ..te import depthwise_conv2d_compute - - -def _extract_ethosu_depthwise_conv2d_params(attrs, args): - """Get the parameters necessary to construct a ethosu_depthwise_conv2d compute TE - from a ethosu_depthwise_conv2d Relay call.""" - ifm = args[0] - weight = args[1] - scale_bias = args[2] - lut = args[3] - ifm_scale = attrs.ifm_scale - ifm_zero_point = attrs.ifm_zero_point - weight_zero_point = attrs.weight_zero_point - ofm_scale = attrs.ofm_scale - ofm_zero_point = attrs.ofm_zero_point - strides = attrs.strides - padding = attrs.padding - dilation = attrs.dilation - activation = attrs.activation - clip_min = attrs.clip_min - clip_max = attrs.clip_max - rounding_mode = attrs.rounding_mode - upscale = attrs.upscale - ifm_layout = attrs.ifm_layout - ofm_layout = attrs.ofm_layout - ofm_dtype = attrs.ofm_dtype - - return ( - ifm, - weight, - scale_bias, - lut, - ifm_scale, - ifm_zero_point, - weight_zero_point, - ofm_scale, - ofm_zero_point, - strides, - padding, - dilation, - activation, - clip_min, - clip_max, - rounding_mode, - upscale, - ifm_layout, - ofm_layout, - ofm_dtype, - ) - - -@tvm.ir.register_op_attr("contrib.ethosu.depthwise_conv2d", "FTVMCompute") -def create_ethosu_depthwise_conv2d_compute(attrs, args, out_type): - """Create an ethosu_depthwise_conv2d compute op.""" - params = _extract_ethosu_depthwise_conv2d_params(attrs, args) - op = depthwise_conv2d_compute(*params) - return [op] - - -@tvm.ir.register_op_attr("contrib.ethosu.depthwise_conv2d", "FTVMStrategy") -def depthwise_conv2d_strategy_ethosu(attrs, inputs, out_type, target): - strategy = OpStrategy() - strategy.add_implementation( - create_ethosu_depthwise_conv2d_compute, - _strategy.wrap_topi_schedule(schedule_injective), - name="ethosu_depthwise_conv2d", - ) - return strategy - - -def ethosu_depthwise_conv2d( - ifm: tvm.relay.Expr, - weight: tvm.relay.Expr, - scale_bias: tvm.relay.Expr, - lut: tvm.relay.Expr, - ifm_scale: float, - ifm_zero_point: int, - weight_zero_point: int, - ofm_scale: float, - ofm_zero_point: int, - kernel_shape: Tuple[int, int], - ofm_channels: int, - strides: Tuple[int, int] = (1, 1), - padding: Tuple[int, int, int, int] = (0, 0, 0, 0), - dilation: Tuple[int, int] = (1, 1), - activation: str = "NONE", - clip_min: int = 0, - clip_max: int = 0, - rounding_mode: str = "TFL", - upscale: str = "NONE", - ifm_layout: str = "NHWC", - ofm_layout: str = "NHWC", - ofm_dtype: str = "int8", -) -> tvm.relay.Call: - """This is a quantized 2D depthwise convolution operation as supported by - the NPU. It accepts either NHWC or NHCWB16 format - for the input data and OHWI format for the kernel weights. 
- - Reference: https://developer.arm.com/documentation/102420/0200/ - - Note that the per-channel weight scale and bias tensor must be - packed together into a combined tensor of uint80s. This is represented - in TVM by a (channels, 10) tensor of type uint8. For more detail, - refer to the Technical Reference Manual linked above. - - Parameters - ---------- - ifm : tvm.relay.Expr - The Input Feature Map tensor (IFM). - weight : tvm.relay.Expr - The weight tensor. - scale_bias : tvm.relay.Expr - The packed per-channel weight scale and bias tensor. - lut : tvm.relay.Expr - The look-up table of values to use if activation = "LUT" - ifm_scale : float - The quantization scale for the Input Feature Map tensor. - ifm_zero_point : int - The quantization zero point for the Input Feature Map tensor. - weight_zero_point : int - The quantization zero point for the weight tensor. - ofm_scale : float - The quantization scale for the Output Feature Map tensor. - ofm_zero_point : int - The quantization zero point for the Output Feature Map tensor. - kernel_shape : tuple of int - The 2 dimensional kernel shape as (kernel_height, kernel_width). - ofm_channels : int - The number of the Output Feature Map channels. - strides : tuple of int, optional - The 2 dimensional strides as (stride_height, stride_width). - padding : tuple of int, optional - The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right). - dilation : tuple of int, optional - The 2 dimensional dilation as (dilation_height, dilation_width). - activation : str, optional - The activation function to use. - "NONE" - no activation function. - "CLIP" - clip the output between clip_min and clip_max. - "TANH" - tanh activation function. - "SIGMOID" - sigmoid activation function. - "LUT" - use a look-up table to perform - the activation function. - clip_min : int, optional - The minimum clipping value if activation = "CLIP" - clip_max : int, optional, - The maximum clipping value if activation = "CLIP" - rounding_mode : str, optional - The rounding mode to apply to the Output Feature Map tensor. - "TFL" - Tensorflow Lite rounding scheme. - "TRUNCATE" - Truncate towards zero. - "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity. - upscale : str, optional - The 2x2 upscaling mode to apply to the Input Feature Map tensor. - "NONE" - no upscaling. - "NEAREST" - upscale using nearest neighbour. - "ZEROS" - upscale using zeros. - ifm_layout : str, optional - The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16". - ofm_layout : str, optional - The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16". - ofm_dtype : str, optional - The Output Feature Map tensor data type. Can be 'int8', 'uint8' or 'int16'. - - Returns - ------- - out : tvm.relay.Call - A call to the ethosu_depthwise_conv2d op. 
- - """ - return _make.ethosu_depthwise_conv2d( - ifm, - weight, - scale_bias, - lut, - ifm_scale, - ifm_zero_point, - weight_zero_point, - ofm_scale, - ofm_zero_point, - kernel_shape, - ofm_channels, - strides, - padding, - dilation, - activation, - clip_min, - clip_max, - rounding_mode, - upscale, - ifm_layout, - ofm_layout, - ofm_dtype, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/op/identity.py b/python/tvm/relay/backend/contrib/ethosu/op/identity.py deleted file mode 100644 index d91de971dbf2..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/op/identity.py +++ /dev/null @@ -1,112 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=unused-argument -"""Relay identity operator for Arm(R) Ethos(TM)-U NPU""" - -import tvm -from tvm.relay.op import _make -from tvm.topi.generic import schedule_injective -from tvm.relay.op.op import OpStrategy -from tvm.relay.op import strategy as _strategy - -from ..te import identity_compute - - -@tvm.ir.register_op_attr("contrib.ethosu.identity", "FTVMCompute") -def create_ethosu_identity_compute(attrs, args, out_type): - """Create an ethosu_identity compute op.""" - ifm = args[0] - lut = args[1] - ifm_scale = attrs.ifm_scale - ifm_zero_point = attrs.ifm_zero_point - ofm_scale = attrs.ofm_scale - ofm_zero_point = attrs.ofm_zero_point - activation = attrs.activation - rounding_mode = attrs.rounding_mode - op = identity_compute( - ifm, - lut, - ifm_scale, - ifm_zero_point, - ofm_scale, - ofm_zero_point, - activation, - rounding_mode, - ) - return [op] - - -@tvm.ir.register_op_attr("contrib.ethosu.identity", "FTVMStrategy") -def identity_strategy_ethosu(attrs, inputs, out_type, target): - strategy = OpStrategy() - strategy.add_implementation( - create_ethosu_identity_compute, - _strategy.wrap_topi_schedule(schedule_injective), - name="ethosu_identity", - ) - return strategy - - -def ethosu_identity( - ifm: tvm.relay.Expr, - lut: tvm.relay.Expr, - ifm_scale: float = 1, - ifm_zero_point: int = 0, - ofm_scale: float = 1, - ofm_zero_point: int = 0, - activation: str = "NONE", - rounding_mode: str = "TFL", -) -> tvm.relay.Call: - """The Identity operator that runs on the NPU. - - This operator takes in a tensor of any shape and returns the same tensor, - with the data optionally requantized. - - Parameters - ---------- - ifm : tvm.relay.Expr - The Input Feature Map tensor (IFM). - lut : tvm.relay.Expr - The look-up table values to use if activation = "LUT", "TANH" or "SIGMOID". - ifm_scale : float - The quantization scale for the Input Feature Map tensor. - ifm_zero_point : int - The quantization zero point for the Input Feature Map tensor. - ofm_scale : float - The quantization scale for the Output Feature Map tensor. 
- ofm_zero_point : int - The quantization zero point for the Output Feature Map tensor. - activation : str, optional - The activation function to use. - "NONE" - no activation function. - "TANH" - tanh activation function. - "SIGMOID" - sigmoid activation function. - "LUT" - use a look-up table to perform the activation function. - rounding_mode : str, optional - The rounding mode to apply to the Output Feature Map tensor. - "TFL" - Tensorflow Lite rounding scheme. - "TRUNCATE" - Truncate towards zero. - "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity. - - Returns - ------- - out : tvm.relay.Call - A call to the ethosu_identity op. - """ - return _make.ethosu_identity( - ifm, lut, ifm_scale, ifm_zero_point, ofm_scale, ofm_zero_point, activation, rounding_mode - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/op/op_attrs.py b/python/tvm/relay/backend/contrib/ethosu/op/op_attrs.py deleted file mode 100644 index c421788bcacf..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/op/op_attrs.py +++ /dev/null @@ -1,49 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""The attributes node used for Arm(R) Ethos(TM)-U NPU Relay operators.""" -from tvm.ir import Attrs -import tvm._ffi - - -@tvm._ffi.register_object("relay.attrs.EthosuConv2DAttrs") -class EthosuConv2DAttrs(Attrs): - """Attributes for contrib.ethosu.conv2d.""" - - -@tvm._ffi.register_object("relay.attrs.EthosuIdentityAttrs") -class EthosuIdentityAttrs(Attrs): - """Attributes for contrib.ethosu.identity.""" - - -@tvm._ffi.register_object("relay.attrs.EthosuDepthwiseConv2DAttrs") -class EthosuDepthwiseConv2DAttrs(Attrs): - """Attributes for contrib.ethosu.depthwise_conv2d.""" - - -@tvm._ffi.register_object("relay.attrs.EthosuPoolingAttrs") -class EthosuPooling2DAttrs(Attrs): - """Attributes for contrib.ethosu.pooling.""" - - -@tvm._ffi.register_object("relay.attrs.EthosuBinaryElementwiseAttrs") -class EthosuBinaryElementwiseAttrs(Attrs): - """Attributes for contrib.ethosu.binary_elementwise""" - - -@tvm._ffi.register_object("relay.attrs.EthosuUnaryElementwiseAttrs") -class EthosuUnaryElementwiseAttrs(Attrs): - """Attributes for contrib.ethosu.unary_elementwise""" diff --git a/python/tvm/relay/backend/contrib/ethosu/op/pooling.py b/python/tvm/relay/backend/contrib/ethosu/op/pooling.py deleted file mode 100644 index 4d12704acb0f..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/op/pooling.py +++ /dev/null @@ -1,199 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=unused-argument -"""Relay operators for pooling for Arm(R) Ethos(TM)-U NPU""" -from typing import Tuple - -import tvm -from tvm.relay.op import _make -from tvm.topi.generic import schedule_injective -from tvm.relay.op.op import OpStrategy -from tvm.relay.op import strategy as _strategy - -from ..te import pooling_compute - - -def _extract_ethosu_pooling_params(attrs, args): - """Get the parameters necessary to construct a ethosu_pooling compute TE - from a ethosu_pooling Relay call.""" - ifm = args[0] - lut = args[1] - pooling_type = attrs.pooling_type - ifm_scale = attrs.ifm_scale - ifm_zero_point = attrs.ifm_zero_point - ofm_scale = attrs.ofm_scale - ofm_zero_point = attrs.ofm_zero_point - pool_shape = attrs.pool_shape - ofm_channels = attrs.ofm_channels - ofm_dtype = attrs.ofm_dtype - strides = attrs.strides - padding = attrs.padding - activation = attrs.activation - clip_min = attrs.clip_min - clip_max = attrs.clip_max - rounding_mode = attrs.rounding_mode - upscale = attrs.upscale - ifm_layout = attrs.ifm_layout - ofm_layout = attrs.ofm_layout - - return ( - ifm, - lut, - pooling_type, - ifm_scale, - ifm_zero_point, - ofm_scale, - ofm_zero_point, - pool_shape, - ofm_channels, - ofm_dtype, - strides, - padding, - activation, - clip_min, - clip_max, - rounding_mode, - upscale, - ifm_layout, - ofm_layout, - ) - - -@tvm.ir.register_op_attr("contrib.ethosu.pooling", "FTVMCompute") -def create_ethosu_pooling_compute(attrs, args, out_type): - """Create an ethosu_pooling compute op.""" - params = _extract_ethosu_pooling_params(attrs, args) - op = pooling_compute(*params) - return [op] - - -@tvm.ir.register_op_attr("contrib.ethosu.pooling", "FTVMStrategy") -def pooling_strategy_ethosu(attrs, inputs, out_type, target): - strategy = OpStrategy() - strategy.add_implementation( - create_ethosu_pooling_compute, - _strategy.wrap_topi_schedule(schedule_injective), - name="ethosu_pooling", - ) - return strategy - - -def ethosu_pooling( - ifm: tvm.relay.Expr, - lut: tvm.relay.Expr, - pooling_type: str, - ifm_scale: float, - ifm_zero_point: int, - ofm_scale: float, - ofm_zero_point: int, - pool_shape: Tuple[int, int], - ofm_channels: int, - ofm_dtype: str, - strides: Tuple[int, int] = (1, 1), - padding: Tuple[int, int, int, int] = (0, 0, 0, 0), - activation: str = "NONE", - clip_min: int = 0, - clip_max: int = 0, - rounding_mode: str = "TFL", - upscale: str = "NONE", - ifm_layout: str = "NHWC", - ofm_layout: str = "NHWC", -) -> tvm.relay.Call: - """This is a quantized 2D pooling operation as supported by - the NPU. It accepts either NHWC or NHCWB16 format - for the input data. - - Parameters - ---------- - ifm : tvm.relay.Expr - The Input Feature Map tensor (IFM). - lut : tvm.relay.Expr - The look-up table of values to use if activation = "LUT". - pooling_type: str - The type of the pooling. "AVG" - average pool, "MAX" - max pool, "SUM" - reduce sum pool. 
- ifm_scale : float - The quantization scale for the Input Feature Map tensor. - ifm_zero_point : int - The quantization zero point for the Input Feature Map tensor. - ofm_scale : float - The quantization scale for the Output Feature Map tensor. - ofm_zero_point : int - The quantization zero point for the Output Feature Map tensor. - pool_shape : tuple of int - The 2 dimensional pool shape as (pool_shape_height, pool_shape_width). - ofm_channels : int - The number of the Output Feature Map channels - ofm_dtype : str - The Output Feature Map tensor data type. - "AVG" or "MAX" pooling - can be "int8", "uint8", or "int16". - "SUM" pooling - can be "int32". - strides : tuple of int, optional - The 2 dimensional strides as (stride_height, stride_width). - padding : tuple of int, optional - The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right). - activation : str, optional - The activation function to use. - "NONE" - no activation function. - "CLIP" - clip the output between clip_min and clip_max. - "TANH" - tanh activation function. - "SIGMOID" - sigmoid activation function. - "LUT" - use a look-up table to perform the activation function. - clip_min : int, optional - The minimum clipping value if activation = "CLIP". - clip_max : int, optional - The maximum clipping value if activation = "CLIP". - rounding_mode : str, optional - The rounding mode to apply to the Output Feature Map tensor. - "TFL" - Tensorflow Lite rounding scheme. - "TRUNCATE" - Truncate towards zero. - "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity. - upscale: str, optional - The 2x2 upscaling mode to apply to the Input Feature Map tensor. - "NONE" - no upscaling. - "NEAREST" - upscale using nearest neighbour. - "ZEROS" - upscale using zeros. - ifm_layout : str, optional - The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16". - ofm_layout : str, optional - The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16". - - Returns - ------- - out : tvm.relay.Call - A call to the ethosu_pooling op. - """ - return _make.ethosu_pooling( - ifm, - lut, - pooling_type, - ifm_scale, - ifm_zero_point, - ofm_scale, - ofm_zero_point, - pool_shape, - ofm_channels, - ofm_dtype, - strides, - padding, - activation, - clip_min, - clip_max, - rounding_mode, - upscale, - ifm_layout, - ofm_layout, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/op/unary_elementwise.py b/python/tvm/relay/backend/contrib/ethosu/op/unary_elementwise.py deleted file mode 100644 index 35104da92e8b..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/op/unary_elementwise.py +++ /dev/null @@ -1,164 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
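The following is an illustrative sketch, not part of the original patch, of how the ethosu_pooling builder defined above was typically invoked. It assumes a TVM build that still ships the Ethos-U contrib backend (i.e. prior to this removal); the shapes and quantization parameters are placeholders.

import tvm
from tvm import relay
from tvm.relay.backend.contrib.ethosu import op as ethosu_ops

# Illustrative 2x2 MAX pool over an int8 NHWC feature map.
ifm = relay.var("ifm", shape=(1, 8, 8, 16), dtype="int8")
lut = relay.const([], dtype="int32")  # only consumed when activation == "LUT"
call = ethosu_ops.ethosu_pooling(
    ifm=ifm,
    lut=lut,
    pooling_type="MAX",
    ifm_scale=0.5,
    ifm_zero_point=10,
    ofm_scale=0.5,
    ofm_zero_point=10,
    pool_shape=(2, 2),
    ofm_channels=16,
    ofm_dtype="int8",
    strides=(2, 2),
)
mod = tvm.IRModule.from_expr(relay.Function(relay.analysis.free_vars(call), call))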
-# pylint: disable=unused-argument -"""Relay operator for unary elementwise operations for Arm(R) Ethos(TM)-U NPU""" -from typing import Optional -import tvm -from tvm.relay.op import _make -from tvm.topi.generic import schedule_injective -from tvm.relay.op.op import OpStrategy -from tvm.relay.op import strategy as _strategy - -from ..te import unary_elementwise_compute - - -def _extract_ethosu_unary_elementwise_params(attrs, args): - """Get the parameters necessary to construct a ethosu_unary_elementwise compute TE - from a ethosu_unary_elementwise Relay call.""" - ifm = args[0] - lut = args[1] - operator_type = attrs.operator_type - ifm_scale = attrs.ifm_scale - ifm_zero_point = attrs.ifm_zero_point - ofm_scale = attrs.ofm_scale - ofm_zero_point = attrs.ofm_zero_point - ofm_channels = attrs.ofm_channels - activation = attrs.activation - clip_min = attrs.clip_min - clip_max = attrs.clip_max - rounding_mode = attrs.rounding_mode - ifm_layout = attrs.ifm_layout - ofm_layout = attrs.ofm_layout - - return ( - ifm, - lut, - operator_type, - ifm_scale, - ifm_zero_point, - ofm_scale, - ofm_zero_point, - ofm_channels, - activation, - clip_min, - clip_max, - rounding_mode, - ifm_layout, - ofm_layout, - ) - - -@tvm.ir.register_op_attr("contrib.ethosu.unary_elementwise", "FTVMCompute") -def create_ethosu_unary_elementwise_compute(attrs, args, out_type): - """Create an ethosu_unary_elementwise compute op.""" - params = _extract_ethosu_unary_elementwise_params(attrs, args) - op = unary_elementwise_compute(*params) - return [op] - - -@tvm.ir.register_op_attr("contrib.ethosu.unary_elementwise", "FTVMStrategy") -def unary_elementwise_strategy_ethosu(attrs, inputs, out_type, target): - strategy = OpStrategy() - strategy.add_implementation( - create_ethosu_unary_elementwise_compute, - _strategy.wrap_topi_schedule(schedule_injective), - name="ethosu_unary_elementwise", - ) - return strategy - - -def ethosu_unary_elementwise( - ifm: tvm.relay.Expr, - lut: tvm.relay.Expr, - operator_type: str, - ifm_scale: float, - ifm_zero_point: int, - ofm_scale: float, - ofm_zero_point: int, - ofm_channels: int, - activation: Optional[str] = "NONE", - clip_min: Optional[int] = 0, - clip_max: Optional[int] = 0, - rounding_mode: Optional[str] = "TFL", - ifm_layout: Optional[str] = "NHWC", - ofm_layout: Optional[str] = "NHWC", -) -> tvm.relay.Call: - """This is a quantized unary elementwise operation as supported by the - NPU. It accepts either NHWC or NHCWB16 format for the input data. - - Parameters - ---------- - ifm : tvm.relay.Expr - The Input Feature Map tensor (IFM). - lut : tvm.relay.Expr - The look-up table values to use if activation = "LUT". - operator_type: str - The type of the unary elementwise operator. - "ABS" - "CLZ" - ifm_scale : float - The quantization scale for the Input Feature Map tensor. - ifm_zero_point : int - The quantization zero point for the Input Feature Map tensor. - ofm_scale : float - The quantization scale for the Output Feature Map tensor. - ofm_zero_point : int - The quantization zero point for the Output Feature Map tensor. - ofm_channels : int - The number of OFM channels. - activation : str, optional - The activation function to use. - "NONE" - no activation function. - "CLIP" - clip the output between clip_min and clip_max. - "TANH" - tanh activation function. - "SIGMOID" - sigmoid activation function. - "LUT" - use a look-up table to perform the activation function. - clip_min : int, optional - The minimum clipping value if activation = "CLIP". 
- clip_max : int, optional - The maximum clipping value if activation = "CLIP". - rounding_mode : str, optional - The rounding mode to apply to the Output Feature Map tensor. - "TFL" - Tensorflow Lite rounding scheme. - "TRUNCATE" - Truncate towards zero. - "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity. - ifm_layout : str, optional - The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16". - ofm_layout : str, optional - The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16". - - Returns - ------- - out : tvm.relay.Call - A call to the ethosu_unary_elementwise op. - """ - return _make.ethosu_unary_elementwise( - ifm, - lut, - operator_type, - ifm_scale, - ifm_zero_point, - ofm_scale, - ofm_zero_point, - ofm_channels, - activation, - clip_min, - clip_max, - rounding_mode, - ifm_layout, - ofm_layout, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/preprocess.py b/python/tvm/relay/backend/contrib/ethosu/preprocess.py deleted file mode 100644 index 795adfc2fb1f..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/preprocess.py +++ /dev/null @@ -1,36 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument, import-outside-toplevel -"""Set of passes to pre-process the IRModule to support Arm(R)-Ethos(TM)-U -NPU code generation. These set of passes will mutate both the main and the -external functions. -""" -import tvm # type: ignore -from . import _ffi_api # type: ignore - - -def preprocess_ext_io() -> tvm.transform.Pass: - """This pass mutates the number of inputs going to / outputs coming out to/from - external functions to one. This is achieved via concatenation - of inputs and splitting of outputs in around the call to the external function. - - Returns - ------- - ret : tvm.transform.Pass - The registered pass to mutate the IO of the external functions and their calls. - """ - return _ffi_api.PreprocessExternalFuncIO() # type: ignore # pylint: disable=no-member diff --git a/python/tvm/relay/backend/contrib/ethosu/softmax_rewriter.py b/python/tvm/relay/backend/contrib/ethosu/softmax_rewriter.py deleted file mode 100644 index 23d4f4b45b11..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/softmax_rewriter.py +++ /dev/null @@ -1,541 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""SoftmaxRewriter for legalization Softmax operation.""" -import math - -import numpy as np -from ethosu.vela import fp_math, scaling - -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu import op as ethosu_ops -from tvm.relay.dataflow_pattern import DFPatternCallback, wildcard -from tvm.relay.op.contrib import ethosu as ethosu_patterns - - -class SoftmaxRewriter(DFPatternCallback): - """This rewriting converts Softmax operation into a sequence of operations as in Vela.""" - - def __init__(self): - super().__init__(require_type=True, rewrite_once=True) - self.params_class = ethosu_patterns.SoftMaxParams - self.pattern = ( - wildcard().has_attr({"Composite": ethosu_patterns.SoftMaxParams.composite_name}) - )(None) - - def generate_exp_table(self, input_scale): - """Generate a LUT table for exponential function. - - Parameters - ---------- - input_scale : float - The scale for input. - - Returns - ------- - lut : tvm.relay.expr.Constant - LUT table for exponential function. - """ - beta = 1.0 - integer_bits = 5 - total_signed_bits = 31 - # Calculate scaling - real_beta = min( - np.double(beta) * np.double(input_scale) * (1 << (31 - integer_bits)), - np.double((1 << 31) - 1.0), - ) - scale, shift = scaling.quantise_scale(real_beta) - shift = 31 - shift - diff_min = -1.0 * math.floor( - 1.0 - * ((1 << integer_bits) - 1) - * (1 << (total_signed_bits - integer_bits)) - / (1 << shift) - ) - # Generate the exp LUT - lut = [] - for x in range(256): - input_diff = x - 255 - if input_diff >= diff_min: - rescale = fp_math.saturating_rounding_mul32(input_diff * (1 << shift), scale) - lut.append(fp_math.exp_on_negative_values(rescale)) - else: - lut.append(0) - res = np.array(lut, dtype="int32") - return relay.const(res) - - def callback( - self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map - ) -> tvm.relay.Expr: - params = self.params_class(post.op.body) - quant_min = -128 - quant_max = 127 - - ifm = post.args[0] - ifm_dtype = ifm.checked_type.dtype - bhw = np.prod(params.ifm.shape[:-1]) - depth = params.ifm.shape[-1] - - # The calculation of Softmax is similar to that in Vela - # https://review.mlplatform.org/plugins/gitiles/ml/ethos-u/ethos-u-vela/+/refs/tags/3.7.0/ethosu/vela/softmax.py#230 - # PASS 0 - Depthwise Maxpool - # reshape for depthwise maxpool - ifm = relay.reshape(ifm, (1, bhw, depth, 1)) - lut = relay.const([], dtype="int32") - depthwise_maxpool = ethosu_ops.ethosu_pooling( - ifm=ifm, - lut=lut, - pooling_type="MAX", - ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=int(params.ifm.q_params.zero_point), - ofm_scale=0.0, - ofm_zero_point=int(params.ifm.q_params.zero_point), - pool_shape=(1, depth), - ofm_channels=1, - ofm_dtype=ifm_dtype, - ) - - # PASS 1 - Sub+LUT(exp) - # move all data along the height axis, except channels - ifm = relay.reshape(ifm, (1, bhw, 1, depth)) - exp_lut = self.generate_exp_table(float(params.ifm.q_params.scale_f32)) - ifm_exp = ethosu_ops.ethosu_binary_elementwise( - ifm=ifm, - ifm2=depthwise_maxpool, - lut=exp_lut, - operator_type="SUB", - 
ifm_scale=float(params.ifm.q_params.scale_f32), - ifm_zero_point=int(params.ifm.q_params.zero_point), - ifm2_scale=0.0, - ifm2_zero_point=int(params.ifm.q_params.zero_point), - ofm_scale=1.0, - ofm_zero_point=quant_max, - ifm_channels=depth, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="LUT", - clip_min=-255, - clip_max=0, - ) - - # PASS 2 - SHR - shr_const = relay.const(np.full([1, 1, 1, 1], 12, dtype="int32")) - shr = ethosu_ops.ethosu_binary_elementwise( - ifm=ifm_exp, - ifm2=shr_const, - lut=lut, - operator_type="SHR", - ifm_scale=1.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=0.0, - ofm_zero_point=int(params.ifm.q_params.zero_point), - ifm_channels=params.ifm.shape[-1], - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - rounding_mode="NATURAL", - ) - - # PASS 3 - Reduce sum - sum_of_exp = ethosu_ops.ethosu_pooling( - ifm=shr, - lut=lut, - pooling_type="SUM", - ifm_scale=0.0, - ifm_zero_point=0, - ofm_scale=0.0, - ofm_zero_point=int(params.ifm.q_params.zero_point), - pool_shape=(1, 1), - ofm_channels=1, - upscale="NONE", - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 4 - CLZ - headroom_plus_one = ethosu_ops.ethosu_unary_elementwise( - ifm=sum_of_exp, - lut=lut, - operator_type="CLZ", - ifm_scale=0.0, - ifm_zero_point=0, - ofm_scale=0.0, - ofm_zero_point=int(params.ifm.q_params.zero_point), - ofm_channels=1, - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 5 - Sub - headroom_offset_const = relay.const(np.full([1, bhw, 1, 1], 35, dtype="int32")) - right_shift = ethosu_ops.ethosu_binary_elementwise( - ifm=headroom_offset_const, - ifm2=headroom_plus_one, - lut=lut, - operator_type="SUB", - ifm_scale=0.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=0.0, - ofm_zero_point=int(params.ifm.q_params.zero_point), - ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 6 - Sub - one_const = relay.const(np.full([1, 1, 1, 1], 1, dtype="int32")) - headroom = ethosu_ops.ethosu_binary_elementwise( - ifm=headroom_plus_one, - ifm2=one_const, - lut=lut, - operator_type="SUB", - ifm_scale=0.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=0.0, - ofm_zero_point=int(params.ifm.q_params.zero_point), - ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 7 - SHL - shifted_sum = ethosu_ops.ethosu_binary_elementwise( - ifm=sum_of_exp, - ifm2=headroom, - lut=lut, - operator_type="SHL", - ifm_scale=0.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=0.0, - ofm_zero_point=int(params.ifm.q_params.zero_point), - ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 8 - Sub - shifted_one_const = relay.const(np.full([1, 1, 1, 1], 1 << 30, dtype="int32")) - shifted_sum_minus_one = ethosu_ops.ethosu_binary_elementwise( - ifm=shifted_sum, - ifm2=shifted_one_const, - lut=lut, - operator_type="SUB", - ifm_scale=0.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=0.0, - ofm_zero_point=int(params.ifm.q_params.zero_point), - ifm_channels=1, - ifm2_channels=1, - 
reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 9 - SHL - shifted_sum_minus_one = ethosu_ops.ethosu_binary_elementwise( - ifm=shifted_sum_minus_one, - ifm2=one_const, - lut=lut, - operator_type="SHL", - ifm_scale=0.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=0.0, - ofm_zero_point=int(params.ifm.q_params.zero_point), - ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 10 - Add - f0_one_const = relay.const(np.full([1, 1, 1, 1], (1 << 31) - 1, dtype="int32")) - half_denominator = ethosu_ops.ethosu_binary_elementwise( - ifm=shifted_sum_minus_one, - ifm2=f0_one_const, - lut=lut, - operator_type="ADD", - ifm_scale=0.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=1.0, - ofm_zero_point=0, - ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - use_rescale=True, - rescale_scale=1, - rescale_shift=1, - ) - - # PASS 11 - Mul - neg_32_over_17_const = relay.const(np.full([1, 1, 1, 1], -1010580540, dtype="int32")) - rescaled = ethosu_ops.ethosu_binary_elementwise( - ifm=half_denominator, - ifm2=neg_32_over_17_const, - lut=lut, - operator_type="MUL", - ifm_scale=1.0, - ifm_zero_point=0, - ifm2_scale=1.0, - ifm2_zero_point=0, - ofm_scale=2.0, - ofm_zero_point=0, - ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 12 - Add - const_48_over_17_const = relay.const(np.full([1, 1, 1, 1], 1515870810, dtype="int32")) - rescale_w_offset = ethosu_ops.ethosu_binary_elementwise( - ifm=rescaled, - ifm2=const_48_over_17_const, - lut=lut, - operator_type="ADD", - ifm_scale=2.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=1.0, - ofm_zero_point=0, - ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - nr_x = rescale_w_offset - f2_one_const = relay.const(np.full([1, bhw, 1, 1], 1 << 29, dtype="int32")) - four_const = relay.const(np.full([1, 1, 1, 1], 4, dtype="int32")) - for _ in range(3): - # PASS 13, 18, 23 - Mul - half_denominator_times_x = ethosu_ops.ethosu_binary_elementwise( - ifm=nr_x, - ifm2=half_denominator, - lut=lut, - operator_type="MUL", - ifm_scale=1.0, - ifm_zero_point=0, - ifm2_scale=1.0, - ifm2_zero_point=0, - ofm_scale=2.0, - ofm_zero_point=0, - ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 14, 19, 24 - Sub - one_minus_half_denomin_times_x = ethosu_ops.ethosu_binary_elementwise( - ifm=f2_one_const, - ifm2=half_denominator_times_x, - lut=lut, - operator_type="SUB", - ifm_scale=2.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=1.0, - ofm_zero_point=0, - ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 15, 20, 25 - Mul - to_rescale = ethosu_ops.ethosu_binary_elementwise( - ifm=nr_x, - ifm2=one_minus_half_denomin_times_x, - lut=lut, - operator_type="MUL", - ifm_scale=1.0, - ifm_zero_point=0, - ifm2_scale=1.0, - ifm2_zero_point=0, - ofm_scale=2.0, - ofm_zero_point=0, - 
ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 16, 21, 26 - Mul - to_add = ethosu_ops.ethosu_binary_elementwise( - ifm=to_rescale, - ifm2=four_const, - lut=lut, - operator_type="MUL", - ifm_scale=2.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=0.0, - ofm_zero_point=int(params.ifm.q_params.zero_point), - ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 17, 22, 27 - Add - nr_x = ethosu_ops.ethosu_binary_elementwise( - ifm=nr_x, - ifm2=to_add, - lut=lut, - operator_type="ADD", - ifm_scale=1.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=1.0, - ofm_zero_point=0, - ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 28 - Mul - two_const = relay.const(np.full([1, 1, 1, 1], 2, dtype="int32")) - scale_factor = ethosu_ops.ethosu_binary_elementwise( - ifm=nr_x, - ifm2=two_const, - lut=lut, - operator_type="MUL", - ifm_scale=1.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=1.0, - ofm_zero_point=0, - ifm_channels=1, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 29 - Mul - scaled_exp = ethosu_ops.ethosu_binary_elementwise( - ifm=ifm_exp, - ifm2=scale_factor, - lut=lut, - operator_type="MUL", - ifm_scale=1.0, - ifm_zero_point=0, - ifm2_scale=1.0, - ifm2_zero_point=0, - ofm_scale=2.0, - ofm_zero_point=0, - ifm_channels=depth, - ifm2_channels=1, - reversed_operands=False, - ofm_dtype="int32", - activation="CLIP", - clip_min=quant_min, - clip_max=quant_max, - ) - - # PASS 30 - SHR - shr30_op = ethosu_ops.ethosu_binary_elementwise( - ifm=scaled_exp, - ifm2=right_shift, - lut=lut, - operator_type="SHR", - ifm_scale=2.0, - ifm_zero_point=0, - ifm2_scale=0.0, - ifm2_zero_point=0, - ofm_scale=float(params.ofm.q_params.scale_f32), - ofm_zero_point=int(params.ofm.q_params.zero_point), - ifm_channels=depth, - ifm2_channels=1, - reversed_operands=False, - rounding_mode="NATURAL", - ofm_dtype=ifm_dtype, - ) - - reshape = relay.reshape(shr30_op, params.ofm.shape) - return reshape diff --git a/python/tvm/relay/backend/contrib/ethosu/te/__init__.py b/python/tvm/relay/backend/contrib/ethosu/te/__init__.py deleted file mode 100644 index 2ede967a036c..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/te/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
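As a rough usage sketch, not part of the original patch: SoftmaxRewriter above is a DFPatternCallback, so it can be driven with tvm.relay.dataflow_pattern.rewrite. The legalize_softmax wrapper below is hypothetical and assumes mod is an IRModule that has already been partitioned for the Ethos-U target, so that softmax sits inside a composite function tagged with SoftMaxParams.composite_name; the real backend applies the rewriter from its legalization pass rather than a loop like this.

import tvm
from tvm.relay.dataflow_pattern import rewrite

def legalize_softmax(mod: tvm.IRModule) -> tvm.IRModule:
    # Hypothetical helper: run the rewriter over every Relay function
    # of an already partitioned module.
    for gvar, func in list(mod.functions.items()):
        if isinstance(func, tvm.relay.Function):
            mod.update_func(gvar, rewrite(SoftmaxRewriter(), func))
    return mod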
-"""Tensor Expressions for the NPU""" - -from .convolution import * -from .depthwise import * -from .pooling import * -from .binary_elementwise import * -from .identity import * -from .unary_elementwise import * -from .inline import * diff --git a/python/tvm/relay/backend/contrib/ethosu/te/binary_elementwise.py b/python/tvm/relay/backend/contrib/ethosu/te/binary_elementwise.py deleted file mode 100644 index 99ee932119e9..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/te/binary_elementwise.py +++ /dev/null @@ -1,357 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name,unused-argument -"""Tensor Expressions for binary_elementwise""" -import operator -import numpy as np -from tvm import te -from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher - -from .dma import dma_ofm_compute, dma_ifm_compute -from .common import get_layout_transform_matrices, get_lut_expr - - -def binary_elementwise_compute( - ifm: te.Tensor, - ifm2: te.Tensor, - lut: te.Tensor, - operator_type: str, - ifm_scale: float, - ifm_zero_point: int, - ifm2_scale: float, - ifm2_zero_point: int, - ofm_scale: float, - ofm_zero_point: int, - ifm_channels: int, - ifm2_channels: int, - reversed_operands: bool, - activation: str, - clip_min: int, - clip_max: int, - rounding_mode: str, - ifm_layout: str, - ifm2_layout: str, - ofm_layout: str, - ofm_dtype: str, - use_rescale: bool, - rescale_scale: int, - rescale_shift: int, -) -> te.Tensor: - """A compute operator representing the capabilities of binary_elementwise for the NPU. - - Parameters - ---------- - ifm : te.Tensor - The Input Feature Map tensor (IFM). - ifm2 : te.Tensor - The Input Feature Map tensor 2 (IFM2). - lut : te.Tensor - The look-up table values to use if activation = "LUT". - operator_type: str - The type of the binary elementwise operator. - "ADD" - "SUB" - "MUL" - "MIN" - "MAX" - "SHR" - "SHL" - ifm_scale : float - The quantization scale for the Input Feature Map tensor. - ifm_zero_point : int - The quantization zero point for the Input Feature Map tensor. - ifm2_scale : float - The quantization scale for the Input Feature Map tensor 2. - ifm2_zero_point : int - The quantization zero point for the Input Feature Map tensor 1. - ofm_scale : float - The quantization scale for the Output Feature Map tensor. - ofm_zero_point : int - The quantization zero point for the Output Feature Map tensor. - ifm_channels : int - The number of the Input Feature Map channels. - ifm2_channels : int - The number of the Input Feature Map 2 channels. - reversed_operands : bool - True if IFM2 is the first operand and IFM is the second operand. - activation : str - The activation function to use. - "NONE" - no activation function. 
- "CLIP" - clip the output between clip_min and clip_max. - "TANH" - tanh activation function. - "SIGMOID" - sigmoid activation function. - "LUT" - use a look-up table to perform the activation function. - Available activations for activation type: - {int8, uint8}: "NONE", "CLIP", "TANH", "SIGMOID", "LUT" - {int32}: "NONE" - clip_min : int - The minimum clipping value if activation = "CLIP". - clip_max : int - The maximum clipping value if activation = "CLIP". - rounding_mode : str - The rounding mode to apply to the Output Feature Map tensor. - "TFL" - Tensorflow Lite rounding scheme. - "TRUNCATE" - Truncate towards zero. - "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity. - ifm_layout : str, optional - The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16". - ifm2_layout : str, optional - The layout of the Input Feature Map tensor 2. Can be "NHWC" or "NHCWB16". - ofm_layout : str, optional - The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16". - ofm_dtype: str - The Output Feature Map tensor type. - MUL, ADD, SUB {IFM}->{OFM}: - {uint8, int8 int32} -> {uint8, int8, int32}, any pairing - MAX, MIN: - IFM and OFM must be of the same type, one of: - {int8, uint8} - SHR {IFM}->{OFM}: - {int32}->{int8, uint8, int32}, any pairing" - SHL: - {int32}->{int32} only - use_rescale : bool - Use explicit scaling if True. - rescale_scale : int - Scale value for rescale. For 32-bit operations scale is not applied but shift is. - rescale_shift : int - Shift value for rescale. - - Returns - ------- - te.Tensor - The Output Feature Map tensor. - """ - assert ifm.shape[0] == 1 - assert ifm2.shape[0] == 1 - assert ifm_layout in {"NHWC", "NHCWB16"} - assert ifm2_layout in {"NHWC", "NHCWB16"} - assert ofm_layout in {"NHWC", "NHCWB16"} - - # Compute operation for the IFM DMA pipeline - dmaed_ifm = dma_ifm_compute( - ifm, ifm_layout, ifm_zero_point, ifm_scale, ifm_channels, (0, 0, 0, 0) - ) - dmaed_ifm2 = dma_ifm_compute( - ifm2, ifm2_layout, ifm2_zero_point, ifm2_scale, ifm2_channels, (0, 0, 0, 0) - ) - - # Binary elementwise compute operation - ofm_height = dmaed_ifm.shape[1] - ofm_width = dmaed_ifm.shape[2] - - binary_elementwise_attrs = { - "op": "ethosu_binary_elementwise", - "operator_type": operator_type, - "reversed_operands": reversed_operands, - "activation": activation, - "clip_min": clip_min, - "clip_max": clip_max, - "rounding_mode": rounding_mode, - "use_rescale": use_rescale, - "rescale_scale": rescale_scale, - "rescale_shift": rescale_shift, - } - - operators = { - "ADD": operator.add, - "SUB": operator.sub, - "MUL": operator.mul, - "MIN": te.min, - "MAX": te.max, - "SHR": operator.add, - "SHL": operator.add, - } - broadcast = [value == 1 for value in dmaed_ifm2.shape] - - has_lut = activation in ("TANH", "LUT", "SIGMOID") - # This is a trick to insert the LUT tensor into the TE graph if LUT is present - lut_expr = get_lut_expr(lut, ifm.dtype) if has_lut else 0 - - # Add the LUT tensor to the attributes to be able to later tell which tensor is the LUT - if has_lut: - binary_elementwise_attrs["lut"] = lut - - if reversed_operands: - binary_elementwise = te.compute( - (1, ofm_height, ofm_width, ifm_channels), - lambda nn, hh, ww, cc: operators[operator_type]( - dmaed_ifm2( - 0 if broadcast[0] else nn, - 0 if broadcast[1] else hh, - 0 if broadcast[2] else ww, - 0 if broadcast[3] else cc, - ).astype(ifm.dtype), - dmaed_ifm(nn, hh, ww, cc).astype(ifm.dtype) + lut_expr, - ).astype(ofm_dtype), - name="ethosu_binary_elementwise", - 
attrs=binary_elementwise_attrs, - ) - else: - binary_elementwise = te.compute( - (1, ofm_height, ofm_width, ifm_channels), - lambda nn, hh, ww, cc: operators[operator_type]( - dmaed_ifm(nn, hh, ww, cc).astype(ifm.dtype), - dmaed_ifm2( - 0 if broadcast[0] else nn, - 0 if broadcast[1] else hh, - 0 if broadcast[2] else ww, - 0 if broadcast[3] else cc, - ).astype(ifm.dtype) - + lut_expr, - ).astype(ofm_dtype), - name="ethosu_binary_elementwise", - attrs=binary_elementwise_attrs, - ) - - nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(int(ifm_channels)) - - ifm_matrix = [ - [1, 0, 0, 0, 0], - [0, 1, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], - ] - ifm2_matrix = [ - [1, 0, 0, 0, 0], - [0, (1 - int(broadcast[1])), 0, 0, int(broadcast[1])], - [0, 0, (1 - int(broadcast[2])), 0, int(broadcast[2])], - [0, 0, 0, (1 - int(broadcast[3])), int(broadcast[3])], - [0, 0, 0, 0, 1], - ] - if ofm_layout == "NHCWB16": - ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist() - ifm2_matrix = np.matmul(ifm2_matrix, nhcwb16_to_nhwc).tolist() - if ifm_layout == "NHCWB16": - ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist() - if ifm2_layout == "NHCWB16": - ifm2_matrix = np.matmul(nhwc_to_nhcwb16, ifm2_matrix).tolist() - ifm_propagator = Propagator( - ifm_matrix, - [0, 0, 0, 0] if ifm_layout == "NHWC" else [0, 0, 0, 0, 0], - ) - ifm2_propagator = Propagator( - ifm2_matrix, - [0, 0, 0, 0] if ifm2_layout == "NHWC" else [0, 0, 0, 0, 0], - ) - propagator_attrs = { - "ifm_propagator": ifm_propagator, - "ifm2_propagator": ifm2_propagator, - } - - # Compute operation for the OFM DMA pipeline - return dma_ofm_compute( - binary_elementwise, - ofm_layout, - ofm_zero_point, - ofm_scale, - ifm_channels, - attrs=propagator_attrs, - ) - - -@register_matcher -def match_ethosu_binary_elementwise(output_tensor, device_config): - """Match a Tensor Expression corresponding to an NPU Binary Elementwise. - - If the Tensor Expression matches, an EthosuPart will be created that models the - matched Tensor Expression. Otherwise, None will be returned. - - Parameters - ---------- - output_tensor : tvm.te.Tensor - The tensor to attempt to match with. - device_config : EthosuDeviceConfig - Target device configuration - - Returns - ------- - Union[None, EthosuPart] - The created EthosuPart if there was a match, otherwise None. 
- - """ - write = output_tensor - if write.op.name != "ethosu_write": - return None - convert_to_nhcwb16 = write.op.input_tensors[0] - if convert_to_nhcwb16.op.name != "ethosu_convert_to_nhcwb16": - return None - binary_elementwise = convert_to_nhcwb16.op.input_tensors[0] - if binary_elementwise.op.name != "ethosu_binary_elementwise": - return None - pad = binary_elementwise.op.input_tensors[0] - if pad.op.name != "ethosu_pad": - return None - upscale = pad.op.input_tensors[0] - if upscale.op.name != "ethosu_upscale": - return None - convert_to_nhwc = upscale.op.input_tensors[0] - if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc": - return None - read = convert_to_nhwc.op.input_tensors[0] - if read.op.name != "ethosu_read": - return None - pad2 = binary_elementwise.op.input_tensors[1] - if pad2.op.name != "ethosu_pad": - return None - upscale2 = pad2.op.input_tensors[0] - if upscale2.op.name != "ethosu_upscale": - return None - convert_to_nhwc2 = upscale2.op.input_tensors[0] - if convert_to_nhwc2.op.name != "ethosu_convert_to_nhwc": - return None - read2 = convert_to_nhwc2.op.input_tensors[0] - if read2.op.name != "ethosu_read": - return None - - input_tensors = [ - read.op.input_tensors[0], - read2.op.input_tensors[0], - ] - subgraph = TESubgraph(input_tensors, output_tensor) - propagators = [ - write.op.attrs["ifm_propagator"], - write.op.attrs["ifm2_propagator"], - ] - ifm_dtype = input_tensors[0].dtype - ofm_dtype = output_tensor.dtype - - output_layout = convert_to_nhcwb16.op.attrs["layout"] - input_layout = convert_to_nhwc.op.attrs["layout"] - input2_layout = convert_to_nhwc2.op.attrs["layout"] - output_quantum = device_config.get_output_quantum(output_layout) - - block_config = device_config.get_elementwise_block_config( - propagators[0], - propagators[1], - binary_elementwise.op.attrs, - output_tensor.shape, - output_layout, - input_layout, - input2_layout, - ifm_dtype, - ofm_dtype, - ) - - return EthosuPart( - subgraph, - propagators, - output_quantum, - 1, - block_config, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/te/common.py b/python/tvm/relay/backend/contrib/ethosu/te/common.py deleted file mode 100644 index 82528e75049b..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/te/common.py +++ /dev/null @@ -1,89 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Common methods for the NPU tensor expressions""" - -from typing import Tuple, List - - -def get_layout_transform_matrices(ofm_channels: int) -> Tuple[List[List[float]], List[List[float]]]: - """Get the NHWC->NHCWB16 and NHCWB16->NHWC layout transform matrices. 
- For information about the supported layouts see https://developer.arm.com/documentation/102420/ - 0200/Functional-description/Control-and-data-flow/Supported-memory-formats-for-feature-maps - - Parameters - ---------- - ofm_channels : int - The number of output channels in a NHWC layout - - Returns - ------- - nhwc_to_nhcwb16, nhcwb16_to_nhwc : Tuple[List[List[float]], List[List[float]]] - The layout transformation matrices - """ - - # The value of the last dimension (B16) is always 16. - nhwc_to_nhcwb16 = [ - [1, 0, 0, 0, 0], - [0, 1, 0, 0, 0], - [0, 0, 0, 1 / 16, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 0, 16], - [0, 0, 0, 0, 1], - ] - - # When we convert from NHWC to NHCWB16, the new C value is given by - # (ofm_channels - 1) // 16 + 1, which is a lossy operation, so we need to use - # the actual value of channels in the transform matrix to accurately recover - # the C in NHWC when we convert from NHCWB16 to NHWC. - nhcwb16_to_nhwc = [ - [1, 0, 0, 0, 0, 0], - [0, 1, 0, 0, 0, 0], - [0, 0, 0, 1, 0, 0], - # We need to offset only if number of ofm_channels is not divisible by 16 - # Moreover, we can't use just the "ofm_channels" as last element because - # the propogation matrices are used to propogate block configs as well. - [0, 0, 16, 0, 0, -(int(ofm_channels % 16 != 0)) * (16 - ofm_channels % 16)], - [0, 0, 0, 0, 0, 1], - ] - - return nhwc_to_nhcwb16, nhcwb16_to_nhwc - - -def get_lut_expr(lut, ifm_dtype): - """Get the LUT expression to pass it to the TE graph. - For information about the LUT see - https://developer.arm.com/documentation/102420/0200/Functional-description/Functional-blocks-/Output-unit/tanh--sigmoid--and-LUT - - Parameters - ---------- - lut : te.Tensor - The look-up table values. - ifm_dtype : str - The type of Input Feature Map tensor (IFM). - - Returns - ------- - lut_expr : tvm.tir.expr.Cast - The LUT expression to pass it to the TE graph - """ - assert ifm_dtype in ["int8", "int16"] - if ifm_dtype == "int8": - assert lut.shape[0] == 256 - if ifm_dtype == "int16": - assert lut.shape[0] == 512 - lut_expr = (lut[0] + lut[lut.shape[0] - 1]).astype(ifm_dtype) - return lut_expr diff --git a/python/tvm/relay/backend/contrib/ethosu/te/convolution.py b/python/tvm/relay/backend/contrib/ethosu/te/convolution.py deleted file mode 100644 index d7ed4a010c71..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/te/convolution.py +++ /dev/null @@ -1,330 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
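The sketch below is added for illustration and is not from the original patch. It exercises the get_layout_transform_matrices helper defined above with NumPy, assuming a channel count divisible by 16 so the NHWC -> NHCWB16 -> NHWC round trip is exact; the shape values are placeholders.

import numpy as np
from tvm.relay.backend.contrib.ethosu.te.common import get_layout_transform_matrices

ofm_channels = 32
to_nhcwb16, to_nhwc = get_layout_transform_matrices(ofm_channels)

nhwc = np.array([1, 8, 8, ofm_channels, 1])    # homogeneous (N, H, W, C, 1)
nhcwb16 = np.matmul(to_nhcwb16, nhwc)          # -> [1, 8, 2, 8, 16, 1]
restored = np.matmul(to_nhwc, nhcwb16)         # back to [1, 8, 8, 32, 1]
assert list(restored) == [1, 8, 8, ofm_channels, 1]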
-# pylint: disable=invalid-name,unused-argument -"""Tensor Expressions for convolutions for the NPU""" -from typing import Tuple, Union, List -import numpy as np # type: ignore - -from tvm import te # type: ignore -from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher - -from .dma import dma_ofm_compute, dma_ifm_compute -from .common import get_layout_transform_matrices, get_lut_expr - - -def conv2d_compute( - ifm: te.Tensor, - weight: te.Tensor, - scale_bias: te.Tensor, - lut: te.Tensor, - ifm_scale: float, - ifm_zero_point: int, - weight_zero_point: int, - ofm_scale: float, - ofm_zero_point: int, - strides: Tuple[int, int], - padding: Tuple[int, int, int, int], - dilation: Union[Tuple[int, int], List[int]], - activation: str, - clip_min: int, - clip_max: int, - rounding_mode: str, - upscale: str, - ifm_layout: str, - ofm_layout: str, -) -> te.Tensor: - """A compute operator representing the capabilities of a 2D convolution for the NPU. - - Parameters - ---------- - ifm : te.Tensor - The Input Feature Map tensor (IFM). - weight : te.Tensor - The weight tensor. - scale_bias : te.Tensor - The packed per-channel weight scale and bias tensor. - lut : te.Tensor - The look-up table of values to use if activation = "LUT". - ifm_scale : float - The quantization scale for the Input Feature Map tensor. - ifm_zero_point : int - The quantization zero point for the Input Feature Map tensor. - weight_zero_point : int - The quantization zero point for the weight tensor. - ofm_scale : float - The quantization scale for the Output Feature Map tensor. - ofm_zero_point : int - The quantization zero point for the Output Feature Map tensor. - strides : tuple - The 2 dimensional strides as (stride_height, stride_width). - padding : tuple - The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right). - dilation : Union[Tuple[int, int], List[int]] - The 2 dimensional dilation as (dilation_height, dilation_width). - activation : str - The activation function to use. - "NONE" - no activation function. - "CLIP" - clip the output between clip_min and clip_max. - "TANH" - tanh activation function. - "SIGMOID" - sigmoid activation function. - "LUT" - use a look-up table to perform the activation function. - clip_min : int - The minimum clipping value if activation = "CLIP". - clip_max : int - The maximum clipping value if activation = "CLIP". - rounding_mode : str - The rounding mode to apply to the Output Feature Map tensor. - "TFL" - Tensorflow Lite rounding scheme. - "TRUNCATE" - Truncate towards zero. - "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity. - upscale : str - The 2x2 upscaling mode to apply to the Input Feature Map tensor. - "NONE" - no upscaling. - "NEAREST" - upscale using nearest neighbour. - "ZEROS" - upscale using zeros. - "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity. - ifm_layout : str - The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16". - ofm_layout : str - The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16". - - Returns - ------- - te.Tensor - The OFM tensor. 
- - """ - assert ifm.shape[0] == 1 - assert ifm_layout in {"NHWC", "NHCWB16"} - assert ofm_layout in {"NHWC", "NHCWB16"} - - padding = [int(v) for v in padding] - stride_h, stride_w = [int(v) for v in strides] - dilation_h, dilation_w = [int(v) for v in dilation] - ofm_channels, kernel_h, kernel_w, ifm_channels = [int(v) for v in weight.shape] - upscale_factor = 2 if upscale != "NONE" else 1 - - # Compute operation for the IFM DMA pipeline - dmaed_ifm = dma_ifm_compute( - ifm, - ifm_layout, - ifm_zero_point, - ifm_scale, - weight.shape[3], - padding, - upscale_factor, - ) - - # 2D Convolution compute operation - dilated_kernel_h = (kernel_h - 1) * dilation_h + 1 - dilated_kernel_w = (kernel_w - 1) * dilation_w + 1 - ofm_height = (dmaed_ifm.shape[1] - dilated_kernel_h) // stride_h + 1 - ofm_width = (dmaed_ifm.shape[2] - dilated_kernel_w) // stride_w + 1 - rc = te.reduce_axis((0, ifm_channels), name="rc") - rh = te.reduce_axis((0, kernel_h), name="ry") - rw = te.reduce_axis((0, kernel_w), name="rx") - - conv2d_attrs = { - "op": "ethosu_conv2d", - "weight_zero_point": weight_zero_point, - "activation": activation, - "upscale": upscale, - "clip_min": clip_min, - "clip_max": clip_max, - "rounding_mode": rounding_mode, - "stride_h": stride_h, - "stride_w": stride_w, - "dilation_h": dilation_h, - "dilation_w": dilation_w, - } - - has_lut = activation in ("TANH", "LUT", "SIGMOID") - - # This is a trick to insert the LUT tensor into the TE graph if LUT is present - lut_expr = get_lut_expr(lut, ifm.dtype) if has_lut else 0 - - # Add the LUT tensor to the attributes to be able to later tell which tensor is the LUT - if has_lut: - conv2d_attrs["lut"] = lut - - conv = te.compute( - (1, ofm_height, ofm_width, ofm_channels), - lambda nn, hh, ww, cc: te.sum( - dmaed_ifm( - nn, hh * stride_h + rh * dilation_h, ww * stride_w + rw * dilation_w, rc - ).astype(ifm.dtype) - * weight[cc, rh, rw, rc].astype(ifm.dtype) - # This is a trick to load 10 elements of the scale_bias at once, not accurate maths - + (scale_bias[cc, 0] * scale_bias[cc, 9] + lut_expr).astype(ifm.dtype), - axis=[rh, rw, rc], - ), - name="ethosu_conv2d", - attrs=conv2d_attrs, - ) - - nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(ofm_channels) - - ifm_matrix = [ - [1, 0, 0, 0, 0], - [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)], - [0, 0, stride_w, 0, (dilated_kernel_w - stride_w)], - [0, 0, 0, 0, ifm_channels], - [0, 0, 0, 0, 1], - ] - weights_matrix = [ - [0, 0, 0, 1, 0], - [0, 0, 0, 0, kernel_h], - [0, 0, 0, 0, kernel_w], - [0, 0, 0, 0, ifm_channels], - [0, 0, 0, 0, 1], - ] - bias_matrix = [ - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 10], - [0, 0, 0, 0, 1], - ] - if ofm_layout == "NHCWB16": - ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist() - weights_matrix = np.matmul(weights_matrix, nhcwb16_to_nhwc).tolist() - bias_matrix = np.matmul(bias_matrix, nhcwb16_to_nhwc).tolist() - if ifm_layout == "NHCWB16": - ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist() - ifm_propagator = Propagator( - ifm_matrix, - [0, -padding[0], -padding[1], 0] - if ifm_layout == "NHWC" - else [0, -padding[0], 0, -padding[1], 0], - ) - weights_propagator = Propagator( - weights_matrix, - [0, 0, 0, 0], - ) - bias_propagator = Propagator( - bias_matrix, - [0, 0], - ) - propagator_attrs = { - "ifm_propagator": ifm_propagator, - "weights_propagator": weights_propagator, - "bias_propagator": bias_propagator, - } - - # Compute operation for the OFM DMA pipeline - dma_ofm = dma_ofm_compute( - conv, ofm_layout, ofm_zero_point, ofm_scale, 
ofm_channels, attrs=propagator_attrs - ) - return dma_ofm - - -@register_matcher -def match_ethosu_conv2d(output_tensor, device_config): - """Match a Tensor Expression corresponding to an NPU Conv2D. - - If the Tensor Expression matches, an EthosuPart will be created that models the - matched Tensor Expression. Otherwise, None will be returned. - - Parameters - ---------- - output_tensor : tvm.te.Tensor - The tensor to attempt to match with. - device_config : EthosuDeviceConfig - Target device configuration - - Returns - ------- - Union[None, EthosuPart] - The created EthosuPart if there was a match, otherwise None. - - """ - write = output_tensor - if write.op.name != "ethosu_write": - return None - convert_to_nhcwb16 = write.op.input_tensors[0] - if convert_to_nhcwb16.op.name != "ethosu_convert_to_nhcwb16": - return None - conv2d = convert_to_nhcwb16.op.input_tensors[0] - if conv2d.op.name != "ethosu_conv2d": - return None - pad = conv2d.op.input_tensors[0] - if pad.op.name != "ethosu_pad": - return None - upscale = pad.op.input_tensors[0] - if upscale.op.name != "ethosu_upscale": - return None - convert_to_nhwc = upscale.op.input_tensors[0] - if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc": - return None - read = convert_to_nhwc.op.input_tensors[0] - if read.op.name != "ethosu_read": - return None - - input_tensors = [ - read.op.input_tensors[0], - conv2d.op.input_tensors[1], - conv2d.op.input_tensors[2], - ] - - subgraph = TESubgraph(input_tensors, output_tensor) - propagators = [ - write.op.attrs["ifm_propagator"], - write.op.attrs["weights_propagator"], - write.op.attrs["bias_propagator"], - ] - ifm_dtype = input_tensors[0].dtype - ofm_dtype = output_tensor.dtype - - # Use channels from the weights tensor since that its shape doesn't change during layout - # conversion - ifm_channels = int(input_tensors[1].shape[3]) - ofm_channels, kernel_height, kernel_width = (int(axis) for axis in input_tensors[1].shape[0:3]) - kernel_elements = kernel_height * kernel_width - - is_part_kernel = device_config.is_partkernel( - conv2d.op.name, ifm_channels, ifm_dtype, kernel_elements - ) - subkernels = len( - device_config.get_kernel_steps( - conv2d.op.name, kernel_height, kernel_width, ifm_dtype, is_part_kernel - ) - ) - - output_layout = convert_to_nhcwb16.op.attrs["layout"] - input_layout = convert_to_nhwc.op.attrs["layout"] - output_quantum = device_config.get_output_quantum(output_layout) - - valid_block_configs = device_config.get_valid_block_configs( - propagators[0], - conv2d.op.attrs, - output_tensor.shape, - ofm_channels, - ifm_channels, - output_layout, - input_layout, - ifm_dtype, - ofm_dtype, - kernel_height, - kernel_width, - ) - - return EthosuPart( - subgraph, - propagators, - output_quantum, - subkernels, - valid_block_configs, - 1, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/te/depthwise.py b/python/tvm/relay/backend/contrib/ethosu/te/depthwise.py deleted file mode 100644 index ea88b5dfff9e..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/te/depthwise.py +++ /dev/null @@ -1,313 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name,unused-argument -"""Tensor Expressions for depthwise convolutions""" -from typing import Tuple, Union, List -import numpy as np - -from tvm import te -from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher - -from .dma import dma_ofm_compute, dma_ifm_compute -from .common import get_layout_transform_matrices, get_lut_expr - - -def depthwise_conv2d_compute( - ifm: te.Tensor, - weight: te.Tensor, - scale_bias: te.Tensor, - lut: te.Tensor, - ifm_scale: float, - ifm_zero_point: int, - weight_zero_point: int, - ofm_scale: float, - ofm_zero_point: int, - strides: Tuple[int, int], - padding: Tuple[int, int, int, int], - dilation: Union[Tuple[int, int], List[int]], - activation: str, - clip_min: int, - clip_max: int, - rounding_mode: str, - upscale: str, - ifm_layout: str, - ofm_layout: str, - ofm_dtype: str, -) -> te.Tensor: - """A compute operator representing the capabilities of 2D convolution for the NPU. - - Parameters - ---------- - ifm : te.Tensor - The Input Feature Map tensor (IFM). - weight : te.Tensor - The weight tensor. - scale_bias : te.Tensor - The packed per-channel weight scale and bias tensor. - lut : te.Tensor - The look-up table of values to use if activation = "LUT". - ifm_scale : float - The quantization scale for the Input Feature Map tensor. - ifm_zero_point : int - The quantization zero point for the Input Feature Map tensor. - weight_zero_point : int - The quantization zero point for the weight tensor. - ofm_scale : float - The quantization scale for the Output Feature Map tensor. - ofm_zero_point : int - The quantization zero point for the Output Feature Map tensor. - strides : tuple - The 2 dimensional strides as (stride_height, stride_width). - padding : tuple - The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right). - dilation : Union[int, tuple, list] - The 2 dimensional dilation as (dilation_height, dilation_width). - activation : str - The activation function to use. - "NONE" - no activation function. - "CLIP" - clip the output between clip_min and clip_max. - "TANH" - tanh activation function. - "SIGMOID" - sigmoid activation function. - "LUT" - use a look-up table to perform the activation function. - clip_min : int - The minimum clipping value if activation = "CLIP". - clip_max : int - The maximum clipping value if activation = "CLIP". - rounding_mode : str - The rounding mode to apply to the Output Feature Map tensor. - "TFL" - Tensorflow Lite rounding scheme. - "TRUNCATE" - Truncate towards zero. - "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity. - upscale : str - The 2x2 upscaling mode to apply to the Input Feature Map tensor. - "NONE" - no upscaling. - "NEAREST" - upscale using nearest neighbour. - "ZEROS" - upscale using zeros. - ifm_layout : str - The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16". - ofm_layout : str - The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16". - ofm_dtype : str, optional - The Output Feature Map tensor data type. Can be 'int8', 'uint8' or 'int16'. 
- - Returns - ------- - te.Tensor - The OFM tensor. - - """ - assert ifm.shape[0] == 1, "Only batch size 1 is supported" - assert ifm_layout in {"NHWC", "NHCWB16"} - assert ofm_layout in {"NHWC", "NHCWB16"} - - padding = [int(v) for v in padding] - stride_h, stride_w = [int(v) for v in strides] - dilation_h, dilation_w = [int(v) for v in dilation] - channels, kernel_h, kernel_w, _ = [int(v) for v in weight.shape] - - # Compute operation for the IFM DMA pipeline - dmaed_ifm = dma_ifm_compute(ifm, ifm_layout, ifm_zero_point, ifm_scale, channels, padding) - - # 2D Depthwise Convolution compute operation - dilated_kernel_h = (kernel_h - 1) * dilation_h + 1 - dilated_kernel_w = (kernel_w - 1) * dilation_w + 1 - ofm_height = (dmaed_ifm.shape[1] - dilated_kernel_h) // stride_h + 1 - ofm_width = (dmaed_ifm.shape[2] - dilated_kernel_w) // stride_w + 1 - rh = te.reduce_axis((0, kernel_h), name="ry") - rw = te.reduce_axis((0, kernel_w), name="rx") - - depthwise_conv2d_attrs = { - "op": "ethosu_depthwise_conv2d", - "weight_zero_point": weight_zero_point, - "activation": activation, - "clip_min": clip_min, - "clip_max": clip_max, - "rounding_mode": rounding_mode, - "upscale": upscale, - "stride_h": stride_h, - "stride_w": stride_w, - "dilation_h": dilation_h, - "dilation_w": dilation_w, - } - - has_lut = activation in ("TANH", "LUT", "SIGMOID") - - # This is a trick to insert the LUT tensor into the TE graph if LUT is present - lut_expr = get_lut_expr(lut, ifm.dtype) if has_lut else 0 - - # Add the LUT tensor to the attributes to be able to later tell which tensor is the LUT - if has_lut: - depthwise_conv2d_attrs["lut"] = lut - - depthwise = te.compute( - (1, ofm_height, ofm_width, channels), - lambda nn, hh, ww, cc: te.sum( - ( - dmaed_ifm( - nn, hh * stride_h + rh * dilation_h, ww * stride_w + rw * dilation_w, cc - ).astype(ifm.dtype) - * weight[cc, rh, rw, 0].astype(ifm.dtype) - # This is a trick to load 10 elements of the scale_bias at once, not accurate maths - + (scale_bias[cc, 0] * scale_bias[cc, 9] + lut_expr).astype(ifm.dtype) - ).astype(ofm_dtype), - axis=[rh, rw], - ), - name="ethosu_depthwise_conv2d", - attrs=depthwise_conv2d_attrs, - ) - - nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(channels) - - ifm_matrix = [ - [1, 0, 0, 0, 0], - [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)], - [0, 0, stride_w, 0, (dilated_kernel_w - stride_w)], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], - ] - weights_matrix = [ - [0, 0, 0, 1, 0], - [0, 0, 0, 0, kernel_h], - [0, 0, 0, 0, kernel_w], - [0, 0, 0, 0, 1], - [0, 0, 0, 0, 1], - ] - bias_matrix = [ - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 10], - [0, 0, 0, 0, 1], - ] - if ofm_layout == "NHCWB16": - ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist() - weights_matrix = np.matmul(weights_matrix, nhcwb16_to_nhwc).tolist() - bias_matrix = np.matmul(bias_matrix, nhcwb16_to_nhwc).tolist() - if ifm_layout == "NHCWB16": - ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist() - ifm_propagator = Propagator( - ifm_matrix, - [0, -padding[0], -padding[1], 0] - if ifm_layout == "NHWC" - else [0, -padding[0], 0, -padding[1], 0], - ) - weights_propagator = Propagator( - weights_matrix, - [0, 0, 0, 0], - ) - bias_propagator = Propagator( - bias_matrix, - [0, 0], - ) - propagator_attrs = { - "ifm_propagator": ifm_propagator, - "weights_propagator": weights_propagator, - "bias_propagator": bias_propagator, - } - - # Compute operation for the OFM DMA pipeline - return dma_ofm_compute( - depthwise, ofm_layout, ofm_zero_point, ofm_scale, channels, 
attrs=propagator_attrs - ) - - -@register_matcher -def match_ethosu_depthwise_conv2d(output_tensor, device_config): - """Match a Tensor Expression corresponding to an NPU Depthwise Conv2D. - - If the Tensor Expression matches, an EthosuPart will be created that models the - matched Tensor Expression. Otherwise, None will be returned. - - Parameters - ---------- - output_tensor : tvm.te.Tensor - The tensor to attempt to match with. - device_config : EthosuDeviceConfig - Target device configuration. - - Returns - ------- - Union[None, EthosuPart] - The created EthosuPart if there was a match, otherwise None. - - """ - write = output_tensor - if write.op.name != "ethosu_write": - return None - convert_to_nhcwb16 = write.op.input_tensors[0] - if convert_to_nhcwb16.op.name != "ethosu_convert_to_nhcwb16": - return None - depthwise2d = convert_to_nhcwb16.op.input_tensors[0] - if depthwise2d.op.name != "ethosu_depthwise_conv2d": - return None - pad = depthwise2d.op.input_tensors[0] - if pad.op.name != "ethosu_pad": - return None - upscale = pad.op.input_tensors[0] - if upscale.op.name != "ethosu_upscale": - return None - convert_to_nhwc = upscale.op.input_tensors[0] - if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc": - return None - read = convert_to_nhwc.op.input_tensors[0] - if read.op.name != "ethosu_read": - return None - - input_tensors = [ - read.op.input_tensors[0], - depthwise2d.op.input_tensors[1], - depthwise2d.op.input_tensors[2], - ] - subgraph = TESubgraph(input_tensors, output_tensor) - propagators = [ - write.op.attrs["ifm_propagator"], - write.op.attrs["weights_propagator"], - write.op.attrs["bias_propagator"], - ] - ifm_dtype = input_tensors[0].dtype - ofm_dtype = output_tensor.dtype - - channels, kernel_height, kernel_width = (int(axis) for axis in input_tensors[1].shape[0:3]) - - subkernels = len( - device_config.get_kernel_steps(depthwise2d.op.name, kernel_height, kernel_width, ifm_dtype) - ) - - output_layout = convert_to_nhcwb16.op.attrs["layout"] - input_layout = convert_to_nhwc.op.attrs["layout"] - output_quantum = device_config.get_output_quantum(output_layout) - - valid_block_configs = device_config.get_valid_block_configs( - propagators[0], - depthwise2d.op.attrs, - output_tensor.shape, - channels, - channels, - output_layout, - input_layout, - ifm_dtype, - ofm_dtype, - kernel_height, - kernel_width, - ) - - return EthosuPart( - subgraph, - propagators, - output_quantum, - subkernels, - valid_block_configs, - 1, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/te/dma.py b/python/tvm/relay/backend/contrib/ethosu/te/dma.py deleted file mode 100644 index 9d9eaf0ed444..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/te/dma.py +++ /dev/null @@ -1,382 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
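Before moving on to the DMA helpers below, a rough usage sketch of the depthwise compute defined above may help; it is not part of the original file, and the shapes, scales and zero points are made-up illustrative values (assuming an Ethos-U-enabled TVM checkout where this module is importable):

    from tvm import te

    # Hypothetical 1x8x8x3 int8 IFM with a 3x3 depthwise kernel.
    ifm = te.placeholder((1, 8, 8, 3), dtype="int8", name="ifm")
    weight = te.placeholder((3, 3, 3, 1), dtype="int8", name="weight")      # (channels, kh, kw, 1)
    scale_bias = te.placeholder((3, 10), dtype="uint8", name="scale_bias")  # 10 bytes per channel
    lut = te.placeholder((256,), dtype="int8", name="lut")                  # unused when activation="NONE"

    ofm = depthwise_conv2d_compute(
        ifm, weight, scale_bias, lut,
        ifm_scale=0.5, ifm_zero_point=0, weight_zero_point=0,
        ofm_scale=0.25, ofm_zero_point=0,
        strides=(1, 1), padding=(0, 0, 0, 0), dilation=(1, 1),
        activation="NONE", clip_min=0, clip_max=0,
        rounding_mode="TFL", upscale="NONE",
        ifm_layout="NHWC", ofm_layout="NHWC", ofm_dtype="int8",
    )
    # ofm has shape (1, 6, 6, 3): a 3x3 kernel with unit stride on an 8x8 input,
    # wrapped in the IFM/OFM DMA pipelines.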
-# pylint: disable=invalid-name,unnecessary-lambda -"""Tensor Expressions for operations supported by the NPU DMA engine""" -from typing import Callable, Tuple, Optional, List - -import tvm # type: ignore -from tvm import te -from tvm.topi.utils import equal_const_int # type: ignore - - -def _pad_tensor( - tensor: te.Tensor, pad_before: List[int], pad_after: Optional[List[int]] = None -) -> Callable: - """Generate a padded tensor. - - Parameters - ---------- - tensor : te.Tensor - The tensor to pad. - pad_before : tuple of int - The 'before' padding on each axis. - pad_after : tuple of int - The 'after' padding on each axis. - Returns - ------- - _pad : callable - The padded tensor. - - """ - pad_after = pad_after or pad_before - dims = len(tensor.shape) - assert len(pad_before) == dims - assert len(pad_after) == dims - - def _pad(*indices): - not_zero = [] # A list of padding conditions that aren't trivial (zero padding) - index_tuple = [] # The indices with which to access the padded tensor - for i in range(dims): - if equal_const_int(pad_before[i], 0) and equal_const_int(pad_after[i], 0): - index_tuple.append(indices[i]) - else: - index_tuple.append(indices[i] - pad_before[i]) - not_zero.append(indices[i] >= pad_before[i]) - not_zero.append(indices[i] < tensor.shape[i] + pad_before[i]) - if not_zero: - not_zero = tvm.tir.all(*not_zero) - return tvm.tir.if_then_else( - not_zero, tensor(*index_tuple), tvm.tir.const(0, tensor.dtype) - ) - return tensor(*index_tuple) - - return _pad - - -def read_compute( - tensor: te.Tensor, zero_point: int, scale: float, layout: Optional[str] = None -) -> te.Tensor: - """A tensor expression which represents a read. - - Parameters - ---------- - tensor : te.Tensor - The tensor to read. - zero_point : int - The zero point of the tensor. - scale : float - The scale of the tensor. - layout : Optional[str] - The layout of the tensor, either NHWC or NHCWB16. - - Returns - ------- - te.Tensor - The tensor having been read. - - """ - read_attrs = { - "op": "ethosu_read", - "zero_point": zero_point, - "scale": scale, - } - - if layout: - assert layout in {"NHWC", "NHCWB16"} - read_attrs["layout"] = layout - - return te.compute(tensor.shape, lambda *i: tensor(*i), name="ethosu_read", attrs=read_attrs) - - -def write_compute( - tensor: te.Tensor, - zero_point: int, - scale: float, - layout: Optional[str] = None, - attrs: dict = None, -) -> te.Tensor: - """A tensor expression which represents a write. - - Parameters - ---------- - tensor : te.Tensor - The tensor to write. - zero_point : int - The zero point of the tensor. - scale : float - The scale of the tensor. - layout : Optional[str] - The layout of the tensor, either NHWC or NHCWB16. - attrs : dict, optional - Additional attributes to add to the compute op. - - Returns - ------- - te.Tensor - The tensor having been written. - - """ - - if not attrs: - attrs = {} - - write_attrs = { - "op": "ethosu_write", - "zero_point": zero_point, - "scale": scale, - } - - if layout: - assert layout in {"NHWC", "NHCWB16"} - write_attrs["layout"] = layout - - write_attrs = {**write_attrs, **attrs} - return te.compute( - tensor.shape, - lambda *i: tensor(*i), - name="ethosu_write", - attrs=write_attrs, - ) - - -def convert_to_nhwc_compute(tensor: te.Tensor, layout: str, channels: int) -> te.Tensor: - """Converts a tensor into NHWC layout if it's in NHWCB16 layout. 
- - When the current layout is NHCWB16, a reduce sum operation is inserted - to ensure that the whole of the input tensor has a data dependency on - the copy operation. Without this, TVM removes compute that is deemed to - be unnecessary, which causes strides for the NPU to be calculated - incorrectly. - - Parameters - ---------- - tensor : te.Tensor - The tensor to convert. - layout : str - The layout of the tensor, either NHWC or NHCWB16. - channels : int - The number of valid channels for the tensor. - - Returns - ------- - te.Tensor - The converted tensor in NHWC layout. - - """ - assert layout in {"NHWC", "NHCWB16"} - convert_to_nhwc_attrs = { - "op": "ethosu_convert_to_nhwc", - "layout": layout, - } - if layout == "NHCWB16": - rc = te.reduce_axis((0, 16), name="rc") - return te.compute( - (tensor.shape[0], tensor.shape[1], tensor.shape[3], channels), - lambda nn, hh, ww, cc: te.sum( - tensor(nn, hh, te.indexdiv(cc, 16), ww, te.indexmod(rc, 16)), axis=rc - ), - name="ethosu_convert_to_nhwc", - attrs=convert_to_nhwc_attrs, - ) - - return te.compute( - tensor.shape, - lambda *i: tensor(*i), - name="ethosu_convert_to_nhwc", - attrs=convert_to_nhwc_attrs, - ) - - -def convert_to_nhcwb16_compute(tensor: te.Tensor, layout: str, channels: int) -> te.Tensor: - """Converts a tensor into NHCWB16 layout if it's in NHWC layout. - - Parameters - ---------- - tensor : te.Tensor - The tensor to convert. - layout : str - The layout of the tensor, either NHWC or NHCWB16. - channels : int - The number of valid channels for the tensor. - - Returns - ------- - te.Tensor - The converted tensor in NHCWB16 layout. - - """ - assert layout in {"NHWC", "NHCWB16"} - convert_to_nhcwb16_attrs = { - "op": "ethosu_convert_to_nhcwb16", - "layout": layout, - } - if layout == "NHCWB16": - out_channel_bricks = te.indexdiv(channels - 1, 16) + 1 - output_shape = (1, tensor.shape[1], out_channel_bricks, tensor.shape[2], 16) - return te.compute( - output_shape, - lambda nn, hh, cc, ww, cb: tvm.tir.if_then_else( - cc * 16 + cb < channels, - tensor(nn, hh, ww, cc * 16 + cb), - tvm.tir.IntImm(tensor.dtype, 0), - ), - name="ethosu_convert_to_nhcwb16", - attrs=convert_to_nhcwb16_attrs, - ) - - return te.compute( - tensor.shape, - lambda *i: tensor(*i), - name="ethosu_convert_to_nhcwb16", - attrs=convert_to_nhcwb16_attrs, - ) - - -def pad_compute(tensor: te.Tensor, padding: tuple) -> te.Tensor: - """Pad an NHWC tensor in the height and width axes. - - Parameters - ---------- - tensor : te.Tensor - The tensor to pad. - padding : tuple - The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right). - - Returns - ------- - te.Tensor - The padded tensor. - - """ - pad_top, pad_left, pad_down, pad_right = padding - pad_before = [0, int(pad_top), int(pad_left), 0] - pad_after = [0, int(pad_down), int(pad_right), 0] - pad_attrs = { - "op": "ethosu_pad", - } - shape = tensor.shape - return te.compute( - (shape[0], shape[1] + pad_top + pad_down, shape[2] + pad_left + pad_right, shape[3]), - lambda nn, hh, ww, cc: _pad_tensor(tensor, pad_before, pad_after)(nn, hh, ww, cc), - name="ethosu_pad", - attrs=pad_attrs, - ) - - -def upscale_compute(tensor: te.Tensor, upscale_factor: int) -> te.Tensor: - """Apply upscaling to an NHWC tensor. - - Parameters - ---------- - tensor : te.Tensor - The tensor to pad. - upscale_factor : int - The factor by which to apply upscaling. - - Returns - ------- - te.Tensor - The upscaled tensor. 
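For illustration (hypothetical shape): with upscale_factor=2 a (1, 4, 4, 8) tensor becomes (1, 8, 8, 8); each input pixel is duplicated into a 2x2 block through the hh // upscale_factor, ww // upscale_factor indexing used below.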
- - """ - shape = tensor.shape - - reason = f"The compiler only supports 2x2 upscaling, but factor was {upscale_factor}." - assert upscale_factor in (1, 2), reason - new_shape = (shape[0], shape[1] * upscale_factor, shape[2] * upscale_factor, shape[3]) - - upscale_attrs = {"op": "ethosu_upscale"} - - return te.compute( - new_shape, - lambda nn, hh, ww, cc: tensor(nn, hh // upscale_factor, ww // upscale_factor, cc), - name="ethosu_upscale", - attrs=upscale_attrs, - ) - - -def dma_ifm_compute( - ifm: te.Tensor, - layout: str, - zero_point: int, - scale: float, - channels: int, - padding: Tuple[int, int, int, int], - upscale_factor: Optional[int] = 1, -) -> te.Tensor: - """A sequence of compute operators representing the DMA capabilities for an IFM. - - Parameters - ---------- - ifm : te.Tensor - The Input Feature Map (IFM) tensor. - layout : str - The layout of the data, either NHWC or NHCWB16. - zero_point : int - The zero point of the data. - scale : float - The scale of the data. - channels : int - The number of valid channels for the data. - padding : tuple - The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right). - upscale_factor : Optional[int] - The factor by which to apply upscaling. By default there will be no upscaling. - - Returns - ------- - te.Tensor - The dma-ed IFM tensor. - - """ - read_ifm = read_compute(ifm, zero_point, scale, layout=layout) - convert_to_nhwc_ifm = convert_to_nhwc_compute(read_ifm, layout, channels) - upscale_ifm = upscale_compute(convert_to_nhwc_ifm, upscale_factor) - return pad_compute(upscale_ifm, padding) - - -def dma_ofm_compute( - ofm: te.Tensor, layout: str, zero_point: int, scale: float, channels: int, attrs: dict = None -) -> te.Tensor: - """A sequence of compute operators representing the DMA capabilities for an OFM. - - Parameters - ---------- - ofm : te.Tensor - The Output Feature Map (OFM) tensor. - layout : str - The layout of the data, either NHWC or NHCWB16. - zero_point : int - The zero point of the data. - scale : float - The scale of the data. - channels : int - The number of valid channels for the data. - attrs : dict, optional - Additional attributes to add to the write compute op. - - - Returns - ------- - te.Tensor - The dma-ed OFM tensor. - - """ - if not attrs: - attrs = {} - convert_to_nhcwb16_ofm = convert_to_nhcwb16_compute(ofm, layout, channels) - return write_compute(convert_to_nhcwb16_ofm, zero_point, scale, layout=layout, attrs=attrs) diff --git a/python/tvm/relay/backend/contrib/ethosu/te/identity.py b/python/tvm/relay/backend/contrib/ethosu/te/identity.py deleted file mode 100644 index 9b0925056fc5..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/te/identity.py +++ /dev/null @@ -1,170 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# pylint: disable=invalid-name,unused-argument -"""Tensor Expression for identity""" -import numpy as np -from tvm import te -from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher - -from .common import get_lut_expr -from .dma import read_compute, write_compute - - -def identity_compute( - ifm: te.Tensor, - lut: te.Tensor, - ifm_scale: float, - ifm_zero_point: int, - ofm_scale: float, - ofm_zero_point: int, - activation: str, - rounding_mode: str, -) -> te.Tensor: - """A compute operator for the NPU identity operator. - - Parameters - ---------- - ifm : te.Tensor - The Input Feature Map tensor (IFM). - lut : te.Tensor - The look-up table values to use if activation is "LUT", "TANH" or "SIGMOID". - ifm_scale : float - The quantization scale for the Input Feature Map tensor. - ifm_zero_point : int - The quantization zero point for the Input Feature Map tensor. - ofm_scale : float - The quantization scale for the Output Feature Map tensor. - ofm_zero_point : int - The quantization zero point for the Output Feature Map tensor. - activation : str - The activation function to use. - "NONE" - no activation function. - "TANH" - tanh activation function. - "SIGMOID" - sigmoid activation function. - "LUT" - use a look-up table to perform the activation function. - rounding_mode : str - The rounding mode to apply to the Output Feature Map tensor. - "TFL" - Tensorflow Lite rounding scheme. - "TRUNCATE" - Truncate towards zero. - "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity. - - Returns - ------- - te.Tensor - The Output Feature Map tensor. - """ - dmaed_ifm = read_compute(ifm, ifm_zero_point, ifm_scale) - id_attrs = {"op": "ethosu_identity", "activation": activation, "rounding_mode": rounding_mode} - - has_lut = activation in ("TANH", "LUT", "SIGMOID") - - # This is a trick to insert the LUT tensor into the TE graph if LUT is present - lut_expr = get_lut_expr(lut, ifm.dtype) if has_lut else 0 - - # Add the LUT tensor to the attributes to be able to later tell which tensor is the LUT - if has_lut: - id_attrs["lut"] = lut - - identity = te.compute( - ifm.shape, - lambda *i: (dmaed_ifm(*i) + lut_expr).astype(ifm.dtype), - name="ethosu_identity", - attrs=id_attrs, - ) - length = len(ifm.shape) - ifm_matrix = np.identity(length + 1) - offset = np.zeros(length, dtype="int64") - ifm_propagator = Propagator( - ifm_matrix, - offset.tolist(), - ) - propagator_attrs = { - "ifm_propagator": ifm_propagator, - } - return write_compute(identity, ofm_zero_point, ofm_scale, attrs=propagator_attrs) - - -@register_matcher -def match_ethosu_identity(output_tensor, device_config): - """Match a Tensor Expression corresponding to an NPU identity. - - If the Tensor Expression matches, an EthosuPart will be created that models the - matched Tensor Expression. Otherwise, None will be returned. - - Parameters - ---------- - output_tensor : tvm.te.Tensor - The tensor to attempt to match with. - device_config : EthosuDeviceConfig - Target device configuration - - Returns - ------- - Union[None, EthosuPart] - The created EthosuPart if there was a match, otherwise None. 
- """ - write = output_tensor - if write.op.name != "ethosu_write": - return None - identity = write.op.input_tensors[0] - if identity.op.name != "ethosu_identity": - return None - read = identity.op.input_tensors[0] - if read.op.name != "ethosu_read": - return None - - input_tensors = [ - read.op.input_tensors[0], - ] - subgraph = TESubgraph(input_tensors, output_tensor) - propagators = [ - write.op.attrs["ifm_propagator"], - ] - ifm_dtype = input_tensors[0].dtype - ofm_dtype = output_tensor.dtype - - input_tensors_shape = input_tensors[0].shape - length = len(input_tensors_shape) - assert length <= 4, "Input tensor shape must be <= 4 for the identity operator" - channels = int(input_tensors_shape[length - 1]) if length >= 3 else 1 - - subkernels = len(device_config.get_kernel_steps(identity.op.name, 1, 1, ifm_dtype)) - - input_layout = output_layout = "NHWC" - output_quantum = device_config.get_output_quantum(output_layout) - - valid_block_configs = device_config.get_valid_block_configs( - propagators[0], - identity.op.attrs, - output_tensor.shape, - channels, - channels, - output_layout, - input_layout, - ifm_dtype, - ofm_dtype, - 1, - 1, - ) - - return EthosuPart( - subgraph, - propagators, - output_quantum, - subkernels, - valid_block_configs, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/te/inline.py b/python/tvm/relay/backend/contrib/ethosu/te/inline.py deleted file mode 100644 index 79631f4b8c1c..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/te/inline.py +++ /dev/null @@ -1,74 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=unused-argument -"""Tensor Expressions for operations that will be inlined""" -import numpy as np # type: ignore - -from tvm.contrib.ethosu.cascader import TESubgraph, InlinePart, Propagator, register_matcher - - -INLINE_OPS = {"T_reshape", "T_strided_slice"} - - -@register_matcher -def match_ethosu_inline(output_tensor, device_config): - """Match a Tensor Expression corresponding to an operator that will be inlined. - - If the Tensor Expression matches, an InlinePart will be created that models the - matched Tensor Expression. Otherwise, None will be returned. This matcher is - naive and assumes nothing about the compute of the Tensor Expression. Therefore, - the resulting InlinePart will have full-tensor dependencies (i.e. each output - element depends on every input element). - - Parameters - ---------- - output_tensor : tvm.te.Tensor - The tensor to attempt to match with. - device_config : EthosuDeviceConfig - Target device configuration - - Returns - ------- - Union[None, InlinePart] - The created InlinePart if there was a match, otherwise None. 
- - """ - if output_tensor.op.name not in INLINE_OPS: - return None - - input_tensors = output_tensor.op.input_tensors - propagators = [] - output_dims = len(output_tensor.shape) - for input_tensor in input_tensors: - input_dims = len(input_tensor.shape) - transform_matrix = np.zeros((input_dims + 1, output_dims + 1)) - for i, axis in enumerate(input_tensor.shape): - transform_matrix[i, output_dims] = int(axis) - transform_matrix[input_dims, output_dims] = 1 - offset_vector = np.zeros(input_dims, dtype="int64") - propagators.append( - Propagator( - transform_matrix.tolist(), - offset_vector.tolist(), - ) - ) - - subgraph = TESubgraph(input_tensors, output_tensor) - return InlinePart( - subgraph, - propagators, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/te/pooling.py b/python/tvm/relay/backend/contrib/ethosu/te/pooling.py deleted file mode 100644 index bf65f380d20a..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/te/pooling.py +++ /dev/null @@ -1,282 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name,unused-argument -"""Tensor Expressions for poolings""" -from typing import Tuple - -import numpy as np -from tvm import te -from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher - -from .dma import dma_ofm_compute, dma_ifm_compute -from .common import get_layout_transform_matrices, get_lut_expr - - -def pooling_compute( - ifm: te.Tensor, - lut: te.Tensor, - pooling_type: str, - ifm_scale: float, - ifm_zero_point: int, - ofm_scale: float, - ofm_zero_point: int, - pool_shape: Tuple[int, int], - ofm_channels: int, - ofm_dtype: str, - strides: Tuple[int, int], - padding: Tuple[int, int, int, int], - activation: str, - clip_min: int, - clip_max: int, - rounding_mode: str, - upscale: str, - ifm_layout: str, - ofm_layout: str, -) -> te.Tensor: - """A compute operator representing the capabilities of pooling for the NPU. - - Parameters - ---------- - ifm : te.Tensor - The Input Feature Map tensor (IFM). - lut : te.Tensor - The look-up table of values to use if activation = "LUT". - pooling_type: str - The type of the pooling. "AVG" - average pool, "MAX" - max pool. - ifm_scale : float - The quantization scale for the Input Feature Map tensor. - ifm_zero_point : int - The quantization zero point for the Input Feature Map tensor. - ofm_scale : float - The quantization scale for the Output Feature Map tensor. - ofm_zero_point : int - The quantization zero point for the Output Feature Map tensor. - pool_shape : Tuple[int, int] - The 2 dimensional pool shape as (pool_shape_height, pool_shape_width). - ofm_channels : int - The number of the Output Feature Map channels - ofm_dtype : str - The Output Feature Map tensor data type. 
- "AVG" or "MAX" pooling - can be "int8", "uint8", or "int16". - "SUM" pooling - can be "int32". - strides : Tuple[int, int] - The 2 dimensional strides as (stride_height, stride_width). - padding : Tuple[int, int, int, int] - The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right). - activation : str - The activation function to use. - "NONE" - no activation function. - "CLIP" - clip the output between clip_min and clip_max. - "TANH" - tanh activation function. - "SIGMOID" - sigmoid activation function. - "LUT" - use a look-up table to perform the activation function. - clip_min : int - The minimum clipping value if activation = "CLIP". - clip_max : int - The maximum clipping value if activation = "CLIP". - rounding_mode : str - The rounding mode to apply to the Output Feature Map tensor. - "TFL" - Tensorflow Lite rounding scheme. - "TRUNCATE" - Truncate towards zero. - "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity. - upscale : str - The 2x2 upscaling mode to apply to the Input Feature Map tensor. - "NONE" - no upscaling. - "NEAREST" - upscale using nearest neighbour. - "ZEROS" - upscale using zeros. - ifm_layout : str - The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16". - ofm_layout : str - The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16". - - Returns - ------- - te.Tensor - The OFM tensor. - """ - assert ifm.shape[0] == 1 - assert ifm_layout in {"NHWC", "NHCWB16"} - assert ofm_layout in {"NHWC", "NHCWB16"} - - padding = [int(v) for v in padding] - stride_h, stride_w = [int(v) for v in strides] - pool_shape_h, pool_shape_w = [int(v) for v in pool_shape] - ifm_channels = ofm_channels if pooling_type != "SUM" else ifm.shape[-1] - upscale_factor = 2 if upscale != "NONE" else 1 - - # Compute operation for the IFM DMA pipeline - dmaed_ifm = dma_ifm_compute( - ifm, ifm_layout, ifm_zero_point, ifm_scale, ifm_channels, padding, upscale_factor - ) - - # Pooling compute operation - ofm_height = (dmaed_ifm.shape[1] - pool_shape_h) // stride_h + 1 - ofm_width = (dmaed_ifm.shape[2] - pool_shape_w) // stride_w + 1 - rh = te.reduce_axis((0, pool_shape_h), name="ry") - rw = te.reduce_axis((0, pool_shape_w), name="rx") - rc = te.reduce_axis((0, 1 if pooling_type != "SUM" else ifm_channels), name="rc") - - pooling_attrs = { - "op": "ethosu_pooling", - "pooling_type": pooling_type, - "pool_shape_h": pool_shape_h, - "pool_shape_w": pool_shape_w, - "stride_h": stride_h, - "stride_w": stride_w, - "activation": activation, - "clip_min": clip_min, - "clip_max": clip_max, - "rounding_mode": rounding_mode, - "upscale": upscale, - } - - has_lut = activation in ("TANH", "LUT", "SIGMOID") - - # This is a trick to insert the LUT tensor into the TE graph if LUT is present - lut_expr = get_lut_expr(lut, ifm.dtype) if has_lut else 0 - - # Add the LUT tensor to the attributes to be able to later tell which tensor is the LUT - if has_lut: - pooling_attrs["lut"] = lut - - pooling = te.compute( - (1, ofm_height, ofm_width, ofm_channels), - lambda nn, hh, ww, cc: te.max( - (dmaed_ifm(nn, hh * stride_h + rh, ww * stride_w + rw, cc + rc) + lut_expr).astype( - ofm_dtype - ), - axis=[rh, rw, rc], - ), - name="ethosu_pooling", - attrs=pooling_attrs, - ) - - nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(int(ofm_channels)) - - ifm_matrix = [ - [1, 0, 0, 0, 0], - [0, stride_h, 0, 0, (pool_shape_h - stride_h)], - [0, 0, stride_w, 0, (pool_shape_w - stride_w)], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], - ] - if ofm_layout == 
"NHCWB16": - ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist() - if ifm_layout == "NHCWB16": - ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist() - ifm_propagator = Propagator( - ifm_matrix, - [0, -padding[0], -padding[1], 0] - if ifm_layout == "NHWC" - else [0, -padding[0], 0, -padding[1], 0], - ) - propagator_attrs = { - "ifm_propagator": ifm_propagator, - } - - # Compute operation for the OFM DMA pipeline - return dma_ofm_compute( - pooling, ofm_layout, ofm_zero_point, ofm_scale, ofm_channels, attrs=propagator_attrs - ) - - -@register_matcher -def match_ethosu_pooling(output_tensor, device_config): - """Match a Tensor Expression corresponding to an NPU Pooling. - - If the Tensor Expression matches, an EthosuPart will be created that models the - matched Tensor Expression. Otherwise, None will be returned. - - Parameters - ---------- - output_tensor : tvm.te.Tensor - The tensor to attempt to match with. - device_config : EthosuDeviceConfig - Target device configuration - - Returns - ------- - Union[None, EthosuPart] - The created EthosuPart if there was a match, otherwise None. - - """ - write = output_tensor - if write.op.name != "ethosu_write": - return None - convert_to_nhcwb16 = write.op.input_tensors[0] - if convert_to_nhcwb16.op.name != "ethosu_convert_to_nhcwb16": - return None - pool2d = convert_to_nhcwb16.op.input_tensors[0] - if pool2d.op.name != "ethosu_pooling": - return None - pad = pool2d.op.input_tensors[0] - if pad.op.name != "ethosu_pad": - return None - upscale = pad.op.input_tensors[0] - if upscale.op.name != "ethosu_upscale": - return None - convert_to_nhwc = upscale.op.input_tensors[0] - if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc": - return None - read = convert_to_nhwc.op.input_tensors[0] - if read.op.name != "ethosu_read": - return None - - input_tensors = [ - read.op.input_tensors[0], - ] - subgraph = TESubgraph(input_tensors, output_tensor) - propagators = [ - write.op.attrs["ifm_propagator"], - ] - ifm_dtype = input_tensors[0].dtype - ofm_dtype = output_tensor.dtype - - # Use channels from a stage of TE graph where the IFM is always NHWC - channels = int(pool2d.shape[3]) - pool_shape_h = int(pool2d.op.attrs["pool_shape_h"]) - pool_shape_w = int(pool2d.op.attrs["pool_shape_w"]) - - subkernels = len( - device_config.get_kernel_steps(pool2d.op.name, pool_shape_h, pool_shape_w, ifm_dtype) - ) - - output_layout = convert_to_nhcwb16.op.attrs["layout"] - input_layout = convert_to_nhwc.op.attrs["layout"] - output_quantum = device_config.get_output_quantum(output_layout) - - valid_block_configs = device_config.get_valid_block_configs( - propagators[0], - pool2d.op.attrs, - output_tensor.shape, - channels, - channels, - output_layout, - input_layout, - ifm_dtype, - ofm_dtype, - pool_shape_h, - pool_shape_w, - ) - - return EthosuPart( - subgraph, - propagators, - output_quantum, - subkernels, - valid_block_configs, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/te/unary_elementwise.py b/python/tvm/relay/backend/contrib/ethosu/te/unary_elementwise.py deleted file mode 100644 index dde3133b56fa..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/te/unary_elementwise.py +++ /dev/null @@ -1,239 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name,unused-argument -"""Tensor Expressions for unary_elementwise for the NPU""" - -import numpy as np -from tvm import te -from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher -from .dma import dma_ofm_compute, dma_ifm_compute -from .common import get_layout_transform_matrices - - -def unary_elementwise_compute( - ifm: te.Tensor, - lut: te.Tensor, - operator_type: str, - ifm_scale: float, - ifm_zero_point: int, - ofm_scale: float, - ofm_zero_point: int, - ofm_channels: int, - activation: str, - clip_min: int, - clip_max: int, - rounding_mode: str, - ifm_layout: str, - ofm_layout: str, -) -> te.Tensor: - """A compute operator representing the capabilities of unary_elementwise for the NPU. - - Parameters - ---------- - ifm : te.Tensor - The Input Feature Map tensor (IFM). - lut : te.Tensor - The look-up table values to use if activation = "LUT". - operator_type: str - The type of the unary elementwise operator. - "ABS" - "CLZ" - ifm_scale : float - The quantization scale for the Input Feature Map tensor. - ifm_zero_point : int - The quantization zero point for the Input Feature Map tensor. - ofm_scale : float - The quantization scale for the Output Feature Map tensor. - ofm_zero_point : int - The quantization zero point for the Output Feature Map tensor. - ofm_channels : int - The number of OFM channels. - activation : str - The activation function to use. - "NONE" - no activation function. - "CLIP" - clip the output between clip_min and clip_max. - "TANH" - tanh activation function. - "SIGMOID" - sigmoid activation function. - "LUT" - use a look-up table to perform the activation function. - clip_min : int - The minimum clipping value if activation = "CLIP". - clip_max : int - The maximum clipping value if activation = "CLIP". - rounding_mode : str - The rounding mode to apply to the Output Feature Map tensor. - "TFL" - Tensorflow Lite rounding scheme. - "TRUNCATE" - Truncate towards zero. - "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity. - ifm_layout : str, optional - The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16". - ofm_layout : str, optional - The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16". - - Returns - ------- - te.Tensor - The OFM tensor. 
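A rough usage sketch with made-up shapes and quantisation values (not part of the original docstring):

    from tvm import te

    ifm = te.placeholder((1, 4, 4, 8), dtype="int8", name="ifm")
    lut = te.placeholder((256,), dtype="int8", name="lut")  # unused when activation="NONE"
    ofm = unary_elementwise_compute(
        ifm, lut, "ABS", ifm_scale=1.0, ifm_zero_point=0,
        ofm_scale=1.0, ofm_zero_point=0, ofm_channels=8,
        activation="NONE", clip_min=0, clip_max=0,
        rounding_mode="TFL", ifm_layout="NHWC", ofm_layout="NHWC",
    )
    # ofm has shape (1, 4, 4, 8) and wraps the ABS operation in the read/write DMA pipeline.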
- - """ - assert ifm.shape[0] == 1 - assert ifm_layout in {"NHWC", "NHCWB16"} - assert ofm_layout in {"NHWC", "NHCWB16"} - - # Changing the ifm and ofm scale to conform with that expected by Vela API - if ofm_scale != 0: - ofm_scale = ifm_scale / ofm_scale - ifm_scale = 1.0 - - # Compute operation for the IFM DMA pipeline - dmaed_ifm = dma_ifm_compute( - ifm, ifm_layout, ifm_zero_point, ifm_scale, ofm_channels, (0, 0, 0, 0) - ) - - # Unary elementwise compute operation - ofm_height = dmaed_ifm.shape[1] - ofm_width = dmaed_ifm.shape[2] - - unary_elementwise_attrs = { - "op": "ethosu_unary_elementwise", - "operator_type": operator_type, - "activation": activation, - "clip_min": clip_min, - "clip_max": clip_max, - "rounding_mode": rounding_mode, - } - - def clz_imp(inp): - # Assuming that it's a 32 bit int - return 32 - te.log2(inp) - - operators = {"ABS": te.abs, "CLZ": clz_imp} - - unary_elementwise = te.compute( - (1, ofm_height, ofm_width, ofm_channels), - lambda nn, hh, ww, cc: operators[operator_type]( - dmaed_ifm(nn, hh, ww, cc).astype(ifm.dtype) - ), - name="ethosu_unary_elementwise", - attrs=unary_elementwise_attrs, - ) - - nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(int(ofm_channels)) - - ifm_matrix = [ - [1, 0, 0, 0, 0], - [0, 1, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], - ] - if ofm_layout == "NHCWB16": - ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist() - if ifm_layout == "NHCWB16": - ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist() - - ifm_propagator = Propagator( - ifm_matrix, - [0, 0, 0, 0] if ifm_layout == "NHWC" else [0, 0, 0, 0, 0], - ) - propagator_attrs = {"ifm_propagator": ifm_propagator} - - # Compute operation for the OFM DMA pipeline - return dma_ofm_compute( - unary_elementwise, - ofm_layout, - ofm_zero_point, - ofm_scale, - ofm_channels, - attrs=propagator_attrs, - ) - - -@register_matcher -def match_ethosu_unary_elementwise(output_tensor, device_config): - """Match a Tensor Expression corresponding to an NPU Unary Elementwise. - - If the Tensor Expression matches, an EthosuPart will be created that models the - matched Tensor Expression. Otherwise, None will be returned. - - Parameters - ---------- - output_tensor : tvm.te.Tensor - The tensor to attempt to match with. - device_config : EthosuDeviceConfig - Target device configuration - - Returns - ------- - Union[None, EthosuPart] - The created EthosuPart if there was a match, otherwise None. 
- - """ - write = output_tensor - if write.op.name != "ethosu_write": - return None - convert_to_nhcwb16 = write.op.input_tensors[0] - if convert_to_nhcwb16.op.name != "ethosu_convert_to_nhcwb16": - return None - unary_elementwise = convert_to_nhcwb16.op.input_tensors[0] - if unary_elementwise.op.name != "ethosu_unary_elementwise": - return None - pad = unary_elementwise.op.input_tensors[0] - if pad.op.name != "ethosu_pad": - return None - upscale = pad.op.input_tensors[0] - if upscale.op.name != "ethosu_upscale": - return None - convert_to_nhwc = upscale.op.input_tensors[0] - if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc": - return None - read = convert_to_nhwc.op.input_tensors[0] - if read.op.name != "ethosu_read": - return None - - input_tensors = [ - read.op.input_tensors[0], - ] - subgraph = TESubgraph(input_tensors, output_tensor) - propagators = [ - write.op.attrs["ifm_propagator"], - ] - ifm_dtype = input_tensors[0].dtype - ofm_dtype = output_tensor.dtype - - output_layout = convert_to_nhcwb16.op.attrs["layout"] - input_layout = convert_to_nhwc.op.attrs["layout"] - output_quantum = device_config.get_output_quantum(output_layout) - - block_config = device_config.get_elementwise_block_config( - propagators[0], - None, - unary_elementwise.op.attrs, - output_tensor.shape, - output_layout, - input_layout, - None, - ifm_dtype, - ofm_dtype, - ) - - return EthosuPart( - subgraph, - propagators, - output_quantum, - 1, - block_config, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/__init__.py b/python/tvm/relay/backend/contrib/ethosu/tir/__init__.py deleted file mode 100644 index cc285e5241cd..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Arm(R) Ethos(TM)-U NPU TIR codegen modules.""" diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/binary_elementwise.py b/python/tvm/relay/backend/contrib/ethosu/tir/binary_elementwise.py deleted file mode 100644 index 91f5512453fb..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/binary_elementwise.py +++ /dev/null @@ -1,93 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Extract information from the binary_elementwise operators in TIR.""" -from typing import Tuple -import tvm -from .utils import get_outer_loops, get_op_attrs, get_loads -from .dma import get_ifm_params, get_ofm_params -from .spec import SerialActivation, SerialBinaryElementwise, SerialRescaleConfig -from .producers_consumers import ProducersConsumers - - -def get_binary_elementwise_params( - stmt: tvm.tir.AttrStmt, producers_consumers: ProducersConsumers -) -> Tuple[SerialBinaryElementwise, tvm.tir.Var, tvm.tir.Var]: - """Get the parameters necessary to construct a call_extern for a binary_elementwise. - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of a binary elementwise loop nest. - producers_consumers: ProducersConsumers - It associates pointers with the loop nest that produces - their values and with the loop nest that consumes their values. - - Returns - ------- - SerialBinaryElementwise - The parameters needed to construct a binary elementwise operator. - output_pointer : tvm.tir.Var - The output pointer of the binary elementwise operation. - replace_pointer : tvm.tir.Var - The output pointer of the DMA write operation, which is to replace - the binary elementwise output pointer. - is_allocator : bool - Whether this operator allocates its output. 
- - """ - attrs, body = get_op_attrs(stmt) - reversed_operands = attrs["reversed_operands"] - - _, _, _, _, _, inner = get_outer_loops(body, "NHWC") - # loads = [input, input, LUT, LUT] - loads = get_loads(inner) - input_pointer = loads[0].buffer.data - input_pointer1 = loads[1].buffer.data - - if reversed_operands: - input_pointer, input_pointer1 = input_pointer1, input_pointer - output_pointer = inner.buffer.data - # Get feature map info - serial_ifm, _ = get_ifm_params(input_pointer, producers_consumers, stmt) - serial_ifm2, _ = get_ifm_params(input_pointer1, producers_consumers, stmt) - serial_ofm, serial_block_config, replace_pointer, is_allocator = get_ofm_params( - output_pointer, producers_consumers, stmt - ) - # Get activation info - serial_activation = SerialActivation( - op=attrs["activation"], clip_min=attrs["clip_min"], clip_max=attrs["clip_max"] - ) - rescale_config = SerialRescaleConfig( - use_rescale=attrs["use_rescale"], scale=attrs["rescale_scale"], shift=attrs["rescale_shift"] - ) - return ( - SerialBinaryElementwise( - ifm=serial_ifm, - ifm2=serial_ifm2, - ofm=serial_ofm, - operator_type=attrs["operator_type"], - reversed_operands=reversed_operands, - activation=serial_activation, - rounding_mode=attrs["rounding_mode"], - block_config=serial_block_config, - rescale_config=rescale_config, - ), - output_pointer, - replace_pointer, - is_allocator, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py b/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py deleted file mode 100644 index d47b3d4a7de6..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py +++ /dev/null @@ -1,246 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""The integration of the Arm(R) Ethos(TM)-U NPU TIR compiler.""" -import tvm -from tvm import relay -from tvm.driver.build_module import schedule_to_module -from tvm.relay.backend.contrib.ethosu import vela_api as vapi -from tvm.relay.expr_functor import ExprMutator - -from .. import util -from . import passes as ethosu_passes -from .scheduler import schedule - - -def lower_ethosu(sch, args, const_dict, name="main"): - """Lower a schedule to TIR for the Arm(R) Ethos(TM)-U NPU target. - - The resulting TIR module will contain a single function - that consists of a sequence of tir.call_extern to NPU - operations. - - Parameters - ---------- - sch : tvm.te.Schedule - The schedule to be lowered. - args : Union[list of tvm.te.Tensor, TEGraph] - The input/output tensors. - const_dict : dict of int to numpy.ndarray - The constant dictionary. - name : str, optional - The name of the lowered primitive function. - - Returns - ------- - mod : tvm.IRModule - The lowered TIR module. 
- const_dict : dict of int to numpy.ndarray - The modified constant dictionary. - - """ - if not isinstance(args, list): - args = list(args.inputs) + list(args.outputs) - # config setup - curr_pass_ctx = tvm.ir.transform.PassContext.current() - curr_cfg = dict() - for key, value in curr_pass_ctx.config.items(): - curr_cfg[key] = value - tir_compiler_cfg = { - "tir.LoopPartition": { - "partition_const_loop": True, - "no_unroll_loop_with_extent_one": True, - }, - "tir.UnrollLoop": {"auto_max_depth": -1}, - "tir.noalias": True, - "tir.debug_keep_trivial_loop": True, - } - # Merge two configs - curr_cfg = {**curr_cfg, **tir_compiler_cfg} - - sch = sch.normalize() - - with tvm.transform.PassContext(config=curr_cfg): - mod = schedule_to_module(sch, args, name) - - mod = tvm.tir.transform.Simplify()(mod) - mod = ethosu_passes.RemoveConcatenates()(mod) - mod = tvm.tir.transform.InjectRollingBuffer()(mod) - mod = tvm.tir.transform.StorageFlatten(64)(mod) - mod = tvm.tir.transform.UnrollLoop()(mod) - mod = tvm.tir.transform.Simplify()(mod) - mod = tvm.tir.transform.LoopPartition()(mod) - mod = ethosu_passes.RemoveZeroStores()(mod) - mod = tvm.tir.transform.Simplify()(mod) - mod = tvm.tir.transform.RemoveNoOp()(mod) - mod = ethosu_passes.ReplaceOperators()(mod) - mod = tvm.tir.transform.RemoveNoOp()(mod) - mod, const_dict = ethosu_passes.EncodeConstants(const_dict)(mod) - mod = ethosu_passes.HoistAllocates()(mod) - mod = tvm.tir.transform.RemoveNoOp()(mod) - mod, const_dict = ethosu_passes.MergeConstants(const_dict)(mod) - mod = ethosu_passes.CopyComputeReordering(vapi.get_max_copy_movements())(mod) - - disable_storage_rewrite = curr_cfg.get("tir.disable_storage_rewrite", False) - if not disable_storage_rewrite: - mod = tvm.tir.transform.StorageRewrite()(mod) - - mod = tvm.tir.transform.RemoveNoOp()(mod) - mod = ethosu_passes.AnnotateAllocates()(mod) - mod, const_dict = ethosu_passes.CreatePrimFuncWithoutConstants(const_dict)(mod) - return mod, const_dict - - -def lower_to_te(prim_func): - """Lower a Relay primitive function to a Tensor Expression in an unscheduled CachedFunc. - - Parameters - ---------- - prim_func : tvm.relay.Function - The Relay function to lower. - - Returns - ------- - out : CachedFunc - The lowered Tensor Expression as part of a CachedFunc. - - """ - f = tvm._ffi.get_global_func("relay.backend.LowerToTE") - return f(prim_func) - - -class ExtractConstants(ExprMutator): - """The actual mutator pass to extract the constants from a function and replace them with - Vars so the function can be lowered to a TE graph. Additionally returns all the values of - the constants extracted.""" - - def __init__(self): - super().__init__() - self.constants = [] - self.const_vars = [] - - def visit_constant(self, const): - if isinstance(const.checked_type, relay.ty.TensorType): - self.constants.append(const.data.asnumpy()) - name = "p" + str(len(self.constants)) - var = relay.var(type_annotation=const.checked_type, name_hint=name) - self.const_vars.append(var) - return var - - return const - - def visit_function(self, fn): - new_body = self.visit(fn.body) - new_params = list(fn.params) + self.const_vars - return relay.Function(new_params, new_body) - - def extract_constants(self, func): - new_func = self.visit(func) - return new_func, self.constants - - -def extract_constants(func): - """Extract the constants from a function and replace them with - Vars so the function can be lowered to a TE graph. Additionally - returns all the values of the constants extracted. 
- - Parameters - ---------- - func : tvm.relay.Function - The Relay function from which to extract constants. - - Returns - ------- - new_func : tvm.relay.Function - The Relay function with constants replaced by vars. - const_dict : dict of int to numpy.ndarray - A dict of the extracted constants keyed by their param index. - - """ - const_dict = {} - params = len(func.params) - new_func, consts = ExtractConstants().extract_constants(func) - for i, const in enumerate(consts): - const_dict[params + i] = const - - new_func = tvm.relay.transform.InferType()(tvm.IRModule.from_expr(new_func))["main"] - return new_func, const_dict - - -@util.create_npu_function_pass(opt_level=1) -class LowerToTIR: - """A pass that lowers NPU Relay functions to TIR. This pass wraps - the _lower_to_tir pass that operates function->function, while this - is IRModule->IRModule. - - Attributes - ---------- - scheduler : callable - A function to schedule NPU operations. For example, - scheduler.py/copy_constants. - """ - - def __init__(self, scheduler): - self.scheduler = scheduler - - def transform_npu_function(self, _, func: relay.Function) -> relay.Function: - """Lower NPU functions to TIR.""" - - tir_mod, const_dict = _lower_to_tir(func, self.scheduler) - - for param in const_dict.keys(): - const_dict[param] = tvm.nd.array(const_dict[param]) - - compiler_name = "ethos-u" - primfunc = tir_mod["main"] - primfunc = primfunc.with_attr("global_symbol", func.attrs["global_symbol"]) - primfunc = primfunc.with_attr("ethos-u.constants", const_dict) - primfunc = primfunc.with_attr("target", tvm.target.Target(compiler_name)) - return primfunc - - def __call__(self, *args, **kwargs): - pass - - -def _lower_to_tir(func, cascader=None): - """Lower a Relay function to TIR for the Arm(R) Ethos(TM)-U NPU target. - - The Relay function should only contain operations supported - by the NPU. - - Parameters - ---------- - func : tvm.relay.Function - The Relay function to lower. - cascader : Callable - An optional cascading function, - - Returns - ------- - mod : tvm.IRModule - The lowered TIR module. - consts : dict of int to numpy.ndarray - A dict of the extracted constants keyed by their param index. - - """ - func, consts = extract_constants(func) - mod = tvm.IRModule.from_expr(func) - func = relay.transform.InferType()(mod)["main"] - cached_func = lower_to_te(func) - s = schedule(cached_func, consts, cascader) - mod, consts = lower_ethosu(s, cached_func, consts) - return mod, consts diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/convolution.py b/python/tvm/relay/backend/contrib/ethosu/tir/convolution.py deleted file mode 100644 index 2358e5a221bb..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/convolution.py +++ /dev/null @@ -1,163 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Extract parameters from the convolution operators in TIR.""" -import math -from ethosu.vela import api as vapi -import tvm -from ..vela_api import SCALE_BIAS_LENGTH, get_accelerator_config -from .utils import get_outer_loops, get_op_attrs, get_base_address, get_loads, get_stores -from .dma import get_ifm_params, get_ofm_params -from .spec import SerialKernel, SerialAddressRange, SerialActivation, Serial2DConvolution - - -def get_conv2d_params(stmt, producers_consumers): - """Get the parameters necessary to construct a call_extern for a 2D convolution. - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of a convolution loop nest. - producers_consumers: ProducersConsumers - It associates pointers with the loop nest that produces - their values and with the loop nest that consumes their values. - - Returns - ------- - Serial2DConvolution - The parameters needed to construct a 2D convolution. - output_pointer : tvm.tir.Var - The output pointer of the convolution operation. - replace_pointer : tvm.tir.Var - The output pointer of the DMA write operation, which is to replace - the convolution output pointer. - is_allocator : bool - Whether this operator allocates its output. - - """ - accel_config = get_accelerator_config() - - attrs, body = get_op_attrs(stmt) - _, _, _, _, _, inner = get_outer_loops(body, "NHWC") - rh = inner - rw = rh.body - rc = rw.body - # loads = [output, input, weights, scale_bias, scale_bias, LUT, LUT] - loads = get_loads(rc.body) - # stores = [output] - stores = get_stores(rc.body) - input_pointer = loads[1].buffer.data - output_pointer = stores[0].buffer.data - # Get feature map info - serial_ifm, serial_padding = get_ifm_params(input_pointer, producers_consumers, stmt) - serial_ofm, serial_block_config, replace_pointer, is_allocator = get_ofm_params( - output_pointer, producers_consumers, stmt - ) - # Get kernel info - serial_kernel = SerialKernel( - width=int(rw.extent), - height=int(rh.extent), - stride_w=int(attrs["stride_w"]), - stride_h=int(attrs["stride_h"]), - dilation_w=int(attrs["dilation_w"]), - dilation_h=int(attrs["dilation_h"]), - ) - # Get scale_bias info - scale_bias_load = loads[3] - scale_bias_base = [get_base_address(index) for index in scale_bias_load.indices] - # Get weight info - weight_load = loads[2] - weight_base = [get_base_address(index) for index in weight_load.indices] - channels = serial_ofm[3] if isinstance(serial_ofm[3], int) else serial_ofm[3].value - - if accel_config == vapi.NpuAccelerator.Ethos_U65_512: - scale_bias_length = SCALE_BIAS_LENGTH * math.ceil(channels / 2) - scale_bias2_length = SCALE_BIAS_LENGTH * math.floor(channels / 2) - - serial_scale_bias = SerialAddressRange( - address=tvm.tir.BufferLoad(scale_bias_load.buffer, scale_bias_base), - length=scale_bias_length, - ) - serial_scale_bias2 = SerialAddressRange( - address=tvm.tir.BufferLoad( - scale_bias_load.buffer, [scale_bias_base[0] + scale_bias_length] - ), - length=scale_bias2_length, - ) - - weight_length = ( - channels * serial_kernel[0] * serial_kernel[1] * math.ceil(rc.extent.value / 2) - ) - weight2_length = ( - channels * serial_kernel[0] * serial_kernel[1] * math.floor(rc.extent.value / 2) - ) - - serial_weight = SerialAddressRange( - address=tvm.tir.BufferLoad(weight_load.buffer, weight_base), - length=weight_length, - ) - serial_weight2 = SerialAddressRange( - 
address=tvm.tir.BufferLoad(weight_load.buffer, [weight_base[0] + weight_length]), - length=weight2_length, - ) - else: - scale_bias_length = SCALE_BIAS_LENGTH * channels - - serial_scale_bias = SerialAddressRange( - address=tvm.tir.BufferLoad(scale_bias_load.buffer, scale_bias_base), - length=scale_bias_length, - ) - # Insert -1s into the spec to denote the absence of the other pointer - serial_scale_bias2 = SerialAddressRange( - address=tvm.tir.IntImm("int8", -1), - length=tvm.tir.IntImm("int8", -1), - ) - - weight_length = channels * serial_kernel[0] * serial_kernel[1] * rc.extent.value - - serial_weight = SerialAddressRange( - address=tvm.tir.BufferLoad(weight_load.buffer, weight_base), - length=weight_length, - ) - serial_weight2 = SerialAddressRange( - address=tvm.tir.IntImm("int8", -1), - length=tvm.tir.IntImm("int8", -1), - ) - # Get activation info - serial_activation = SerialActivation( - op=attrs["activation"], clip_min=attrs["clip_min"], clip_max=attrs["clip_max"] - ) - return ( - Serial2DConvolution( - ifm=serial_ifm, - ofm=serial_ofm, - kernel=serial_kernel, - weight=serial_weight, - weight2=serial_weight2, - weight_zero_point=attrs["weight_zero_point"], - scale_bias=serial_scale_bias, - scale_bias2=serial_scale_bias2, - padding=serial_padding, - activation=serial_activation, - rounding_mode=attrs["rounding_mode"], - upscale=attrs["upscale"], - block_config=serial_block_config, - ), - output_pointer, - replace_pointer, - is_allocator, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/depthwise.py b/python/tvm/relay/backend/contrib/ethosu/tir/depthwise.py deleted file mode 100644 index 5878c2a7e09c..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/depthwise.py +++ /dev/null @@ -1,119 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Extract information from the depthwise convolution operators in TIR.""" -from typing import Tuple -import tvm -from ..vela_api import SCALE_BIAS_LENGTH -from .utils import get_outer_loops, get_op_attrs, get_base_address, get_loads, get_stores -from .dma import get_ifm_params, get_ofm_params -from .spec import ( - SerialKernel, - SerialAddressRange, - SerialActivation, - Serial2DDepthwise, -) -from .producers_consumers import ProducersConsumers - - -def get_depthwise_conv2d_params( - stmt: tvm.tir.AttrStmt, producers_consumers: ProducersConsumers -) -> Tuple[Serial2DDepthwise, tvm.tir.Var, tvm.tir.Var]: - """Get the parameters necessary to construct a call_extern for a depthwise_conv2d. - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of a depthwise loop nest. 
- producers_consumers: ProducersConsumers - It associates pointers with the loop nest that produces - their values and with the loop nest that consumes their values. - - Returns - ------- - Serial2DDepthwise - The parameters needed to construct a 2D depthwise. - output_pointer : tvm.tir.Var - The output pointer of the convolution operation. - replace_pointer : tvm.tir.Var - The output pointer of the DMA write operation, which is to replace - the convolution output pointer. - is_allocator : bool - Whether this operator allocates its output. - - """ - attrs, body = get_op_attrs(stmt) - _, _, _, _, _, inner = get_outer_loops(body, "NHWC") - rh = inner - rw = rh.body - # loads = [output, input, weights, scale_bias, scale_bias] - loads = get_loads(rw.body) - # stores = [output] - stores = get_stores(rw.body) - input_pointer = loads[1].buffer.data - output_pointer = stores[0].buffer.data - # Get feature map info - serial_ifm, serial_padding = get_ifm_params(input_pointer, producers_consumers, stmt) - serial_ofm, serial_block_config, replace_pointer, is_allocator = get_ofm_params( - output_pointer, producers_consumers, stmt - ) - # Get kernel info - serial_kernel = SerialKernel( - width=int(rw.extent), - height=int(rh.extent), - stride_w=int(attrs["stride_w"]), - stride_h=int(attrs["stride_h"]), - dilation_w=int(attrs["dilation_w"]), - dilation_h=int(attrs["dilation_h"]), - ) - # Get scale_bias info - scale_bias_load = loads[3] - scale_bias_base = [get_base_address(index) for index in scale_bias_load.indices] - serial_scale_bias = SerialAddressRange( - address=tvm.tir.BufferLoad(scale_bias_load.buffer, scale_bias_base), - length=SCALE_BIAS_LENGTH * serial_ofm[3], - ) - # Get weight info - weight_load = loads[2] - weight_base = [get_base_address(index) for index in weight_load.indices] - serial_weight = SerialAddressRange( - address=tvm.tir.BufferLoad(weight_load.buffer, weight_base), - length=serial_ofm[3] * serial_kernel[0] * serial_kernel[1], - ) - # Get activation info - serial_activation = SerialActivation( - op=attrs["activation"], clip_min=attrs["clip_min"], clip_max=attrs["clip_max"] - ) - - return ( - Serial2DDepthwise( - ifm=serial_ifm, - ofm=serial_ofm, - kernel=serial_kernel, - weight=serial_weight, - weight_zero_point=attrs["weight_zero_point"], - scale_bias=serial_scale_bias, - padding=serial_padding, - activation=serial_activation, - rounding_mode=attrs["rounding_mode"], - upscale="NONE", - block_config=serial_block_config, - ), - output_pointer, - replace_pointer, - is_allocator, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/dma.py b/python/tvm/relay/backend/contrib/ethosu/tir/dma.py deleted file mode 100644 index 82485db65866..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/dma.py +++ /dev/null @@ -1,515 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Extract parameters from the DMA operators in TIR.""" -from typing import NamedTuple, Union -import tvm -from .utils import get_outer_loops, get_base_address, get_strides, get_op_attrs -from .spec import SerialBlockConfig, SerialFeatureMap, SerialPadding - - -def get_pad_params(stmt): - """Get the padding parameters from a pad loop nest. - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of a pad loop nest. - - Returns - ------- - pad : SerialPadding - The serializable padding. - input_pointer : tvm.tir.Var - The pointer consumed by the operation. - output_pointer : tvm.tir.Var - The pointer produced by the operation. - - """ - _, body = get_op_attrs(stmt) - n, h, w, c, _, inner = get_outer_loops(body, "NHWC") - output_pointer = inner.buffer.data - pad = SerialPadding(top=0, left=0, bottom=0, right=0) - if isinstance(inner.value, tvm.tir.Call): - input_pointer = inner.value.args[1].buffer.data - else: - input_pointer = inner.value.buffer.data - return pad, input_pointer, output_pointer - - padded_shape = [n.extent, h.extent, w.extent, c.extent] - - def _visit(expr): - if isinstance(expr, tvm.tir.expr.LT): - var = expr.a - val = expr.b - if var == h.loop_var: - pad.bottom = padded_shape[1] - val - else: - pad.right = padded_shape[2] - val - elif isinstance(expr, tvm.tir.expr.LE): - var = expr.b - val = expr.a - if var == h.loop_var: - pad.top = val - else: - pad.left = val - - cond = inner.value.args[0] - tvm.tir.stmt_functor.post_order_visit(cond, _visit) - return ( - pad, - input_pointer, - output_pointer, - ) - - -def get_upscale_params(stmt): - """Get the upscale parameters from a loop nest. - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of an upscale loop nest. - - Returns - ------- - input_pointer : tvm.tir.Var - The pointer consumed by the operation. - output_pointer : tvm.tir.Var - The pointer produced by the operation. - """ - _, body = get_op_attrs(stmt) - _, _, _, _, _, inner = get_outer_loops(body, "NHWC") - if isinstance(inner.value, tvm.tir.Call): - input_pointer = inner.value.args[1].buffer.data - else: - input_pointer = inner.value.buffer.data - output_pointer = inner.buffer.data - return (input_pointer, output_pointer) - - -def get_convert_to_nhwc_params(stmt): - """Get the true number of channels from a convert_to_nhwc loop nest. - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of a convert_to_nhwc loop nest. - - Returns - ------- - int - The true number of channels. - input_pointer : tvm.tir.Var - The pointer consumed by the operation. - output_pointer : tvm.tir.Var - The pointer produced by the operation. - - """ - attrs, body = get_op_attrs(stmt) - _, _, _, c, _, inner = get_outer_loops(body, "NHWC") - - # Ignore the reduce sum operation inserted to ensure - # compute that is deemed uneccesary isn't removed by TVM. - if attrs["layout"] == "NHCWB16": - inner = inner.body - input_pointer = inner.value.b.buffer.data - else: - input_pointer = inner.value.buffer.data - - output_pointer = inner.buffer.data - return c.extent, input_pointer, output_pointer - - -def get_convert_to_nhcwb16_params(stmt): - """Get the true number of channels from a convert_to_nhcwb16 loop nest. - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of a convert_to_nhcwb16 loop nest. 
- - Returns - ------- - out_channels : int - The true number of channels. - input_pointer : tvm.tir.Var - The pointer consumed by the operation. - output_pointer : tvm.tir.Var - The pointer produced by the operation. - - """ - attrs, body = get_op_attrs(stmt) - _, _, _, c, b, inner = get_outer_loops(body, attrs["layout"]) - output_pointer = inner.buffer.data - if isinstance(inner.value, tvm.tir.Call): - cond = inner.value.args[0] - out_channels = cond.b.value - input_pointer = inner.value.args[1].buffer.data - else: - input_pointer = inner.value.buffer.data - out_channels = c.extent * b.extent if attrs["layout"] == "NHCWB16" else c.extent - - return out_channels, input_pointer, output_pointer - - -class Tiles(NamedTuple): - height_0: tvm.tir.expr.IntImm - height_1: tvm.tir.expr.IntImm - width_0: tvm.tir.expr.IntImm - address_0: Union[tvm.tir.expr.BufferLoad, int] - address_1: Union[tvm.tir.expr.BufferLoad, int] - address_2: Union[tvm.tir.expr.BufferLoad, int] - - -def create_tiles(stmt: tvm.tir.stmt.AttrStmt) -> Tiles: - """Given an AttrStmt this function returns a Tiles instance - containing the tiles' addresses and dimensions. - - When rolling buffers are not used only tile0 is used. - Otherwise, when rolling buffers are used, the statement contains - modulo arithmetic operations, which are unsupported by the NPU. - To support this scenario more than one tile is used. - In particular, when the rolling variable is the height one - tile0 and tile2 are used, otherwise, when the rolling variable - is the width one, tile0 and tile1 are used. - - As an example consider this statement: - - // attr [iter_var(i0, )] pragma_op = "ethosu_read" - // attr [iter_var(i0, )] pragma_zero_point = 0 - // attr [iter_var(i0, )] pragma_layout = "NHCWB16" - // attr [iter_var(i0, )] pragma_scale = 1f - for (i0, 0, 1) { - for (i1, 0, 6) { - for (i2, 0, 1) { - for (i3, 0, 1) { - for (i4, 0, 16) { - ethosu_read[((i1*16) + i4)] = ethosu_write[((floormod((i1 + 4), 6)*16) + i4)] - } - } - } - } - } - - You can see from the floormod expression floormod((i1 + 4), 6) - that the rolling variable is i1, that is, the height one. - In this case tile0 and tile2 are used. - The height of tile0 will be 6 - 4 = 2, and height of tile2 will be 4. - Both the width of tile0 and tile2 will be equal to the extent of the width variable. - Also, the addresses are set accordingly. - When the rolling variable is the width one a symmetric approach will be used. - - It is worth mentioning that only the height of tile0, the height of tile1, - and the width of tile0 must be computed, the other ones can be inferred.
- """ - attrs, body = get_op_attrs(stmt) - _, h, w, _, _, inner = get_outer_loops(body, attrs["layout"]) - base_address = [get_base_address(index) for index in inner.value.indices] - read_stmt = inner.value - floor_mod_mul = None - - def _compute_stride(for_stmt): - stride = 1 - while isinstance(for_stmt.body, tvm.tir.For): - for_stmt = for_stmt.body - stride *= for_stmt.extent - return stride - - def _get_floor_mod_mul(stmt): - nonlocal floor_mod_mul - if ( - isinstance(stmt, tvm.tir.expr.Mul) - and isinstance(stmt.b, tvm.tir.expr.IntImm) - and isinstance(stmt.a, tvm.tir.FloorMod) - and isinstance(stmt.a.b, tvm.tir.expr.IntImm) - and isinstance(stmt.a.a, tvm.tir.expr.Add) - and isinstance(stmt.a.a.a, tvm.tir.expr.Var) - and isinstance(stmt.a.a.b, tvm.tir.expr.IntImm) - ): - floor_mod_mul = stmt - - tvm.tir.stmt_functor.post_order_visit(read_stmt, _get_floor_mod_mul) - if floor_mod_mul is not None: - rolling_var = floor_mod_mul.a.a.a - count = 0 - - def _count_var(var): - nonlocal count - if var == rolling_var: - count += 1 - - tvm.tir.stmt_functor.ir_transform(inner, _count_var, None, ["tir.Var"]) - if count == 2: - stride = floor_mod_mul.b - tile_length = floor_mod_mul.a.b - floor_mod_mul.a.a.b - if rolling_var == h.loop_var and _compute_stride(h) == stride: - return Tiles( - height_0=tile_length, - height_1=0, - width_0=w.extent, - address_0=tvm.tir.BufferLoad(inner.value.buffer, base_address), - address_1=0, - address_2=tvm.tir.BufferLoad(inner.value.buffer, [0]), - ) - if rolling_var == w.loop_var and _compute_stride(w) == stride: - return Tiles( - height_0=h.extent, - height_1=h.extent, - width_0=tile_length, - address_0=tvm.tir.BufferLoad(inner.value.buffer, base_address), - address_1=tvm.tir.BufferLoad(inner.value.buffer, [0]), - address_2=0, - ) - - return Tiles( - height_0=h.extent, - height_1=0, - width_0=w.extent, - address_0=tvm.tir.BufferLoad(inner.value.buffer, base_address), - address_1=0, - address_2=0, - ) - - -def get_read_params(stmt): - """Get the feature map parameters from a read loop nest. - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of a read loop nest. - - Returns - ------- - SerialFeatureMap - The serializable feature map. - input_pointer : tvm.tir.Var - The pointer consumed by the operation. - output_pointer : tvm.tir.Var - The pointer produced by the operation. - - """ - attrs, body = get_op_attrs(stmt) - _, h, w, c, _, inner = get_outer_loops(body, attrs["layout"]) - input_pointer = inner.value.buffer.data - output_pointer = inner.buffer.data - - # Needed for stride calculation, can replace with - # inner.value.buffer.strides in future. 
- assert len(inner.value.indices) == 1, "Ethos-U DMA expects flattened buffers" - stride_vars = [h.loop_var, w.loop_var, c.loop_var] - strides = get_strides(inner.value.indices[0], stride_vars) - - data_type = inner.buffer.data.type_annotation.element_type.dtype - tiles = create_tiles(stmt) - return ( - SerialFeatureMap( - data_type=data_type, - height=h.extent, - width=w.extent, - channels=c.extent, - tile_height_0=tiles.height_0, - tile_height_1=tiles.height_1, - tile_width_0=tiles.width_0, - tile_address_0=tiles.address_0, - tile_address_1=tiles.address_1, - tile_address_2=tiles.address_2, - tile_address_3=0, - scale=attrs["scale"], - zero_point=attrs["zero_point"], - layout=attrs["layout"], - stride_h=strides[0], - stride_w=strides[1], - stride_c=strides[2], - ), - input_pointer, - output_pointer, - ) - - -def get_write_params(stmt): - """Get the feature map parameters from a write loop nest. - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of a write loop nest. - - Returns - ------- - SerialFeatureMap - The serializable feature map. - input_pointer : tvm.tir.Var - The pointer consumed by the operation. - output_pointer : tvm.tir.Var - The pointer produced by the operation. - - """ - attrs, body = get_op_attrs(stmt) - _, h, w, c, _, inner = get_outer_loops(body, attrs["layout"]) - input_pointer = inner.value.buffer.data - output_pointer = inner.buffer.data - - # Needed for stride calculation, can replace with - # inner.value.buffer.strides in future. - assert len(inner.indices) == 1, "Ethos-U DMA expects flattened buffers" - stride_vars = [h.loop_var, w.loop_var, c.loop_var] - strides = get_strides(inner.indices[0], stride_vars) - - base_address = [get_base_address(index) for index in inner.indices] - data_type = inner.buffer.data.type_annotation.element_type.dtype - if "block_config_height" in attrs: - block_config = SerialBlockConfig( - height=int(attrs["block_config_height"]), - width=int(attrs["block_config_width"]), - depth=int(attrs["block_config_depth"]), - ) - else: - block_config = SerialBlockConfig(0, 0, 0) - return ( - SerialFeatureMap( - data_type=data_type, - height=h.extent, - width=w.extent, - channels=c.extent, - tile_height_0=h.extent, - tile_height_1=0, - tile_width_0=w.extent, - tile_address_0=tvm.tir.BufferLoad(inner.buffer, base_address), - tile_address_1=0, - tile_address_2=0, - tile_address_3=0, - scale=attrs["scale"], - zero_point=attrs["zero_point"], - layout=attrs["layout"], - stride_h=strides[0], - stride_w=strides[1], - stride_c=strides[2], - ), - block_config, - input_pointer, - output_pointer, - ) - - -def get_ifm_params(pointer, producers_consumers, stmt): - """Get the parameters associated with the DMA capabilities for an IFM. - - Parameters - ---------- - pointer : tvm.tir.Var - The pointer that the IFM DMA pipeline produces. - producers_consumers: ProducersConsumers - It associates pointers with the loop nest that produces - their values and with the loop nest that consumes their values. - - Returns - ------- - serial_ifm : SerialFeatureMap - The serializable IFM. - serial_padding : SerialPadding - The serializable padding. 
- - """ - pad = producers_consumers.get_producer(pointer, stmt) - serial_padding, input_pointer, _ = get_pad_params(pad) - upscale = producers_consumers.get_producer(input_pointer, pad) - input_pointer, _ = get_upscale_params(upscale) - convert_to_nhwc = producers_consumers.get_producer(input_pointer, upscale) - in_channels, input_pointer, _ = get_convert_to_nhwc_params(convert_to_nhwc) - read = producers_consumers.get_producer(input_pointer, convert_to_nhwc) - serial_ifm, _, _ = get_read_params(read) - serial_ifm.channels = in_channels - - floor_mod_stmt = None - for_stmt = None - - def _get_buffer_var(stmt): - nonlocal for_stmt - nonlocal floor_mod_stmt - if isinstance(stmt, tvm.tir.For): - for_stmt = stmt - if isinstance(stmt, tvm.tir.FloorMod): - floor_mod_stmt = stmt - - tvm.tir.stmt_functor.post_order_visit(stmt, _get_buffer_var) - - if floor_mod_stmt is not None: - layout = get_op_attrs(read)[0]["layout"] - channels = serial_ifm.channels - if for_stmt.body.loop_var == floor_mod_stmt.a.a.a: - height_a = floor_mod_stmt.b - floor_mod_stmt.a.b - height_b = serial_ifm.height - serial_ifm.height = height_a + height_b - serial_ifm.tile_height_0 = serial_ifm.height - address = serial_ifm.tile_address_0 - offset = ( - height_a * (channels // 16 + 1) * serial_ifm.width * 16 - if layout == "NHCWB16" - else height_a * serial_ifm.width * channels - ) - serial_ifm.tile_address_0 = tvm.tir.BufferLoad( - address.buffer, [address.indices[0] - offset] - ) - else: - width_a = floor_mod_stmt.b - floor_mod_stmt.a.b - width_b = serial_ifm.width - serial_ifm.width = width_a + width_b - serial_ifm.tile_width_0 = serial_ifm.width - address = serial_ifm.tile_address_0 - offset = width_a * 16 if layout == "NHCWB16" else width_a * channels - serial_ifm.tile_address_0 = tvm.tir.BufferLoad( - address.buffer, [address.indices[0] - offset] - ) - return serial_ifm, serial_padding - - -def get_ofm_params(pointer, producers_consumers, stmt): - """Get the parameters associated with the DMA capabilities for an OFM. - - Parameters - ---------- - pointer : tvm.tir.Var - The pointer that the OFM DMA pipeline consumes. - producers_consumers: ProducersConsumers - It associates pointers with the loop nest that produces - their values and with the loop nest that consumes their values. - - Returns - ------- - serial_ifm : SerialFeatureMap - The serializable OFM. - serial_block_config : SerialBlockConfig - The serializable block config. - output_pointer : tvm.tir.Var - The pointer that the OFM DMA pipeline produces. - is_allocator : bool - Whether this operator allocates its output. 
- - """ - convert_to_nhcwb16 = producers_consumers.get_consumer(pointer, stmt) - out_channels, _, output_pointer = get_convert_to_nhcwb16_params(convert_to_nhcwb16) - write = producers_consumers.get_consumer(output_pointer, convert_to_nhcwb16) - serial_ofm, serial_block_config, _, output_pointer = get_write_params(write) - is_allocator = True - - producer = producers_consumers.get_producer(output_pointer, write) - if producer is None or producer != write: - is_allocator = False - serial_ofm.channels = out_channels - return serial_ofm, serial_block_config, output_pointer, is_allocator diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/identity.py b/python/tvm/relay/backend/contrib/ethosu/tir/identity.py deleted file mode 100644 index 9610c8dd3cdc..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/identity.py +++ /dev/null @@ -1,175 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Extract information from the identity operator in TIR.""" -from typing import Tuple -import tvm -from .spec import ( - SerialBlockConfig, - SerialKernel, - SerialActivation, - SerialPooling, - SerialPadding, - SerialFeatureMap, -) -from .utils import get_op_attrs, get_base_address, get_strides, get_loads -from .producers_consumers import ProducersConsumers - - -def _get_feature_map(stmt: tvm.tir.AttrStmt, fm_type: str) -> Tuple[SerialFeatureMap, tvm.tir.Var]: - """Get the feature map parameters from a loop nest of any shape (as long there are at - most 4 nested loops). - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of a loop nest. - fm_type: str - Either "ifm" or "ofm", depending on whether it is an input or output feature map - - Returns - ------- - SerialFeatureMap - The serializable feature map. - output_pointer : tvm.tir.Var - The pointer produced by the operation. - - """ - assert fm_type in ("ifm", "ofm") - - attrs, body = get_op_attrs(stmt) - - loops = [] - inner = body - # extact the loops and the innermost statement - while hasattr(inner, "body"): - loops.append(inner) - inner = inner.body - - # If the batch size loop is present, we need to remove it - if len(loops) > 3: - assert loops[0].extent == 1 - loops = loops[1:] - - fm_inner = inner.value if fm_type == "ifm" else inner - - # Needed for stride calculation, can replace with - # inner.value.buffer.strides in future. 
- assert len(fm_inner.indices) == 1, "Ethos-U passes expect flattened buffers" - stride_vars = [l.loop_var for l in loops] - strides = get_strides(fm_inner.indices[0], stride_vars) - - base_address = [get_base_address(index) for index in fm_inner.indices] - data_type = inner.buffer.data.type_annotation.element_type.dtype - - serial_feature_map = SerialFeatureMap( - data_type=data_type, - height=loops[0].extent, - width=loops[1].extent if len(loops) > 1 else 1, - channels=loops[2].extent if len(loops) > 2 else 1, - tile_height_0=loops[0].extent, - tile_height_1=0, - tile_width_0=loops[1].extent if len(loops) > 1 else 1, - tile_address_0=tvm.tir.BufferLoad(fm_inner.buffer, base_address), - tile_address_1=0, - tile_address_2=0, - tile_address_3=0, - scale=attrs["scale"], - zero_point=attrs["zero_point"], - layout="NHWC", - stride_h=strides[0] if len(strides) > 0 else 1, - stride_w=strides[1] if len(strides) > 1 else 1, - stride_c=strides[2] if len(strides) > 2 else 1, - ) - - output_pointer = inner.buffer.data - - return serial_feature_map, output_pointer - - -def get_identity_params( - stmt: tvm.tir.AttrStmt, producers_consumers: ProducersConsumers -) -> Tuple[SerialPooling, tvm.tir.Var, tvm.tir.Var]: - """Get the parameters necessary to construct a call_extern for an identity pooling. - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of an identity pooling loop nest. - producers_consumers: ProducersConsumers - It associates pointers with the loop nest that produces - their values and with the loop nest that consumes their values. - - Returns - ------- - SerialPooling - The parameters needed to construct a 2D pooling. - output_pointer : tvm.tir.Var - The output pointer of the pooling operation. - replace_pointer : tvm.tir.Var - The output pointer of the DMA write operation, which is to replace - the pooling output pointer. - is_allocator : bool - Whether this operator allocates its output. 
- - """ - attrs, _ = get_op_attrs(stmt) - # Find the inner loop - store = stmt - while hasattr(store, "body"): - store = store.body - - # loads = [input, LUT, LUT] - loads = get_loads(store) - - input_pointer = loads[0].buffer.data - output_pointer = store.buffer.data - - read = producers_consumers.get_producer(input_pointer, stmt) - write = producers_consumers.get_consumer(output_pointer, stmt) - - serial_ifm, _ = _get_feature_map(read, "ifm") - serial_ofm, write_output_pointer = _get_feature_map(write, "ofm") - - replace_pointer = write_output_pointer - - is_allocator = True - producer = producers_consumers.get_producer(write_output_pointer, write) - if producer is None or producer != write: - is_allocator = False - - # TODO: We might want to support stand alone ReLU in the future by adding clip_min and - # clip max attributes to the identity operator - serial_activation = SerialActivation(op=attrs["activation"], clip_min=0, clip_max=0) - - # Create a serialized identity pooling to be run on the NPU - return ( - SerialPooling( - ifm=serial_ifm, - ofm=serial_ofm, - pooling_type="AVG", - pool_shape=SerialKernel(1, 1, 1, 1, 1, 1), - padding=SerialPadding(0, 0, 0, 0), - activation=serial_activation, - upscale="NONE", - rounding_mode=attrs["rounding_mode"], - block_config=SerialBlockConfig(0, 0, 0), - ), - output_pointer, - replace_pointer, - is_allocator, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/passes.py b/python/tvm/relay/backend/contrib/ethosu/tir/passes.py deleted file mode 100644 index 9636f2044733..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/passes.py +++ /dev/null @@ -1,1048 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument, no-else-return -# pylint: disable=use-list-literal, inconsistent-return-statements, too-many-nested-blocks -"""The TIR passes to be run on Arm(R) Ethos(TM)-U NPU TIR Compiler.""" -from collections import namedtuple -from typing import Optional -import numpy as np # type: ignore -from ethosu.vela import api as vapi # type: ignore - -import tvm -from tvm.relay.backend.contrib.ethosu import vela_api -from tvm.relay.backend.contrib.ethosu import tir_to_cs_translator as tirtocs - -from .convolution import get_conv2d_params -from .depthwise import get_depthwise_conv2d_params -from .pooling import get_pooling_params -from .binary_elementwise import get_binary_elementwise_params -from .identity import get_identity_params -from .unary_elementwise import get_unary_elementwise_params -from .transform import get_copy_params -from .producers_consumers import ProducersConsumers - -from .. import _ffi_api - - -def RemoveZeroStores(): - """This pass removes stores which just store zero to initialise buffers. 
- - We don't codegen these stores and they otherwise considerably reduce - the simplicity of the static traversal of convolution.""" - - def _remove_zero_store(stmt): - if isinstance(stmt.value, tvm.tir.IntImm) and int(stmt.value) == 0: - return tvm.tir.Evaluate(tvm.tir.IntImm("uint8", 0)) - return stmt - - def _ftransform(f, mod, ctx): - return f.with_body( - tvm.tir.stmt_functor.ir_transform(f.body, _remove_zero_store, None, ["tir.BufferStore"]) - ) - - return tvm.tir.transform.prim_func_pass( - _ftransform, opt_level=0, name="tir.contrib.ethos-u.remove_zero_stores" - ) - - -def ReplaceOperators(): - """Replace operators represented as explicit loop nests with call_externs - to NPU operators.""" - op_map = { - "ethosu_conv2d": get_conv2d_params, - "ethosu_copy": get_copy_params, - "ethosu_depthwise_conv2d": get_depthwise_conv2d_params, - "ethosu_pooling": get_pooling_params, - "ethosu_binary_elementwise": get_binary_elementwise_params, - "ethosu_identity": get_identity_params, - "ethosu_unary_elementwise": get_unary_elementwise_params, - } - producers_consumers = ProducersConsumers() - replace_output_pointer = {} - pointer_to_extents = {} - replaced_pointers = [] - - ReplaceInfo = namedtuple("ReplaceInfo", ["pointer", "reallocate"]) - - def _find_pointer_to_extent(stmt): - if isinstance(stmt, tvm.tir.Allocate): - pointer_to_extents[stmt.buffer_var] = stmt.extents - - def _resolve_pointers(stmt): - """This pass determines information about the pointers present in the IR. - In particular, it associates pointers with both the operations that - produce them and the operations that consume them through the - pointer_to_producer and pointer_to_consumer dicts. - - Additionally, it determines the extent (size/shape) of each pointer which - is required for the _replace_pointers pass which runs later.""" - loads = [] - - def _get_loads(stmt): - if isinstance(stmt, tvm.tir.BufferLoad): - loads.append(stmt.buffer.data) - - buffer_var = None - - def _get_buffer_var(stmt): - if isinstance(stmt, tvm.tir.BufferStore): - nonlocal buffer_var - buffer_var = stmt.buffer.data - - if isinstance(stmt, tvm.tir.AttrStmt): - if stmt.attr_key == "pragma_op": - tvm.tir.stmt_functor.post_order_visit(stmt, _get_buffer_var) - producers_consumers.add_producer(buffer_var, stmt) - - tvm.tir.stmt_functor.post_order_visit(stmt, _get_loads) - for load_pointer in loads: - if load_pointer != buffer_var: - producers_consumers.add_consumer(load_pointer, stmt) - - def _replace_operator(stmt): - """Replace operators with call_externs, having derived the parameters - from the relevant TIR expressions/statements. - - Note the complexity of this pass is mostly from the concept of 'replace - pointers'. A call_extern may in principle require information from several - loop nests in TIR (each corresponding to a different TE compute op). For - example, a convolution operator will have other TE compute ops before and - after corresponding to the input/output DMA functionality. Therefore, when - the 'central' convolution op is replaced with a call_extern, the memory - from the final DMA output op must be hoisted to the location/scope of - the call_extern. - - The is done by replacing the pointer corresponding to the current operation - with the 'true' output operator through the replace_output_pointer dict. 
- Because of this, the param_func must provide a replace_pointer if the op - isn't the true output but instead a no_compile op is.""" - if isinstance(stmt, tvm.tir.AttrStmt): - op_name = stmt.value.value - if stmt.attr_key == "pragma_op" and op_name in op_map: - # Get the parameters for the extern call - param_func = op_map[op_name] - info, output_pointer, replace_pointer, is_allocator = param_func( - stmt, producers_consumers - ) - if replace_pointer is not None: - # Allocate pointer only once - if replace_pointer in replaced_pointers: - is_allocator = False - replace_output_pointer[output_pointer] = ReplaceInfo( - replace_pointer, is_allocator - ) - replaced_pointers.append(replace_pointer) - # Make the extern call - irb = tvm.tir.ir_builder.create() - irb.emit(tvm.tir.call_extern("handle", op_name, *info)) - return irb.get() - return None - - def _remove_no_compile(stmt): - """Certain operators are marked as 'no compile' operators. This means they - should be removed from the IR as they are compiled as part of other operators. - The IFM DMA operations are an example of this, as they don't get compiled - independently but instead get compiled into the operator they're associated with, - e.g. a conv2d. - - There are potentially 2 parts to remove for an operator: - the allocate for its output and the compute nest itself. For the - allocate, we can check if the pointer they reference is produced by a 'no compile' - operator. For the compute nest, we can just check the op pragma.""" - if isinstance(stmt, tvm.tir.AttrStmt): - # Remove compute nests - if stmt.attr_key == "pragma_op" and stmt.value.value not in op_map: - return tvm.tir.Evaluate(0) - - if isinstance(stmt, tvm.tir.Allocate): - # Remove allocates - producer = producers_consumers.get_last_producer(stmt.buffer_var) - if producer: - if producer.attr_key == "pragma_op" and producer.value.value not in op_map: - return stmt.body - - return None - - def _replace_pointers(stmt): - if isinstance(stmt, tvm.tir.Allocate): - # If the allocate allocates a pointer that needs replacing - if stmt.buffer_var in replace_output_pointer: - replace_pointer, reallocate = replace_output_pointer[stmt.buffer_var] - if not reallocate: - return stmt.body - # Otherwise, rewrite the allocation statement with the new pointer - # and the new extent - replace_type = replace_pointer.type_annotation.element_type.dtype - replace_extents = pointer_to_extents[replace_pointer] - return tvm.tir.Allocate( - replace_pointer, replace_type, replace_extents, stmt.condition, stmt.body - ) - return None - - def _remove_buffer_decl(stmt): - if isinstance(stmt, tvm.tir.DeclBuffer): - if stmt.buffer.data in replace_output_pointer: - return stmt.body - - def _post_transform(stmt): - # Replace operators with call_externs - result = _replace_operator(stmt) - # Remove operators that don't need compiling - result = result or _remove_no_compile(stmt) - # Replace necessary pointers that were removed in the previous step - result = result or _replace_pointers(stmt) - # Replace BufferDecl, since only the tir.Var data pointer is - # still used, and not the tir.Buffer - result = result or _remove_buffer_decl(stmt) - - return result - - def _ftransform(f, mod, ctx): - tvm.tir.stmt_functor.post_order_visit(f.body, _find_pointer_to_extent) - tvm.tir.stmt_functor.post_order_visit(f.body, _resolve_pointers) - producers_consumers.add_allocate_variables(pointer_to_extents.keys()) - return f.with_body( - tvm.tir.stmt_functor.ir_transform( - f.body, None, _post_transform, ["tir.AttrStmt", 
"tir.Allocate"] - ) - ) - - return tvm.tir.transform.prim_func_pass( - _ftransform, opt_level=0, name="tir.contrib.ethos-u.replace_operators" - ) - - -def DivideConstants(const_dict): - """This pass rewrites the IR and constant dict such that all constant - accesses are at 0 offset and full length (i.e. they read the whole buffer). - - Where necessary, new constants are created in order to ensure the rewrite - can take place. As an example, if a convolution is tiled along the channels - axis, the accesses to the weights will need to be offset. This pass will - create new constants consisting of 'slices' of the weights so each tile - of the compute can access one of these 'slices'. - - The purpose of this pass is to transform the IR into a form we can apply - constant encoding to (which will compress weights and encode biases).""" - buffer_to_const = {} # type: ignore - new_buffers = [] - new_consts = [] - keep_buffers = set() - new_const_dict = {} - - def _visit(stmt): - new_args = [] - # We don't want to divide the constant that will be executed on two cores in parallel - is_u65_conv2d = ( - vela_api.get_accelerator_config() == vapi.NpuAccelerator.Ethos_U65_512 - and stmt.args[0] == "ethosu_conv2d" - ) - for i, arg in enumerate(stmt.args): - if isinstance(arg, tvm.tir.expr.BufferLoad): - # If we're trying to load a buffer that maps to a constant - if arg.buffer.data in buffer_to_const: - const = buffer_to_const[arg.buffer.data] - flattened_const_shape = np.prod(const.shape) - - offset = int(arg.indices[0]) - # Note by convention the arg after a constant read is the length of the read - length = int(stmt.args[i + 1]) - # If it's anything other than a full read, create a new buffer - if ( - offset != 0 or flattened_const_shape != length and length > 0 - ) and not is_u65_conv2d: - out_channels = const.shape[0] - offset_channels = int((offset * out_channels) / flattened_const_shape) - length_channels = int((length * out_channels) / flattened_const_shape) - # split the constant up across channels - split_const = np.split(const, out_channels, axis=0) - # create a new const out of the channels we want to keep - new_const = np.concatenate( - split_const[offset_channels : offset_channels + length_channels], axis=0 - ) - new_consts.append(new_const) - new_buffer = tvm.tir.decl_buffer( - (length,), arg.dtype, scope=arg.buffer.scope() - ) - new_buffers.append(new_buffer) - new_args.append(tvm.tir.expr.BufferLoad(new_buffer, [0])) - continue - keep_buffers.add(arg.buffer.data) - - new_args.append(arg) - - return tvm.tir.Call(stmt.dtype, stmt.op, new_args, stmt.span) - - def _ftransform(f, mod, ctx): - for i, param in enumerate(f.params): - if i in const_dict: - buffer_to_const[param] = const_dict[i] - buffer_to_const[f.buffer_map[param].data] = const_dict[i] - - new_body = tvm.tir.stmt_functor.ir_transform(f.body, _visit, None, ["tir.Call"]) - # Both the params and buffer map need updating for the newly introduced buffers - new_params = [] # type: ignore - new_buffer_map = {} - for i, param in enumerate(f.params): - buffer = f.buffer_map[param] - pointer = buffer.data - if pointer in buffer_to_const: - if pointer not in keep_buffers: - continue - new_const_dict[len(new_params)] = const_dict[i] - new_params.append(param) - new_buffer_map[param] = buffer - - for i, new_buffer in enumerate(new_buffers): - handle = tvm.tir.Var("placeholder", "handle") - new_params.append(handle) - new_buffer_map[handle] = new_buffer - new_const_dict[len(new_params) - 1] = new_consts[i] - - new_f = tvm.tir.PrimFunc( - 
new_params, - new_body, - f.ret_type, - new_buffer_map, - f.attrs, - f.span, - ) - return new_f - - def _divide_constants(mod): - transform_func = tvm.tir.transform.prim_func_pass( - _ftransform, opt_level=0, name="tir.contrib.ethos-u.divide_constants" - ) - new_func = transform_func(mod) - return new_func, new_const_dict - - return _divide_constants - - -def EncodeConstants(const_dict): - """the NPU requires that weights are compressed and bias/scales are 'encoded', both - of which are performed by this pass. - - This pass modifies both the constant dict to contain the post-encoding values of the - constants and the IR to adjust buffer types/sizes/accesses so they align with the - encoded constants. Calls to the Vela API are made to perform the actual compression/ - encoding. - - """ - new_const_dict = {} - - def collect_encoding_definitions(stmt, old_buffer_var_to_const): - # Map from copy destination to copy source. - copy_map = {} - # List of buffer copies that occurred - copied_buffers = [] - # List of encoded buffer information - constant_buffer_replacements = [] - - def _align_scale_bias(tir_extern_call, bias): - """Align the scale_bias to 16 bytes.""" - value_bytes = bytearray() - value_bytes.extend(bias.tobytes()) - # Align to 16 - remainder = (len(value_bytes)) % 16 - if remainder > 0: - value_bytes.extend(bytearray(16 - remainder)) - value = np.frombuffer(value_bytes, dtype="uint8") - return value - - accel_config = vela_api.get_accelerator_config() - - def _encode_weights(tir_extern_call, weights): - """Encode the weights for a TIR extern call.""" - value_bytes = vela_api.encode_weights(tir_extern_call, weights, accel_config) - value = np.frombuffer(value_bytes, dtype="uint8") - return value - - def _declare_constant_buffer(old_buffer, encoded_constants, split_idx): - """Create a new buffer and add the old buffer and its pointer to the - rewriting maps.""" - new_buffer = tvm.tir.decl_buffer( - shape=[len(encoded_constants)], - dtype=str(encoded_constants.dtype), - name=old_buffer.name + "_encoded", - scope=old_buffer.scope(), - ) - - constant_buffer_replacements.append( - { - "old_buffer": old_buffer, - "new_buffer": new_buffer, - "encoded_constants": encoded_constants, - "split_idx": split_idx, - } - ) - - def _encode_weights_or_bias(buffer1, buffer2, stmt, encode_func): - """Encode the weights or align the bias either for one or two cores, - depending on the variant.""" - constant = old_buffer_var_to_const[buffer1.data] - - # If we have just one core, encode the whole constant - if buffer2 is None: - new_const = encode_func(stmt, constant) - return new_const, None - - # Assume that the constant tensor has not been flattened yet - assert len(constant.shape) != 1 - channels = constant.shape[0] - split_const = np.split(constant, channels, axis=0) - - const_list = [split_const[i] for i in range(channels) if i % 2 == 0] - const_to_encode = np.concatenate(const_list, axis=0) - - new_const = encode_func(stmt, const_to_encode) - split_idx = len(new_const) - - # Encode half of the constant separately for the other core if it exists - assert buffer1.same_as(buffer2) - const2_list = [split_const[i] for i in range(channels) if i % 2 == 1] - const2_to_encode = np.concatenate(const2_list, axis=0) - - new_const2 = encode_func(stmt, const2_to_encode) - new_const = np.append(new_const, new_const2).astype("uint8") - - return new_const, split_idx - - def _visit(stmt): - if isinstance(stmt, tvm.tir.Call): - op = str(stmt.args[0].value) - # Handle copies as a special-case by propagating the 
buffer information - # from the read to the write pointer. - if op == "ethosu_copy": - read_buffer = stmt.args[1].buffer - write_buffer = stmt.args[3].buffer - # Assert writing to the base of the write_var (pre-StorageRewrite) - assert list(stmt.args[3].indices) == [0] - assert list(stmt.args[1].indices) == [0] - copied_buffers.append({"source": read_buffer, "dest": write_buffer}) - copy_map[write_buffer] = read_buffer - - ops_with_weights = { - "ethosu_conv2d": tirtocs.translate_ethosu_conv2d, - "ethosu_depthwise_conv2d": tirtocs.translate_ethosu_depthwise_conv2d, - } - if op in ops_with_weights: - npu_op, _ = ops_with_weights[op](stmt) - - # Encode the weights - weights_buffer = npu_op.weights[0].address.buffer - if weights_buffer in copy_map: - weights_buffer = copy_map[weights_buffer] - - # In case of U65 512 mac variant the weights are split across two cores - # and need to be encoded separately - weights2_buffer = ( - npu_op.weights[1].address.buffer - if accel_config == vapi.NpuAccelerator.Ethos_U65_512 - else None - ) - if weights2_buffer in copy_map: - weights2_buffer = copy_map[weights2_buffer] - - new_weights, split_idx = _encode_weights_or_bias( - weights_buffer, weights2_buffer, stmt, _encode_weights - ) - _declare_constant_buffer(weights_buffer, new_weights, split_idx) - - # Align the scale_bias to 16 bytes - scale_bias_buffer = npu_op.biases[0].address.buffer - if scale_bias_buffer in copy_map: - scale_bias_buffer = copy_map[scale_bias_buffer] - scale_bias2_buffer = ( - npu_op.biases[1].address.buffer - if accel_config == vapi.NpuAccelerator.Ethos_U65_512 - else None - ) - if scale_bias2_buffer in copy_map: - scale_bias2_buffer = copy_map[scale_bias2_buffer] - - new_scale_bias, split_idx = _encode_weights_or_bias( - scale_bias_buffer, scale_bias2_buffer, stmt, _align_scale_bias - ) - - _declare_constant_buffer(scale_bias_buffer, new_scale_bias, split_idx) - - tvm.tir.stmt_functor.post_order_visit(stmt, _visit) - - return { - "copied_buffers": copied_buffers, - "constant_buffer_replacements": constant_buffer_replacements, - } - - def transform_stmt( - stmt, - buf_remap, - var_remap, - pointer_to_buffer, - new_buffer_var_to_const, - new_buffer_to_split_idx, - ): - def _visit_rewrite(stmt): - if isinstance(stmt, tvm.tir.Call): - # For extern calls, we need to rewrite pairs of arguments corresponding to - # base address load and the length of the load. - old_args = list(stmt.args) - - new_args = [stmt.args[0]] - for prev_arg, arg in zip(old_args[:-1], old_args[1:]): - # If the previous argument was a load from an - # encoded buffer, the current should be a length. 
- if ( - isinstance(prev_arg, tvm.tir.BufferLoad) - and prev_arg.buffer.data in new_buffer_var_to_const - ): - buffer_size = np.prod(list(prev_arg.buffer.shape)) - arg = buffer_size - # We have to check for split weights/bias for conv2d and depthwise_conv2d - if old_args[0] in ("ethosu_conv2d", "depthwise_conv2d"): - # We have split weights/bias - if prev_arg.buffer in new_buffer_to_split_idx: - split_idx = new_buffer_to_split_idx[prev_arg.buffer] - # The first half of the split buffer - if prev_arg.indices[0] == 0: - arg = split_idx - # the second half of the split buffer - else: - arg = buffer_size - split_idx - - new_args.append(arg) - - return tvm.tir.Call(stmt.dtype, stmt.op, new_args, stmt.span) - - if isinstance(stmt, tvm.tir.Allocate): - # Where a pointer needs rewriting, the allocate for it must be rewritten - allocate_pointer = stmt.buffer_var - if allocate_pointer in var_remap: - new_allocate_pointer = var_remap[allocate_pointer] - new_buffer = pointer_to_buffer[new_allocate_pointer] - - return tvm.tir.Allocate( - new_buffer.data, - new_buffer.dtype, - new_buffer.shape, - stmt.condition, - stmt.body, - stmt.span, - ) - - # The following rewrites would be better expressed by just - # rewriting the Buffers. However ir_transform doesn't - # visit Buffers, so instead we do the next best thing and - # rewrite the nodes which contain the Buffers. - if isinstance(stmt, tvm.tir.BufferLoad): - if stmt.buffer in buf_remap: - new_buffer = buf_remap[stmt.buffer] - new_indices = stmt.indices - offset = new_indices[0] - if offset != 0 and new_buffer in new_buffer_to_split_idx: - offset = new_buffer_to_split_idx[new_buffer] - return tvm.tir.BufferLoad(buf_remap[stmt.buffer], [offset], stmt.span) - - if isinstance(stmt, tvm.tir.AttrStmt): - node_pointer = stmt.node - if node_pointer in var_remap: - return tvm.tir.AttrStmt( - var_remap[node_pointer], - stmt.attr_key, - stmt.value, - stmt.body, - stmt.span, - ) - - return None - - return tvm.tir.stmt_functor.ir_transform( - stmt, - None, - _visit_rewrite, - ["tir.Call", "tir.Allocate", "tir.BufferLoad", "tir.AttrStmt"], - ) - - def _collect_parameter_buffer_aliases(prim_func): - buffer_vars = {} - for param in prim_func.params: - if param in prim_func.buffer_map: - buf = prim_func.buffer_map[param] - buffer_vars[buf.data] = {buf} - - def visit(node): - if isinstance(node, (tvm.tir.BufferStore, tvm.tir.BufferLoad, tvm.tir.DeclBuffer)): - buf = node.buffer - if buf.data in buffer_vars: - buffer_vars[buf.data].add(buf) - - tvm.tir.stmt_functor.post_order_visit(prim_func.body, visit) - return buffer_vars - - def _ftransform(f, mod, ctx): - param_buffer_var_usage = _collect_parameter_buffer_aliases(f) - - # Step 0: Unpack the constant dictionary in terms of the - # functions buffers. - old_buffer_var_to_const = {} - for i, param in enumerate(f.params): - if i in const_dict: - old_buffer_var_to_const[f.buffer_map[param].data] = const_dict[i] - - # Step 1: Collect information on the buffers that will be - # replaced by encodings. - buffer_information = collect_encoding_definitions(f.body, old_buffer_var_to_const) - - # Step 2: Generate variable/buffer remaps, based on the - # collected information. 
- buf_remap = {} - new_buffer_var_to_const = {} - new_buffer_to_split_idx = {} - - def define_remap(old_buf, new_buf): - try: - old_buffers = param_buffer_var_usage[old_buf.data] - except KeyError: - old_buffers = [old_buf] - - for old_buffer in old_buffers: - buf_remap[old_buffer] = new_buf - - # Any encoded buffers must be replaced - for info in buffer_information["constant_buffer_replacements"]: - define_remap(info["old_buffer"], info["new_buffer"]) - - new_buffer_var_to_const[info["new_buffer"].data] = info["encoded_constants"] - - if info["split_idx"]: - new_buffer_to_split_idx[info["new_buffer"]] = info["split_idx"] - - # Any buffers that are copied into from an encoded buffer must - # be replaced. - for info in buffer_information["copied_buffers"]: - copy_source = info["source"] - while copy_source in buf_remap: - copy_source = buf_remap[copy_source] - - copy_dest = info["dest"] - - if copy_source.shape != copy_dest.shape or copy_source.dtype != copy_dest.dtype: - new_dest = tvm.tir.decl_buffer( - shape=copy_source.shape, - dtype=copy_source.dtype, - name=copy_dest.name, - scope=copy_dest.scope(), - ) - define_remap(copy_dest, new_dest) - if copy_source.data in new_buffer_var_to_const: - new_buffer_var_to_const[new_dest.data] = new_buffer_var_to_const[ - copy_source.data - ] - - if copy_source in new_buffer_to_split_idx: - new_buffer_to_split_idx[new_dest] = new_buffer_to_split_idx[copy_source] - - # Define additional dependent lookup tables. - var_remap = {old.data: new.data for (old, new) in buf_remap.items()} - pointer_to_buffer = { - buf.data: buf for (old, new) in buf_remap.items() for buf in [old, new] - } - - # Step 3: Then perform the rewrites - new_body = transform_stmt( - f.body, - buf_remap, - var_remap, - pointer_to_buffer, - new_buffer_var_to_const, - new_buffer_to_split_idx, - ) - - # Step 4: Rewrite the buffer map and const dict to instead use the encoded versions - new_buffer_map = {} - for i, param in enumerate(f.params): - buffer = f.buffer_map[param] - if buffer in buf_remap: - buffer = buf_remap[buffer] - - if buffer.data in new_buffer_var_to_const: - new_const_dict[i] = new_buffer_var_to_const[buffer.data].flatten() - elif buffer.data in old_buffer_var_to_const: - new_const_dict[i] = old_buffer_var_to_const[buffer.data].flatten() - - new_buffer_map[param] = buffer - - new_f = tvm.tir.PrimFunc( - f.params, - new_body, - f.ret_type, - new_buffer_map, - f.attrs, - f.span, - ) - return new_f - - def _encode_constants(mod): - mod, divided_const_dict = DivideConstants(const_dict)(mod) - const_dict.clear() - for key, value in divided_const_dict.items(): - const_dict[key] = value - transform_func = tvm.tir.transform.prim_func_pass( - _ftransform, opt_level=0, name="tir.contrib.ethos-u.encode_constants" - ) - new_func = transform_func(mod) - return new_func, new_const_dict - - return _encode_constants - - -# This need to be kept in sync with kDisableLowerTVMBuiltin in include/tvm/tir/transform.h -DISABLE_LOWER_BUILTIN = "disable_lower_builtin" - - -def AnnotateAllocates(): - """ - This is pass to annotate all allocate - nodes of the PrimFuncs of the microNPU - to be not lowered to built-ins. 
- """ - - def _post_transform(allocate): - return tvm.tir.Allocate( - buffer_var=allocate.buffer_var, - dtype=allocate.dtype, - extents=allocate.extents, - condition=allocate.condition, - body=allocate.body, - annotations={DISABLE_LOWER_BUILTIN: True}, - ) - - def _ftransform(f, mod, ctx): - return f.with_body( - tvm.tir.stmt_functor.ir_transform(f.body, None, _post_transform, ["tir.Allocate"]) - ) - - return tvm.tir.transform.prim_func_pass( - _ftransform, opt_level=0, name="tir.contrib.ethos-u.annotate_allocates" - ) - - -def RemoveConcatenates(): - """Remove concatenate operators by modifying the input buffers to write directly into - the concatenated buffer with the appropriate offset. - - This pass works in two stages. The first finds every concatenate operation (marked by - pragma_op = ethosu_concatenate) and it performs the following analysis. For each buffer - that is concatenated, the buffer is marked that it is to be replaced with the concat - buffer and the axis along which it is concatenated as well as the offset along that - axis is recorded in 'ReplaceInfo'. Once this analysis is completed, the concatenate - loop nest along with its buffer realization statements are removed. - - In the second stage, the input buffers to the concatenate operators are rewritten - to use the concat buffer directly. This means applying the correct offset to the - concatenation axis where ever the buffer is loaded or stored. Additionally, as the - realization statements for the concat buffers were removed in the first stage, they - are rewritten in place of the input buffer realization with the earliest liveness.""" - - in_concat = [False] # Whether the visitor is currently inside a concatenate operator - concat_buffers = [] # The buffers produced by concatenate operators - buffer_replace_map = {} # A map of buffers to be replaced with the concat buffer - attrs_by_buffer = {} # AttrStmts by the buffer they reference - realizes_by_buffer = {} # BufferRealize statements by the buffer they reference - first_replacements = {} # The first buffers to be replaced by a given concat buffer - - ReplaceInfo = namedtuple("ReplaceInfo", ["buffer", "axis", "offset"]) - - def _get_replace_info(buffer_load, concat_buffer): - axis = 0 - offset = 0 - dmap = dict() - - for i, index in enumerate(buffer_load.indices): - if isinstance(index, tvm.tir.Sub): - axis = i - dmap = {} - - def _visit(stmt): - if isinstance(stmt, tvm.tir.Var): - dmap[stmt] = tvm.arith.IntervalSet(0, 0) - - tvm.tir.stmt_functor.post_order_visit(index, _visit) - offset = abs(int(tvm.arith.Analyzer().int_set(index, dmap).max_value)) - return ReplaceInfo(concat_buffer, axis, offset) - - def _pre_remove(stmt): - if isinstance(stmt, tvm.tir.BufferRealize): - # Record the realize statements by buffer as we need to hoist some of these - realizes_by_buffer[stmt.buffer] = stmt - if isinstance(stmt, tvm.tir.AttrStmt): - if stmt.attr_key == "realize_scope" and isinstance(stmt.node, tvm.tir.Buffer): - # Record the realize_scope attrs by buffer as we need to hoist some of these - attrs_by_buffer[stmt.node] = stmt - if stmt.attr_key == "pragma_op" and stmt.value.value == "ethosu_concatenate": - # Record that we're entering a concatenate loop nest - in_concat[0] = True - if isinstance(stmt, tvm.tir.BufferLoad) and in_concat[0]: - # Any buffer loaded inside a concat is a buffer we intend to replace with this pass. - # The buffer_replace_map keeps track of which buffers need replacing with the - # concat buffer. 
- replace_info = _get_replace_info(stmt, concat_buffers[-1]) - buffer_replace_map[stmt.buffer] = replace_info - if isinstance(stmt, tvm.tir.BufferStore) and in_concat[0]: - # If we're inside a concat, the BufferStore indicates what the concat buffer is - concat_buffers.append(stmt.buffer) - - def _post_remove(stmt): - if isinstance(stmt, tvm.tir.AttrStmt): - if isinstance(stmt.node, tvm.tir.Buffer) and stmt.node in concat_buffers: - return stmt.body - if stmt.attr_key == "pragma_op" and stmt.value.value == "ethosu_concatenate": - # When we leave a concatenate operator, record it and then remove the loop nest - in_concat[0] = False - return tvm.tir.Evaluate(0) - if isinstance(stmt, tvm.tir.BufferRealize): - if stmt.buffer in concat_buffers: - return stmt.body - return None - - def _pre_replace(stmt): - if isinstance(stmt, (tvm.tir.BufferLoad, tvm.tir.BufferStore)): - # The first buffer referenced that needs replacing with a concat buffer shall - # be the one that the concat buffer realize is hoisted to. - if stmt.buffer in buffer_replace_map: - concat_buffer = buffer_replace_map[stmt.buffer].buffer - if concat_buffer not in first_replacements: - first_replacements[concat_buffer] = stmt.buffer - - def _post_replace(stmt): - if isinstance(stmt, tvm.tir.BufferStore): - if stmt.buffer in buffer_replace_map: - # Replace the original buffer store with a new one into the concat buffer - # and adjust the indices accordingly to account for the offset - replace_info = buffer_replace_map[stmt.buffer] - concat_buffer = replace_info.buffer - new_indices = list(stmt.indices) - new_indices[replace_info.axis] += replace_info.offset - # The new buffer store node that stores the tensor directly into the concat buffer - new_store = tvm.tir.BufferStore(concat_buffer, stmt.value, new_indices, stmt.span) - return new_store - if isinstance(stmt, tvm.tir.BufferLoad): - if stmt.buffer in buffer_replace_map: - # Replace the original buffer load with a new one into the concat buffer - # and adjust the indices accordingly to account for the offset - replace_info = buffer_replace_map[stmt.buffer] - concat_buffer = replace_info.buffer - new_indices = list(stmt.indices) - new_indices[replace_info.axis] += replace_info.offset - new_load = tvm.tir.BufferLoad(concat_buffer, new_indices, stmt.span) - return new_load - if isinstance(stmt, tvm.tir.BufferRealize): - if stmt.buffer in buffer_replace_map: - concat_buffer = buffer_replace_map[stmt.buffer].buffer - # If this isn't the first buffer replaced, don't hoist the realize - if first_replacements[concat_buffer] != stmt.buffer: - return stmt.body - # Otherwise, do hoist it - else: - concat_realize = realizes_by_buffer[concat_buffer] - new_realize = tvm.tir.BufferRealize( - concat_realize.buffer, - concat_realize.bounds, - concat_realize.condition, - stmt.body, - stmt.span, - ) - return new_realize - if isinstance(stmt, tvm.tir.AttrStmt): - if isinstance(stmt.node, tvm.tir.Buffer) and stmt.node in buffer_replace_map: - concat_buffer = buffer_replace_map[stmt.node].buffer - # If this isn't the first buffer replaced, don't hoist the attrstmt - if first_replacements[concat_buffer] != stmt.node: - return stmt.body - # Otherwise, do hoist it - else: - concat_attr = attrs_by_buffer[concat_buffer] - new_attr = tvm.tir.AttrStmt( - concat_attr.node, - concat_attr.attr_key, - concat_attr.value, - stmt.body, - stmt.span, - ) - return new_attr - - def _ftransform(f, mod, ctx): - f = f.with_body( - tvm.tir.stmt_functor.ir_transform( - f.body, - _pre_remove, - _post_remove, - 
["tir.AttrStmt", "tir.BufferLoad", "tir.BufferStore", "tir.BufferRealize"], - ) - ) - return f.with_body( - tvm.tir.stmt_functor.ir_transform( - f.body, - _pre_replace, - _post_replace, - ["tir.AttrStmt", "tir.BufferLoad", "tir.BufferStore", "tir.BufferRealize"], - ) - ) - - return tvm.tir.transform.prim_func_pass( - _ftransform, opt_level=0, name="tir.contrib.ethos-u.remove_concatenates" - ) - - -def CreatePrimFuncWithoutConstants(const_dict): - """ - This pass will remove arguments that are constants - from PrimFunc Args. These should be replaced properly - with tir.allocate_const when it becomes available. - - It also modifies the constant dictionary to - rewrite the keys as the actual tir.Vars that are params - rather than the index because this pass removes PrimFunc - arguments that represent constants. - """ - - new_const_dict = dict() - - def _ftransform(f, mod, ctx): - new_params = list() - new_buffer_map = dict() - for param_idx in const_dict.keys(): - # We are using buffer_var to key the constants as - # PrimFunc params of constants will be removed. - new_const_dict[f.buffer_map[f.params[param_idx]].data] = const_dict[param_idx] - for i, param in enumerate(f.params): - if i not in const_dict.keys(): - new_params.append(param) - new_buffer_map[param] = f.buffer_map[param] - return tvm.tir.PrimFunc( - new_params, - f.body, - f.ret_type, - new_buffer_map, - f.attrs, - f.span, - ) - - def _create_primfunc_without_constants(mod): - transform_func = tvm.tir.transform.prim_func_pass( - _ftransform, opt_level=0, name="tir.contrib.ethos-u.CreatePrimFuncWithoutConstants" - ) - mod = transform_func(mod) - return mod, new_const_dict - - return _create_primfunc_without_constants - - -def HoistAllocates() -> tvm.IRModule: - """ - Hoist allocate nodes up to the top of the body of the main function. - - Returns - ------- - tvm.IRModule - The new module with hoisted allocate nodes. - """ - return _ffi_api.HoistAllocates() - - -def CopyComputeReordering( - max_copy_movements: Optional[int] = None, reorder_by_cycles: Optional[bool] = None -) -> tvm.IRModule: - """ - Reorders copy and compute nodes in such a way that independent DMA copies - and computes happen in parallel. - Copies to buffers with local scope are not reordered since they copy LUT - into the SHRAM and that already happens in parallel with copying weights into - the weights encoder. - - If reorder_by_cycles is set, we use the compute_cycles_hint to decide the reordering. If it is - not set, we move the copies up by a fixed number of movements, either by max_copy_movements if - it is specified, or by default value of 1. - - If reordering based on the cycle count is enabled, we try to achieve further copy latency - hiding with a two step algorithm: - (1) Move all the global copies (i.e. copies that copy a constant into SRAM for conv2d or - depthwise_conv2d) above a preceding compute op. If in general the computes take longer than - copies, this should be enough to hide the copy latencies. - (2) If there are some global copies that take longer than the computes, we might be able to - hide them further by moving them further up in a graph since in general there are more compute - ops than copy ops in a graph (as only conv2d and depthwise_conv2d have constants associated - with them). The algortithm checks whether a copy is hidden and if it is not, it checks if a - preceding compute op has a preceding copy and if it doesn't it moves the copy that we try to - hide further up. 
It keeps moving the copy until it can't move it any further or until the - latency is hidden. - - Parameters - ---------- - max_copy_movements: Optional[int] - The maximum number of movements allowed for a copy. - If None, the pass context option - tir.contrib.ethos-u.copy_compute_reordering_max_copy_movements - is used if provided, otherwise the default value will be 1. - - reorder_by_cycles: Optional[bool] - Whether to reorder the computes and copies based on the cycle hint. - If None, the pass context option - tir.contrib.ethos-u.copy_compute_reordering_reorder_by_cycles - is used if provided, otherwise the default value will be False. - - Returns - ------- - tvm.IRModule - The new module with copy and compute nodes reordered. - """ - return _ffi_api.CopyComputeReordering(max_copy_movements, reorder_by_cycles) - - -def MergeConstants(const_dict): - """ - This pass looks for the constants used by each compute operator - and merges them into a single buffer. - Constants written to a buffer with local scope are not merged. - """ - - def _merge_constants(mod): - nonlocal const_dict - try: - mod["main"] - except: - raise tvm.TVMError( - "Expected a single primitive function called 'main'. " - "Please run the MergeConstants pass in conjunction with the LowerToTIR() pass." - ) - - new_const_dict = {} - for param in const_dict.keys(): - new_const_dict[tvm.tir.IntImm("int64", param)] = tvm.nd.array(const_dict[param]) - mod["main"] = mod["main"].with_attr("ethos-u.const_dict", new_const_dict) - - mod = _ffi_api.MergeConstants()(mod) - const_dict = mod["main"].attrs["ethos-u.const_dict"] - mod = _ffi_api.RemoveConstDictAttribute()(mod) - - new_const_dict = {} - for param in const_dict.keys(): - new_const_dict[int(param)] = const_dict[param].numpy() - - return mod, new_const_dict - - return _merge_constants diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/pooling.py b/python/tvm/relay/backend/contrib/ethosu/tir/pooling.py deleted file mode 100644 index 069930475df9..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/pooling.py +++ /dev/null @@ -1,96 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Extract information from the pooling operators in TIR.""" -from typing import Tuple -import tvm -from .utils import get_outer_loops, get_op_attrs, get_loads, get_stores -from .dma import get_ifm_params, get_ofm_params -from .spec import SerialKernel, SerialActivation, SerialPooling -from .producers_consumers import ProducersConsumers - - -def get_pooling_params( - stmt: tvm.tir.AttrStmt, producers_consumers: ProducersConsumers -) -> Tuple[SerialPooling, tvm.tir.Var, tvm.tir.Var]: - """Get the parameters necessary to construct a call_extern for a pooling. 
- - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of a convolution loop nest. - producers_consumers: ProducersConsumers - It associates pointers with the loop nest that produces - their values and with the loop nest that consumes their values. - - Returns - ------- - SerialPooling - The parameters needed to construct a 2D convolution. - output_pointer : tvm.tir.Var - The output pointer of the convolution operation. - replace_pointer : tvm.tir.Var - The output pointer of the DMA write operation, which is to replace - the convolution output pointer. - is_allocator : bool - Whether this operator allocates its output. - """ - attrs, body = get_op_attrs(stmt) - _, _, _, _, _, inner = get_outer_loops(body, "NHWC") - rh = inner - rw = rh.body - # loads = [output, input, LUT, LUT] - loads = get_loads(rw.body) - # stores = [output] - stores = get_stores(rw.body) - input_pointer = loads[1].buffer.data - output_pointer = stores[0].buffer.data - # Get feature map info - serial_ifm, serial_padding = get_ifm_params(input_pointer, producers_consumers, stmt) - serial_ofm, serial_block_config, replace_pointer, is_allocator = get_ofm_params( - output_pointer, producers_consumers, stmt - ) - # Get kernel info - serial_kernel = SerialKernel( - width=int(rw.extent), - height=int(rh.extent), - stride_w=int(attrs["stride_w"]), - stride_h=int(attrs["stride_h"]), - dilation_w=1, - dilation_h=1, - ) - - # Get activation info - serial_activation = SerialActivation( - op=attrs["activation"], clip_min=attrs["clip_min"], clip_max=attrs["clip_max"] - ) - return ( - SerialPooling( - ifm=serial_ifm, - ofm=serial_ofm, - pooling_type=attrs["pooling_type"], - pool_shape=serial_kernel, - padding=serial_padding, - activation=serial_activation, - rounding_mode=attrs["rounding_mode"], - upscale=attrs["upscale"], - block_config=serial_block_config, - ), - output_pointer, - replace_pointer, - is_allocator, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/producers_consumers.py b/python/tvm/relay/backend/contrib/ethosu/tir/producers_consumers.py deleted file mode 100644 index 39cbf701649f..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/producers_consumers.py +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
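Illustrative sketch (not taken from the removed sources): the parameter-extraction helpers deleted in this file all followed one calling convention, shown below with placeholder inputs; `pooling_attr_stmt` and `producers_consumers` are assumed to be supplied by the surrounding (also removed) lowering pass.

    # Sketch only: typical consumption of a *_params helper; local names are assumptions.
    serial_pooling, output_pointer, replace_pointer, is_allocator = get_pooling_params(
        pooling_attr_stmt, producers_consumers
    )
    # serial_pooling flattens field-by-field (see SerializableFormat in spec.py below)
    # into the argument list of the "ethosu_pooling" call_extern emitted during lowering.
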
-# pylint: disable=invalid-name, unused-argument -"""The ProducersConsumers class""" -from typing import Optional -from collections.abc import KeysView -import tvm - - -class ProducersConsumers: - """It associates pointers with the loop nest that produces - their values and with the loop nest that consumes their values.""" - - def __init__(self) -> None: - self.indices: dict[tvm.tir.AttrStmt, int] = {} - self.producers: list[(tvm.tir.AttrStmt, tvm.tir.expr.Var)] = [] - self.consumers: list[(tvm.tir.AttrStmt, list[tvm.tir.expr.Var])] = [] - self.allocate_variables: Optional[KeysView] = None - - def add_producer(self, var: tvm.tir.expr.Var, attr: tvm.tir.AttrStmt) -> None: - """Add the attribute statement attr as producer of the variable var.""" - self.indices[attr] = len(self.producers) - self.producers.append((attr, var)) - - def get_producer( - self, var: tvm.tir.expr.Var, attr: tvm.tir.AttrStmt - ) -> Optional[tvm.tir.AttrStmt]: - """Get the last attribute statement which produces the variable var when - the current attribute statement is attr.""" - if var not in self.allocate_variables: - return None - - index = self.indices[attr] - for i in list(reversed(range(index + 1))): - if self.producers[i][1] == var: - return self.producers[i][0] - return None - - def get_last_producer(self, var: tvm.tir.expr.Var) -> Optional[tvm.tir.AttrStmt]: - """Get the last attribute statement which produces the variable var.""" - return self.get_producer(var, self.producers[-1][0]) - - def add_allocate_variables(self, allocate_variables: KeysView) -> None: - """Add the allocated variables.""" - self.allocate_variables = allocate_variables - - def add_consumer(self, var: tvm.tir.expr.Var, attr: tvm.tir.AttrStmt) -> None: - """Add the attribute statement attr as consumer of the variable var.""" - index = self.indices[attr] - if index < len(self.consumers): - self.consumers[index][1].append(var) - else: - self.consumers.append((attr, [var])) - - def get_consumer( - self, var: tvm.tir.expr.Var, attr: tvm.tir.AttrStmt - ) -> Optional[tvm.tir.AttrStmt]: - """Get the first attribute statement which consumes the variable var when - the current attribute statement is attr.""" - index = self.indices[attr] - for i in range(index, len(self.consumers)): - if var in self.consumers[i][1]: - return self.consumers[i][0] - return None diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/scheduler.py b/python/tvm/relay/backend/contrib/ethosu/tir/scheduler.py deleted file mode 100644 index cee8f563ff7a..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/scheduler.py +++ /dev/null @@ -1,362 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
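A minimal usage sketch of the ProducersConsumers helper removed above; every variable here is a placeholder, and `allocate_vars` is assumed to be a dict keyed by the allocated buffer vars (it must contain `conv_out_var` for `get_producer` to return a result).

    pc = ProducersConsumers()
    pc.add_allocate_variables(allocate_vars.keys())
    pc.add_producer(conv_out_var, conv2d_attr_stmt)       # conv2d loop nest writes conv_out_var
    pc.add_producer(pool_out_var, pooling_attr_stmt)      # pooling loop nest writes pool_out_var
    pc.add_consumer(conv_out_var, pooling_attr_stmt)      # ...and reads the conv2d output
    producer = pc.get_producer(conv_out_var, pooling_attr_stmt)  # -> conv2d_attr_stmt
    consumer = pc.get_consumer(conv_out_var, conv2d_attr_stmt)   # -> pooling_attr_stmt
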
-# pylint: disable=invalid-name, unused-argument -"""Scheduling for Arm(R) Ethos(TM)-U NPU.""" -import tvm -from tvm.contrib.ethosu.cascader import Propagator - - -def schedule(cached_func, const_dict, cascader=None): - """Schedule a CachedFunc for NPU compilation. - - Parameters - ---------- - cached_func : CachedFunc - The CachedFunc to schedule. - const_dict : dict of int to numpy.ndarray - The constant dictionary. - cascader : callable, optional - A cascading function to apply optimizing scheduling - to the graph. - - Returns - ------- - s : tvm.te.Schedule - The completed schedule for the graph. - - """ - s = tvm.te.create_schedule([t.op for t in cached_func.outputs]) - if cascader: - cascader(cached_func, const_dict, s) - inline_no_ops(cached_func, s) - copy_luts()(cached_func, const_dict, s) - inline_no_ops(cached_func, s) - schedule_pragmas(s) - schedule_cache_reads(s) - return s - - -def tile_nd(s, tensor, tile): - """Scheduling utility to perform N-dimensional tiling. - - Parameters - ---------- - s : tvm.te.Schedule - The schedule to apply the tiling to. - tensor : tvm.te.Tensor - The tensor to apply the tiling to. - tile : tuple - The N-dimensional tile size. - - Returns - ------- - outer_indices : list of tvm.tir.IterVar - The outer iteration variables. - inner_indices : list of tvm.tir.IterVar - The inner iteration variables. - - """ - outer_indices = [] - inner_indices = [] - for i, size in enumerate(tile): - outer, inner = s[tensor].split(tensor.op.axis[i], size) - outer_indices.append(outer) - inner_indices.append(inner) - - s[tensor].reorder(*outer_indices, *inner_indices) - return outer_indices, inner_indices - - -def total_cascader(stripe_size): - """A demo/test cascader which tries to cascade every op in the graph together. - - The desired output stride size should be specified. Note this only works - for single output graphs. - - Parameters - ---------- - stripe_size : tuple - The output stripe size. - - Returns - ------- - func : callable - The cascading function. - - """ - - def _cascader(cached_func, const_dict, sch): - scheduled = set() - - def _visit(tensor, stage, ax): - if tensor not in scheduled and isinstance(tensor.op, tvm.te.ComputeOp): - sch[tensor].compute_at(stage, ax) - scheduled.add(tensor) - for input_tensor in tensor.op.input_tensors: - _visit(input_tensor, stage, ax) - - assert len(cached_func.outputs) == 1 - out = cached_func.outputs[0] - oi, _ = tile_nd(sch, out, stripe_size) - for ax in oi: - sch[out].unroll(ax) - for input_tensor in out.op.input_tensors: - _visit(input_tensor, sch[out], oi[-1]) - - return _cascader - - -def copy_constants(): - """A simple planner which copies all constant data from FLASH -> SRAM. - - Returns - ------- - planner : callable - The planning function. - """ - - def _planner(cached_func, const_dict, sch): - planned = set() # type: ignore - - def _is_matmul(tensor): - if tensor.name not in ["ethosu_conv2d"]: - return False - a, b = tensor.op.input_tensors[0:2] - return a.shape[1:3] == [1, 1] and b.shape[1:3] == [1, 1] - - def _visit(tensor, reader, lut): - if tensor not in planned: - planned.add(tensor) - if isinstance(tensor.op, tvm.te.PlaceholderOp) and tensor != lut: - # Find index of input using 'same_as' check to prevent equality - # ambiguity when encountering a scalar. 
- is_same = [var.same_as(tensor) for var in cached_func.inputs] - index = is_same.index(True) - # Along with constants, also skip for FullyConnected to correspond - # with Vela behavior - if index in const_dict and not _is_matmul(reader): - sch.cache_read(tensor, "global", [reader]) - - elif isinstance(tensor.op, tvm.te.ComputeOp): - if "lut" in tensor.op.attrs.keys(): - lut = tensor.op.attrs["lut"] - for input_tensor in tensor.op.input_tensors: - _visit(input_tensor, tensor, lut) - - for output_tensor in cached_func.outputs: - _visit(output_tensor, None, None) - - return _planner - - -def copy_luts(): - """A scheduler that copies LUTs to SHRAM. - - Returns - ------- - planner : callable - The planning function. - """ - - def _planner(te_graph, const_dict, sch): - planned = set() # type: ignore - - def _visit(tensor, reader, lut): - if tensor not in planned: - planned.add(tensor) - if isinstance(tensor.op, tvm.te.PlaceholderOp) and tensor == lut: - index = list(te_graph.inputs).index(tensor) - if index in const_dict: - sch.cache_read(tensor, "local", [reader]) - - elif isinstance(tensor.op, tvm.te.ComputeOp): - if "lut" in tensor.op.attrs.keys(): - lut = tensor.op.attrs["lut"] - for input_tensor in tensor.op.input_tensors: - _visit(input_tensor, tensor, lut) - - for output_tensor in te_graph.outputs: - _visit(output_tensor, None, None) - - return _planner - - -def schedule_pragmas(sch): - """Add pragmas to the operators that require them. - - This adds the pragmas used for codegen to the NPU ops. - They are taken directly from the TE compute op's attributes. - Modifies the schedule in-place. - - Parameters - ---------- - sch : tvm.te.Schedule - The schedule. - - """ - - def _add_pragmas(stage, ax): - if stage.op.name == "T_concat": - stage.pragma(ax, "op", "ethosu_concatenate") - if "op" in [attr for attr, val in stage.op.attrs.items()]: - stage.pragma(ax, "op", stage.op.attrs["op"]) - for attr, val in stage.op.attrs.items(): - if attr not in ("op", "lut") and not isinstance(val, Propagator): - stage.pragma(ax, str(attr), val) - if stage.op.axis[0] in stage.iter_var_attrs: - attrs = stage.iter_var_attrs[stage.op.axis[0]] - if "block_config_height" in attrs.pragma_keys: - pragmas = dict(zip([k.value for k in attrs.pragma_keys], attrs.pragma_values)) - stage.pragma(ax, "block_config_height", pragmas["block_config_height"]) - stage.pragma(ax, "block_config_width", pragmas["block_config_width"]) - stage.pragma(ax, "block_config_depth", pragmas["block_config_depth"]) - - for stage in sch.stages: - if ( - isinstance(stage.op, tvm.te.ComputeOp) - and len(stage.op.axis) + len(stage.op.reduce_axis) > 0 - ): - # The logic ensures the pragmas are assigned to the inner tiling loops - # rather than the outer ones (which end up getting unrolled). - num_inner_loops = len(stage.op.axis) + len(stage.op.reduce_axis) - ax = stage.leaf_iter_vars[-num_inner_loops] - _add_pragmas(stage, ax) - - -def schedule_cache_reads(sch): - """Schedule cache reads that have been introduced. - - There are two things we need to happen to cache_read stages. They should be tagged - with the 'ethosu_copy' pragma and have all their axes fused to make them 1D. - - Parameters - ---------- - sch : tvm.te.Schedule - The schedule. 
- - """ - - def _detect_cache_read(stage): - # Try and detect cache_reads by checking if the compute op is identity - if isinstance(stage.op, tvm.te.ComputeOp): - op = stage.op - if "ethosu" in op.name: - return False - axes = op.axis - if len(op.input_tensors) == 1: - tensor = op.input_tensors[0] - try: - identity_op = tensor(*axes) - except ValueError: - return False - if tvm.tir.analysis.expr_deep_equal(identity_op, op.body[0]): - return True - return False - - for stage in sch.stages: - if stage.attach_type != 2: # Not inlined - if _detect_cache_read(stage): - fax = stage.fuse(*stage.op.axis) - - # propagate pragmas placed on the outer loop - if len(stage.op.axis) > 0 and stage.op.axis[0] in stage.iter_var_attrs: - attrs = stage.iter_var_attrs[stage.op.axis[0]] - for k, v in zip(attrs.pragma_keys, attrs.pragma_values): - stage.pragma(fax, k.value, v) - - stage.pragma(fax, "op", "ethosu_copy") - - -def inline_no_ops(cached_func, sch): - """Inline 'no-ops' - operations that in principle do nothing. - - Modifies the schedule in-place. For now we inline reshape and - strided slice - more could be added. - - Parameters - ---------- - cached_func : CachedFunc - The cached func. - sch : tvm.te.Schedule - The schedule. - - """ - no_ops = {"T_reshape", "T_strided_slice"} - scheduled = set() - - def _visit(tensor): - if tensor not in scheduled and isinstance(tensor.op, tvm.te.ComputeOp): - if tensor.op.name in no_ops: - sch[tensor].compute_inline() - scheduled.add(tensor) - for input_tensor in tensor.op.input_tensors: - _visit(input_tensor) - - for out in cached_func.outputs: - _visit(out) - - -class OperatorCompute: - """A helper class to manipulate the series of compute ops that make up an operator.""" - - def __init__(self, read, convert_to_nhwc, pad, upscale, op, convert_to_nhcwb16, write): - self.read = read - self.convert_to_nhwc = convert_to_nhwc - self.pad = pad - self.upscale = upscale - self.op = op - self.convert_to_nhcwb16 = convert_to_nhcwb16 - self.write = write - - @classmethod - def from_output(cls, out): - write = out - convert_to_nhcwb16 = write.op.input_tensors[0] - op = convert_to_nhcwb16.op.input_tensors[0] - pad = op.op.input_tensors[0] - upscale = pad.op.input_tensors[0] - convert_to_nhwc = upscale.op.input_tensors[0] - read = convert_to_nhwc.op.input_tensors[0] - return cls(read, convert_to_nhwc, pad, upscale, op, convert_to_nhcwb16, write) - - def split(self, sch, axis, val): - outer, inner = sch[self.write].split(self.write.op.axis[axis], val) - iter_vars = [ax for ax in self.write.op.axis if ax != self.write.op.axis[axis]] - iter_vars.insert(axis, inner) - sch[self.write].reorder(outer, *iter_vars) - sch[self.write].unroll(outer) - g = sch.create_group(outputs=self.convert_to_nhcwb16, inputs=self.read, include_inputs=True) - g.compute_at(sch[self.write], outer) - return outer - - def rolling_buffer(self, sch): - sch[self.read].rolling_buffer() - sch[self.convert_to_nhwc].rolling_buffer() - sch[self.pad].rolling_buffer() - sch[self.upscale].rolling_buffer() - sch[self.op].rolling_buffer() - sch[self.convert_to_nhcwb16].rolling_buffer() - sch[self.write].rolling_buffer() - - def compute_at(self, sch, stage, axis): - sch[self.read].compute_at(stage, axis) - sch[self.convert_to_nhwc].compute_at(stage, axis) - sch[self.pad].compute_at(stage, axis) - sch[self.upscale].compute_at(stage, axis) - sch[self.op].compute_at(stage, axis) - sch[self.convert_to_nhcwb16].compute_at(stage, axis) - sch[self.write].compute_at(stage, axis) diff --git 
a/python/tvm/relay/backend/contrib/ethosu/tir/spec.py b/python/tvm/relay/backend/contrib/ethosu/tir/spec.py deleted file mode 100644 index 583c0363f1ef..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/spec.py +++ /dev/null @@ -1,350 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""The TIR serialization specification for Arm(R) Ethos(TM)-U NPU.""" -from typing import Union -from typing import get_type_hints -from inspect import isclass - -import tvm -from tvm.relay.backend.contrib.ethosu import util - - -def create_serial_object(serialized_type, deserialized_elements): - """ - This function will create serialized type that is one of the subclasses - of tvm.relay.backend.contrib.ethosu.tir.spec.SerializableFormat - - Parameters - ---------- - serialized_type : a subclass type of SerializableFormat - - deserialized_elements : list - The list of arguments that needs to packed to create SerializableFormat objects - - Returns - ------- - The constructed object of type serialized_type - """ - - def _create_serial_object(internal_serialized_type, read_element_idx=0): - """The internal function that increments the read_element_idx - when creating nested serial objects""" - arg_len = util.get_arg_count(internal_serialized_type.__init__) - 1 - serial_init_types = get_type_hints(internal_serialized_type.__init__) - serial_init_arg_names = list(serial_init_types.keys()) - serial_init_args = [] - assert arg_len == len(serial_init_arg_names) - for si_arg_name in serial_init_arg_names: - si_arg_type = serial_init_types[si_arg_name] - if isclass(si_arg_type) and issubclass(si_arg_type, SerializableFormat): - sia, read_element_idx = _create_serial_object(si_arg_type, read_element_idx) - serial_init_args.append(sia) - else: - serial_init_args.append(deserialized_elements[read_element_idx]) - read_element_idx += 1 - return internal_serialized_type(*serial_init_args), read_element_idx - - # Just return the primary serial object - return _create_serial_object(serialized_type)[0] - - -class SerializableFormat: - """Base class to retrieve arguments on a predefined ordering""" - - def __iter__(self): - # Note class attribute definition order is preserved - see PEP 520 - for name in self.__dict__: - value = self.__getattribute__(name) - if isinstance(value, SerializableFormat): - yield from list(value) - else: - yield value - - def __getitem__(self, index): - # Note class attribute definition order is preserved - see PEP 520 - name = list(self.__dict__.keys())[index] - return self.__getattribute__(name) - - -class SerialFeatureMap(SerializableFormat): - """Specialization class to retrieve arguments of a Feature Map - (similiar to NpuFeatureMap of Vela) on a predefined ordering""" - - def __init__( - self, - data_type: str, - height: int, - width: int, - 
channels: int, - tile_height_0: int, - tile_height_1: int, - tile_width_0: int, - tile_address_0: tvm.tir.expr.BufferLoad, - tile_address_1: Union[tvm.tir.expr.BufferLoad, int], - tile_address_2: Union[tvm.tir.expr.BufferLoad, int], - tile_address_3: Union[tvm.tir.expr.BufferLoad, int], - scale: float, - zero_point: int, - layout: str, - stride_h: int, - stride_w: int, - stride_c: int, - ): - self.data_type = data_type - self.height = height - self.width = width - self.channels = channels - self.tile_height_0 = tile_height_0 - self.tile_height_1 = tile_height_1 - self.tile_width_0 = tile_width_0 - self.tile_address_0 = tile_address_0 - self.tile_address_1 = tile_address_1 - self.tile_address_2 = tile_address_2 - self.tile_address_3 = tile_address_3 - self.scale = scale - self.zero_point = zero_point - self.layout = layout - self.stride_h = stride_h - self.stride_w = stride_w - self.stride_c = stride_c - - -class SerialKernel(SerializableFormat): - """Specialization class to retrieve arguments of a Kernel - (similiar to NpuKernel of Vela) on a predefined ordering""" - - def __init__( - self, - width: int, - height: int, - stride_w: int, - stride_h: int, - dilation_w: int, - dilation_h: int, - ): - self.width = width - self.height = height - self.stride_w = stride_w - self.stride_h = stride_h - self.dilation_w = dilation_w - self.dilation_h = dilation_h - - -class SerialAddressRange(SerializableFormat): - """Specialization class to retrieve arguments of a AddressRange - (similiar to NpuAddressRange of Vela) on a predefined ordering""" - - def __init__(self, address: tvm.tir.expr.BufferLoad, length: int): - self.address = address - self.length = length - - -class SerialPadding(SerializableFormat): - """Specialization class to retrieve arguments of a Padding - (similiar to NpuPadding of Vela) on a predefined ordering""" - - def __init__(self, top: int, left: int, bottom: int, right: int): - self.top = top - self.left = left - self.bottom = bottom - self.right = right - - -class SerialActivation(SerializableFormat): - """Specialization class to retrieve arguments of a Activation - (similiar to NpuActivation of Vela) on a predefined ordering""" - - def __init__(self, op: str, clip_min: int, clip_max: int): - self.op = op - self.clip_min = clip_min - self.clip_max = clip_max - - -class SerialBlockConfig(SerializableFormat): - """Specialization class to retrieve arguments of a BlockConfig - (similar to NpuBlockConfig of Vela) on a predefined ordering""" - - def __init__(self, height: int, width: int, depth: int): - self.height = height - self.width = width - self.depth = depth - - -class SerialRescaleConfig(SerializableFormat): - """Specialization class to retrieve arguments of a rescale parameters - (to fill in rescale field in Vela NpuElementWiseOperation) on a predefined ordering""" - - def __init__(self, use_rescale: bool, scale: int, shift: int): - self.use_rescale = use_rescale - self.scale = scale - self.shift = shift - - -class Serial2DConvolution(SerializableFormat): - """Specialization class to retrieve arguments of - a ethosu.conv2d tir extern call on a predefined ordering""" - - def __init__( - self, - ifm: SerialFeatureMap, - ofm: SerialFeatureMap, - kernel: SerialKernel, - weight: SerialAddressRange, - weight2: SerialAddressRange, - weight_zero_point: int, - scale_bias: SerialAddressRange, - scale_bias2: SerialAddressRange, - padding: SerialPadding, - activation: SerialActivation, - rounding_mode: str, - upscale: str, - block_config: SerialBlockConfig, - ): - self.ifm = ifm - 
self.ofm = ofm - self.kernel = kernel - self.weight = weight - self.weight2 = weight2 - self.weight_zero_point = weight_zero_point - self.scale_bias = scale_bias - self.scale_bias2 = scale_bias2 - self.padding = padding - self.activation = activation - self.rounding_mode = rounding_mode - self.upscale = upscale - self.block_config = block_config - - -class Serial2DDepthwise(SerializableFormat): - """Specialization class to retrieve arguments of - a ethosu.depthwise_conv2d TIR extern call on a predefined ordering""" - - def __init__( - self, - ifm: SerialFeatureMap, - ofm: SerialFeatureMap, - kernel: SerialKernel, - weight: SerialAddressRange, - weight_zero_point: int, - scale_bias: SerialAddressRange, - padding: SerialPadding, - activation: SerialActivation, - rounding_mode: str, - upscale: str, - block_config: SerialBlockConfig, - ): - self.ifm = ifm - self.ofm = ofm - self.kernel = kernel - self.weight = weight - self.weight_zero_point = weight_zero_point - self.scale_bias = scale_bias - self.padding = padding - self.activation = activation - self.rounding_mode = rounding_mode - self.upscale = upscale - self.block_config = block_config - - -class SerialCopy(SerializableFormat): - """Specialization class to retrieve arguments of - a ethosu.copy tir extern call on a predefined ordering""" - - def __init__( - self, - read_address: tvm.tir.expr.BufferLoad, - length: int, - write_address: tvm.tir.expr.BufferLoad, - ): - self.read_address = read_address - self.length = length - self.write_address = write_address - - -class SerialPooling(SerializableFormat): - """Specialization class to retrieve arguments of - a ethosu.pooling tir extern call on a predefined ordering""" - - def __init__( - self, - ifm: SerialFeatureMap, - ofm: SerialFeatureMap, - pooling_type: str, - pool_shape: SerialKernel, - padding: SerialPadding, - activation: SerialActivation, - rounding_mode: str, - upscale: str, - block_config: SerialBlockConfig, - ): - self.ifm = ifm - self.ofm = ofm - self.pooling_type = pooling_type - self.pool_shape = pool_shape - self.padding = padding - self.activation = activation - self.rounding_mode = rounding_mode - self.upscale = upscale - self.block_config = block_config - - -class SerialBinaryElementwise(SerializableFormat): - """Specialization class to retrieve arguments of - a ethosu.binary_elementwise tir extern call on a predefined ordering""" - - def __init__( - self, - ifm: SerialFeatureMap, - ifm2: SerialFeatureMap, - ofm: SerialFeatureMap, - operator_type: str, - reversed_operands: bool, - activation: SerialActivation, - rounding_mode: str, - block_config: SerialBlockConfig, - rescale_config: SerialRescaleConfig, - ): - self.ifm = ifm - self.ifm2 = ifm2 - self.ofm = ofm - self.operator_type = operator_type - self.reversed_operands = reversed_operands - self.activation = activation - self.rounding_mode = rounding_mode - self.block_config = block_config - self.rescale_config = rescale_config - - -class SerialUnaryElementwise(SerializableFormat): - """Specialization class to retrieve arguments of - a ethosu.unary_elementwise tir extern call on a predefined ordering""" - - def __init__( - self, - ifm: SerialFeatureMap, - ofm: SerialFeatureMap, - operator_type: str, - activation: SerialActivation, - rounding_mode: str, - block_config: SerialBlockConfig, - ): - self.ifm = ifm - self.ofm = ofm - self.operator_type = operator_type - self.activation = activation - self.rounding_mode = rounding_mode - self.block_config = block_config diff --git 
a/python/tvm/relay/backend/contrib/ethosu/tir/transform.py b/python/tvm/relay/backend/contrib/ethosu/tir/transform.py deleted file mode 100644 index 272318066b3f..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/transform.py +++ /dev/null @@ -1,62 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Extract parameters from the transform operators in TIR.""" -import tvm -from .spec import SerialCopy -from .utils import get_base_address, get_op_attrs - - -def get_copy_params(stmt, producers_consumers): - """Get the parameters necessary to construct a call_extern for a copy. - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of a copy loop nest. - producers_consumers: ProducersConsumers - It associates pointers with the loop nest that produces - their values and with the loop nest that consumes their values. - - Returns - ------- - SerialCopy - The parameters needed to construct a copy. - tvm.tir.Var - The output pointer of the copy operation. - replace_pointer : tvm.tir.Var - The output pointer of the DMA write operation, which is to replace - the convolution output pointer. - is_allocator : bool - Whether this operator allocates its output. - """ - _, body = get_op_attrs(stmt) - length = body.extent - write_store = body.body - write_base = [get_base_address(index) for index in write_store.indices] - read_load = body.body.value - read_base = [get_base_address(index) for index in read_load.indices] - return ( - SerialCopy( - read_address=tvm.tir.expr.BufferLoad(read_load.buffer, read_base), - length=length, - write_address=tvm.tir.expr.BufferLoad(write_store.buffer, write_base), - ), - write_store.buffer.data, - None, - True, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/unary_elementwise.py b/python/tvm/relay/backend/contrib/ethosu/tir/unary_elementwise.py deleted file mode 100644 index cd5d71d74b84..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/unary_elementwise.py +++ /dev/null @@ -1,80 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Extract information from the unary_elementwise operators in TIR.""" -from tvm import tir -from .utils import get_outer_loops, get_op_attrs -from .dma import get_ifm_params, get_ofm_params -from .spec import SerialActivation, SerialUnaryElementwise - - -def get_unary_elementwise_params(stmt, producers_consumers): - """Get the parameters necessary to construct a call_extern for a unary_elementwise. - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement of a unary elementwise loop nest. - producers_consumers: ProducersConsumers - It associates pointers with the loop nest that produces - their values and with the loop nest that consumes their values. - - Returns - ------- - SerialUnaryElementwise - The parameters needed to construct a unary elementwise operator. - output_pointer : tvm.tir.Var - The output pointer of the unary elementwise operation. - replace_pointer : tvm.tir.Var - The output pointer of the DMA write operation, which is to replace - the unary elementwise output pointer. - is_allocator : bool - Whether this operator allocates its output. - """ - attrs, body = get_op_attrs(stmt) - - _, _, _, _, _, inner = get_outer_loops(body, "NHWC") - input_pointer = None - if isinstance(inner.value, tir.expr.Select): - # ABS - input_pointer = inner.value.condition.b.buffer.data - if isinstance(inner.value, tir.expr.Sub): - # CLZ - input_pointer = inner.value.b.args[0].buffer.data - output_pointer = inner.buffer.data - # Get feature map info - serial_ifm, _ = get_ifm_params(input_pointer, producers_consumers, stmt) - serial_ofm, serial_block_config, replace_pointer, is_allocator = get_ofm_params( - output_pointer, producers_consumers, stmt - ) - # Get activation info - serial_activation = SerialActivation( - op=attrs["activation"], clip_min=attrs["clip_min"], clip_max=attrs["clip_max"] - ) - return ( - SerialUnaryElementwise( - ifm=serial_ifm, - ofm=serial_ofm, - operator_type=attrs["operator_type"], - activation=serial_activation, - rounding_mode=attrs["rounding_mode"], - block_config=serial_block_config, - ), - output_pointer, - replace_pointer, - is_allocator, - ) diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/utils.py b/python/tvm/relay/backend/contrib/ethosu/tir/utils.py deleted file mode 100644 index 396735a07c4c..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir/utils.py +++ /dev/null @@ -1,236 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
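Purely for illustration: a hedged sketch of how the tuple returned by get_unary_elementwise_params (and its sibling helpers) was turned into the extern call that the translator later decodes; `attr_stmt` and `producers_consumers` are placeholders supplied by the removed lowering pass, and the splat relies on SerialUnaryElementwise being iterable (see spec.py above).

    from tvm import tir

    params, output_pointer, replace_pointer, is_allocator = get_unary_elementwise_params(
        attr_stmt, producers_consumers
    )
    # The serial object expands into the flat argument list of the extern call.
    call = tir.call_extern("handle", "ethosu_unary_elementwise", *list(params))
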
-# pylint: disable=invalid-name -"""Helper utility functions used by the NPU TIR compiler""" -import tvm -from tvm import arith - - -def get_op_attrs(stmt): - """Iterate through nested attribute statements accumulating their values - in an attribute dictionary. - - The "pragma_" prefix is removed as a convenience. - - Parameters - ---------- - stmt : tvm.tir.AttrStmt - The outermost attribute statement to begin from. - - Returns - ------- - attrs : dict of str to object - The attribute dictionary. - stmt : tvm.tir.Stmt - The body after having collected the final attribute statement. - - """ - attrs = {} - while isinstance(stmt, tvm.tir.AttrStmt): - # The pragma scheduler inserts "pragma_" before all the - # attr names, this is annoying so we get rid of it - attr = stmt.attr_key.replace("pragma_", "") - attrs[attr] = stmt.value - stmt = stmt.body - - return attrs, stmt - - -def get_strides(index, stride_vars): - """Get the striding of given vars in an indexing expression. - - Parameters - ---------- - index : tvm.tir.PrimExpr - The index expression where the stride vars are present. - stride_vars : list of tvm.tir.Var - The vars to determine the striding of. - - Returns - ------- - strides : list of int - The striding of each stride var in the index expression - in the same order as the stride vars were given. - - """ - strides = [1] * len(stride_vars) - dmap = {} - - def _visit(stmt): - if isinstance(stmt, tvm.tir.Var): - dmap[stmt] = arith.IntervalSet(0, 0) - - tvm.tir.stmt_functor.post_order_visit(index, _visit) - min_value = int(arith.Analyzer().int_set(index, dmap).min_value) - for var in dmap: - if var in stride_vars: - # NOTE: Doing this using a [0, 1] interval doesn't work reliably - # Seems to be a bug - dmap[var] = arith.IntervalSet(1, 1) - max_value = int(arith.Analyzer().int_set(index, dmap).max_value) - stride = int(max_value - min_value) - i = stride_vars.index(var) - strides[i] = stride - dmap[var] = arith.IntervalSet(0, 0) - - return strides - - -def get_base_address(index): - """Determine the first (base) address accessed by an index expression. - - Parameters - ---------- - index : tvm.tir.PrimExpr - The index expression to determine the base address of. - - Returns - ------- - base_address: - The first address accessed by the index expression. - - """ - dmap = {} - - def _visit(stmt): - if isinstance(stmt, tvm.tir.Var): - dmap[stmt] = arith.IntervalSet(0, 0) - - tvm.tir.stmt_functor.post_order_visit(index, _visit) - base_address = int(arith.Analyzer().int_set(index, dmap).min_value) - return base_address - - -def get_outer_loops(stmt, layout): - """Get the outer loops of an operator. - - Parameters - ---------- - stmt : tvm.tir.For - The outermost loop. - layout : str - The output tensor layout (NHWC or NHCWB16). - - Returns - ------- - n : tvm.tir.For - The batch loop. - h : tvm.tir.For - The height loop. - w : tvm.tir.For - The width loop. - c : tvm.tir.For - The channels loop. - b : tvm.tir.For - The brick loop. None for NHWC - body : tvm.tir.Stmt - The inner body of the loops. - - """ - if layout == "NHWC": - n = stmt - h = n.body - w = h.body - c = w.body - b = tvm.tir.For(tvm.tir.Var("b", "int32"), 0, 0, 0, tvm.tir.Evaluate(0)) - return n, h, w, c, b, c.body - if layout == "NHCWB16": - n = stmt - h = n.body - cb = h.body - w = cb.body - b = w.body - return n, h, w, cb, b, b.body - return None - - -def collect_buffer_map(stmt): - """Collect a map of Var -> Buffer - - Generate a map from a buffer's backing `tir.Var` to the - `tir.Buffer` object that uses it. 
If multiple such buffers exist, - return the first occurrence. - - Parameters - ---------- - stmt : tvm.tir.Stmt - The statement to get the BufferLoads from. - - Returns - ------- - buffer_map : Dict[Var, Buffer] - The map from buffer var to the buffers that use it. - """ - buffer_map = {} - - def _visit(node): - if isinstance(node, (tvm.tir.BufferLoad, tvm.tir.BufferStore)): - buf = node.buffer - if buf.data not in buffer_map: - buffer_map[buf.data] = buf - - tvm.tir.stmt_functor.post_order_visit(stmt, _visit) - - return buffer_map - - -def get_loads(stmt): - """Get the BufferLoad statements. - - Parameters - ---------- - stmt : tvm.tir.Stmt - The statement to get the BufferLoads from. - - Returns - ------- - loads : list of tvm.tir.BufferLoad - The BufferLoads found. - - """ - loads = [] - - def _visit(s): - if isinstance(s, tvm.tir.BufferLoad): - loads.append(s) - - tvm.tir.stmt_functor.post_order_visit(stmt, _visit) - return loads - - -def get_stores(stmt): - """Get the BufferStore statements. - - Parameters - ---------- - stmt : tvm.tir.Stmt - The statement to get the BufferStores from. - - Returns - ------- - stores : list of tvm.tir.BufferStore - The BufferStores found. - - """ - stores = [] - - def _visit(s): - if isinstance(s, tvm.tir.BufferStore): - stores.append(s) - - tvm.tir.stmt_functor.post_order_visit(stmt, _visit) - return stores diff --git a/python/tvm/relay/backend/contrib/ethosu/tir_to_cs_translator.py b/python/tvm/relay/backend/contrib/ethosu/tir_to_cs_translator.py deleted file mode 100644 index e88f9047ddc5..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/tir_to_cs_translator.py +++ /dev/null @@ -1,1134 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=use-list-literal, invalid-name -"""This source will contain code to convert TIR, as produced by -the Relay to TIR compilation process, to Vela API calls to -generate command stream. 
-""" -from typing import Dict, NamedTuple, Tuple, Union, List -from enum import auto -from enum import Enum -import numpy as np # type: ignore -import ethosu.vela.api as vapi # type: ignore - -import tvm -from tvm.tir import stmt_functor -from tvm.relay.backend.contrib.ethosu import util -from tvm.relay.backend.contrib.ethosu import vela_api -from tvm.relay.backend.contrib.ethosu.tir import spec -from tvm.relay.backend.contrib.ethosu.tir import utils as tir_utils - - -class BufferType(Enum): - """The type of information that a buffer contains.""" - - constant = auto() - input_or_output = auto() - scratch = auto() - input = auto() - output = auto() - shram = auto() - - -class BufferInfo(NamedTuple): - """A data structure to hold metadata of the buffer.""" - - # If the buffer holds constants, the values will contain that otherwise None - values: np.ndarray - shape: tvm.ir.container.Array - dtype: np.dtype - btype: BufferType - - -class AcceleratorArchConfig: - def __init__(self, total_shram_banks): - self.shram_bank_size = 1024 - self.total_shram_banks = total_shram_banks - self.shram_size_bytes = self.shram_bank_size * self.total_shram_banks - self.lut_size_bytes = 2048 - self.lut_start_address = self.shram_size_bytes - self.lut_size_bytes - - -def get_accelerator_arch_config(accel_type): - accel_config_str_map = { - "ethos-u55-32": AcceleratorArchConfig(16), - "ethos-u55-64": AcceleratorArchConfig(16), - "ethos-u55-128": AcceleratorArchConfig(24), - "ethos-u55-256": AcceleratorArchConfig(48), - "ethos-u65-256": AcceleratorArchConfig(48), - } - return accel_config_str_map[accel_type] - - -class RegionOffset(NamedTuple): - """A data structure to hold region and address offset corresponding to a tensor""" - - region: int - offset: int - - -def analyze_scratch_memory_acesses(mod: tvm.IRModule, candidate_regions_for_scratch: List[int]): - """ - This function analyzes the IRModule for intermediary tensors that can be resulting - from a offset of pool variables (via Let nodes) and/or allocate nodes. The allocate - nodes will be folded into a single TVMBackendallocWorkspace call with offsets. Ultimately - this will produce a mapping from each such node to a RegionOffset named tuple that - has the region and the obtained offset, as mentioned above. 
- - Parameters - ---------- - mod: tvm.IRModule - The TIR module containing ethosu extern calls - candidate_regions_for_scratch: List[int] - A list of region integers that could be used for scratch regions - - Returns - ------- - scratch_region_map : Dict[tvm.tir.Var, RegionOffset] - A map between buffer vars to scratch regions they are assigned - tvm_backend_alloc_workspace_size : int - The size of tvm_backend_alloc_workspace call required to service - remaining allocate nodes if any - tvm_backend_alloc_workspace_region : int - The region associated with the tvm_backend_alloc_workspace - """ - scratch_region_map = dict() - pool_var_region_map = dict() - # There should only be a single function - assert len(mod.functions.items()) == 1 - primfunc = mod.functions.items()[0][1] - if "pool_args" in primfunc.attrs.keys(): - pool_args = primfunc.attrs["pool_args"] - for pool_arg in pool_args: - pool_param = primfunc.params[int(pool_arg.pool_var_idx)] - pool_var_region_map[pool_param] = candidate_regions_for_scratch.pop() - scratch_region_map[pool_param] = RegionOffset( - region=pool_var_region_map[pool_param], offset=None - ) - - def analyze_pool_access(stmt): - if isinstance(stmt, tvm.tir.stmt.LetStmt): - call_address_of = stmt.value - load = call_address_of.args[0] - pool_var = load.buffer.data - scratch_region_map[stmt.var] = RegionOffset( - region=pool_var_region_map[pool_var], offset=int(load.indices[0]) - ) - - tvm.tir.stmt_functor.post_order_visit(primfunc.body, analyze_pool_access) - - dynamic_allocation_region = None - if len(candidate_regions_for_scratch) > 0: - dynamic_allocation_region = candidate_regions_for_scratch.pop() - dynamic_allocation_size = 0 - - # If there are tir.Allocate remaining by now, they need to be serviced via - # dynamic_allocation calls. 
- def analyze_remaining_allocates(stmt): - nonlocal dynamic_allocation_size - if isinstance(stmt, tvm.tir.stmt.Allocate): - allocate = stmt - pointer_type = allocate.buffer_var.type_annotation - storage_scope = pointer_type.storage_scope - if storage_scope == "global": - dtype_bytes = np.iinfo(np.dtype(allocate.dtype)).bits // 8 - size_in_bytes = int(dtype_bytes * np.prod(list(allocate.extents))) - # Every memory address the NPU access have to be 16 byte aligned - size_in_bytes = util.round_up(size_in_bytes, 16) - address = dynamic_allocation_size - dynamic_allocation_size += size_in_bytes - scratch_region_map[allocate.buffer_var] = RegionOffset( - region=dynamic_allocation_region, offset=address - ) - - tvm.tir.stmt_functor.post_order_visit(primfunc.body, analyze_remaining_allocates) - - return ( - scratch_region_map, - dynamic_allocation_size, - dynamic_allocation_region, - ) - - -def _get_region(buffer_type, var=None, scratch_region_map=None): - """A helper to obtain regions for buffer_types and buffer vars""" - static_regions = { - BufferType.constant: 0, - BufferType.input: 3, - BufferType.output: 4, - BufferType.shram: int((1 << 8) | (3 << 0)), - } - if buffer_type in static_regions.keys(): - return static_regions[buffer_type] - assert buffer_type == BufferType.scratch - assert var in scratch_region_map.keys(), f"{var} is not analyzed for scratch regions" - return scratch_region_map[var].region - - -def translate(tir_module, params): - """This will take an tir module for the NPU - and compile to command stream - - Parameters - ---------- - tir_module : tvm.IRModule - The TIR module containing ethosu extern calls - params : dict - A dictionary containing TIR primfunc argument ordering - idx to constant NDArray map - accel_type : ethosu.vela.api.NpuAccelerator - the accelerator variant the tir module needs to compiled to - - Returns - ------- - cs : str - An hex string of the bytes of command stream - encoded_constants : str - An hex string of the bytes that includes concat'd - encoded weights, encoded biases and scales. - base_addresses : List[util.BaseAddress] - base addresses to be used by the driver - """ - - # The NPU has 6 usable regions ranging from 0-6 - # The regions 0, 3, and 4 is already used for input, - # output and constant, respectively (See _get_regions()). - # Thus, for scratch we are left with 5, 2 and 1. 
- candidate_regions_for_scratch = [5, 2, 1] - ( - scratch_region_map, - dynamic_allocation_size, - dynamic_allocation_region, - ) = analyze_scratch_memory_acesses(tir_module, candidate_regions_for_scratch) - buffer_info = extract_buffer_info(tir_module, params) - call_extern_list = extract_call_extern_list(tir_module) - _npu_ops = list() - for call_extern in call_extern_list: - _npu_ops.append(translate_ethosu_tir_call_extern(call_extern)) - _npu_ops, constant_data = assign_addresses(buffer_info, _npu_ops, scratch_region_map) - base_addresses = extract_param_base_addresses(tir_module, buffer_info, scratch_region_map) - if dynamic_allocation_size: - base_addresses.append( - util.BaseAddress( - name="dynamic_allocation", - primfunc_param_idx=None, - region=dynamic_allocation_region, - size=dynamic_allocation_size, - is_runtime_allocation=True, - ) - ) - target_accel_config = vela_api.get_accelerator_config() - cmds = vapi.npu_generate_register_command_stream(_npu_ops, target_accel_config) - payload = vapi.npu_create_driver_payload(cmds, target_accel_config) - return payload.hex(), constant_data, base_addresses - - -def extract_param_base_addresses(mod, buffer_info, scratch_region_map) -> List[util.BaseAddress]: - """This function extracts base addresses to be used by the driver - - Parameters - ---------- - mod : tvm.IRModule - The TIR Module for NPU - buffer_info : Dict[tvm.tir.Var, BufferInfo] - Information regarding buffer vars used in the PrimFunc - - Returns - ------- - List[util.BaseAddress] - base addresses to be used by the driver - """ - # There should only be a single function - assert len(mod.functions.items()) == 1 - primfunc = mod.functions.items()[0][1] - - buffer_map = tir_utils.collect_buffer_map(primfunc.body) - - base_addresses = list() - idx = 0 - - for param in primfunc.params: - # constants are pooled together and handled specially - # this will change after tir.allocate_const. - # For now, we are skipping generating buffer addresses here - if buffer_info[param].btype == BufferType.constant: - continue - - if param in buffer_map: - buffer = buffer_map[param] - dtype = buffer.dtype - element_size_bytes = np.iinfo(dtype).bits // 8 - size_bytes = element_size_bytes * np.prod(list(buffer.shape)) - base_addresses.append( - util.BaseAddress( - param.name.replace("-", "_"), - idx, - _get_region(buffer_info[param].btype, param, scratch_region_map), - size_bytes, - ) - ) - else: - base_addresses.append( - util.BaseAddress( - param.name.replace("-", "_"), - idx, - _get_region(buffer_info[param].btype, param, scratch_region_map), - 0, - ) - ) - idx += 1 - - return base_addresses - - -def extract_call_extern_list(mod): - """This function will obtain all extern - calls from a TIR module - Parameters - ---------- - mod : tvm.IRModule - The TIR Module for NPU - - Returns - ------- - list - of tvm.tir.Call objects - that are tir extern calls - """ - # There should only be a single function - assert len(mod.functions.items()) == 1 - primfunc = mod.functions.items()[0][1] - - call_extern_list = list() - - def populate_call_extern_list(stmt): - if isinstance(stmt, tvm.tir.Call) and stmt.op.name == "tir.call_extern": - call_extern_list.append(stmt) - - stmt_functor.post_order_visit(primfunc.body, populate_call_extern_list) - return call_extern_list - - -def extract_buffer_info( - mod: tvm.IRModule, param_dict: Dict[int, np.ndarray] -) -> Dict[str, BufferInfo]: - """This function is to read the tvm.IRModule that - contains Relay to TIR compiled IRModule. 
Thereafter, - this will extract the buffer information as the shape - and constant data (if any). - - Parameters - ---------- - mod : tvm.IRModule - The NPU TIR IRModule. - param_dict : Dict[tvm.tir.Var, np.ndarray] - A dictionary containing param idx --> const numpy.NDArray - - Returns - ------- - dict : Dict[str, BufferInfo] - A dictionary of buffer names --> BufferInfo - - """ - buffer_info = dict() - # There should only be a single function - assert len(mod.functions.items()) == 1 - primfunc = mod.functions.items()[0][1] - - for param, const_data in param_dict.items(): - if isinstance(param, tvm.tir.Buffer): - param = param.data - buffer_info[param] = BufferInfo( - const_data, const_data.shape, const_data.dtype, BufferType.constant - ) - - pool_param_indices = list() - if "pool_args" in primfunc.attrs.keys(): - pool_args = primfunc.attrs["pool_args"] - pool_param_indices = [allocated_pool_info.pool_var_idx for allocated_pool_info in pool_args] - - for idx, param in enumerate(primfunc.params): - if param not in buffer_info.keys(): - if idx in pool_param_indices: - btype = BufferType.scratch - else: - btype = BufferType.input_or_output - buffer_info[param] = BufferInfo( - None, - None, - None, - btype, - ) - - def populate_allocate_buffer_info(stmt): - if isinstance(stmt, tvm.tir.stmt.Allocate): - allocate = stmt - pointer_type = allocate.buffer_var.type_annotation - storage_scope = pointer_type.storage_scope - if storage_scope == "local": - buffer_info[allocate.buffer_var] = BufferInfo( - None, - allocate.extents, - allocate.dtype, - BufferType.shram, - ) - - tvm.tir.stmt_functor.post_order_visit(primfunc.body, populate_allocate_buffer_info) - return buffer_info - - -def assign_addresses(buffer_info, npu_ops, scratch_region_map): - """This function will assign addresses to tensors - within two buffers : scratch and constants. - The scratch is the buffer created to hold all intermediary data - The constants is the buffer created via unifying all the constant data - (post-encoding). - Parameters - ---------- - buffer_info : dict - This is the dictionary obtained via calling extract_buffer_info. - The key is the buffer name to BufferInfo - npu_ops : list - A list of Vela NpuOps with tir.BufferLoads for addresses - scratch_region_map : Dict[tvm.tir.Var, RegionOffset] - A buffer_var to region and offset map. 
- Returns - ------- - npu_ops : list - A list of Vela NpuOps with addesses within scratch and constant buffers - constant_tensor : NDArray - A unified constant data array of uint8 as the constant buffer - """ - - def replace_npu_fm_with_address(npu_fm): - assert isinstance(npu_fm.tiles.addresses[0], tvm.tir.BufferLoad) - buffer = npu_fm.tiles.addresses[0].buffer.data - if buffer in scratch_region_map.keys(): - address = scratch_region_map[buffer].offset - region = scratch_region_map[buffer].region - else: - assert buffer in buffer_addresses.keys() - address, buffer_type = buffer_addresses[buffer] - region = _get_region(buffer_type) - assert ( - len(npu_fm.tiles.addresses[0].indices) == 1 - ), "Ethos-U translation expects flattened buffers" - index = npu_fm.tiles.addresses[0].indices[0] * ( - np.iinfo(np.dtype(npu_fm.tiles.addresses[0])).bits // 8 - ) - npu_fm.tiles.addresses[0] = address + int(index) - npu_fm.tiles.addresses[1] = ( - address if isinstance(npu_fm.tiles.addresses[1], tvm.tir.BufferLoad) else 0 - ) - npu_fm.tiles.addresses[2] = ( - address if isinstance(npu_fm.tiles.addresses[2], tvm.tir.BufferLoad) else 0 - ) - npu_fm.tiles.addresses[3] = 0 - npu_fm.region = region - return npu_fm - - def replace_npu_address_range_with_address(npu_addr_range): - assert isinstance(npu_addr_range.address, tvm.tir.BufferLoad) - buffer = npu_addr_range.address.buffer.data - index = int( - npu_addr_range.address.indices[0] - * (np.iinfo(np.dtype(npu_addr_range.address)).bits // 8) - ) - if buffer in scratch_region_map.keys(): - return vapi.NpuAddressRange( - scratch_region_map[buffer].region, - scratch_region_map[buffer].offset + index, - npu_addr_range.length, - ) - assert buffer in buffer_addresses.keys(), f"searching for buffer : {buffer}, but not found" - address, buffer_type = buffer_addresses[buffer] - address = address + int(npu_addr_range.address.indices[0].value) - return vapi.NpuAddressRange(_get_region(buffer_type), address, npu_addr_range.length) - - def replace_tir_loads(npu_object): - if isinstance(npu_object, vapi.NpuFeatureMap): - return replace_npu_fm_with_address(npu_object) - if isinstance(npu_object, vapi.NpuAddressRange): - return replace_npu_address_range_with_address(npu_object) - return npu_object - - def classify_io(buffer): - for _npu_op in npu_ops: - if issubclass(type(_npu_op), vapi.NpuBlockOperation): - if _npu_op.ifm and _npu_op.ifm.tiles.addresses[0].buffer.data == buffer: - return BufferType.input - if _npu_op.ifm2 and _npu_op.ifm2.tiles.addresses[0].buffer.data == buffer: - return BufferType.input - if _npu_op.ofm and _npu_op.ofm.tiles.addresses[0].buffer.data == buffer: - return BufferType.output - - raise ValueError(f"Unused IO : {buffer} in tir module.") - - constant_hex_data = [] - total_constant_len = 0 - buffer_addresses = dict() - for _buffer, info in buffer_info.items(): - if info.values is not None: - assert info.btype == BufferType.constant - assert len(info.shape) == 1 - buffer_addresses[_buffer] = ( - (total_constant_len, info.btype) if constant_hex_data else (0, info.btype) - ) - dtype_bytes = np.iinfo(np.dtype(info.dtype)).bits // 8 - size_in_bytes = dtype_bytes * np.prod(list(info.shape)) - # Every memory address the NPU access have to be 16 byte aligned - size_in_bytes = util.round_up(size_in_bytes, 16) - constant_tensor = np.resize(info.values, size_in_bytes // dtype_bytes) - constant_tensor = constant_tensor.tobytes().hex() - constant_hex_data.append(constant_tensor) - total_constant_len += len(constant_tensor) // 2 - else: - if info.btype == 
BufferType.input_or_output or info.btype == BufferType.input: - buffer_type = info.btype - if info.btype == BufferType.input_or_output: - buffer_type = classify_io(_buffer) - assert buffer_type in (BufferType.input, BufferType.output) - address = 0 - buffer_addresses[_buffer] = (address, buffer_type) - buffer_info[_buffer] = BufferInfo( - values=None, shape=info.dtype, dtype=info.dtype, btype=buffer_type - ) - elif info.btype == BufferType.shram: - accl_config = util.get_accelerator_config() - arch_config = get_accelerator_arch_config(accl_config) - address = arch_config.lut_start_address - buffer_addresses[_buffer] = (address, info.btype) - else: - # These buffer_vars are already updated in scratch_region_map - assert info.btype == BufferType.scratch - - for npu_op in npu_ops: - for attr_name, attr in npu_op.__dict__.items(): - if isinstance(attr, list): - new_attr = list() - for attr_ in attr: - new_attr.append(replace_tir_loads(attr_)) - setattr(npu_op, attr_name, new_attr) - else: - setattr(npu_op, attr_name, replace_tir_loads(attr)) - - constant_data = "".join(constant_hex_data) - return (npu_ops, constant_data) - - -def translate_ethosu_tir_call_extern(tir_call_extern): - """This is a dispatcher function to dispatch - correct translation call depending on the extern call's - first argument""" - supported_call_extern = { - "ethosu_conv2d": translate_ethosu_conv2d, - "ethosu_copy": translate_ethosu_copy, - "ethosu_depthwise_conv2d": translate_ethosu_depthwise_conv2d, - "ethosu_pooling": translate_ethosu_pooling, - "ethosu_binary_elementwise": translate_ethosu_binary_elementwise, - "ethosu_identity": translate_ethosu_pooling, - "ethosu_unary_elementwise": translate_ethosu_unary_elementwise, - } - ext_call_type = tir_call_extern.args[0].value - assert ext_call_type in supported_call_extern.keys(), f"{ext_call_type} is not yet supported" - npu_op = supported_call_extern[ext_call_type](tir_call_extern) - # Some conversions return additional outputs - # if they are needed, the caller should use the function directly - if isinstance(npu_op, tuple): - return npu_op[0] - return npu_op - - -def translate_ethosu_copy(tir_call_extern: tvm.tir.Call) -> vapi.NpuDmaOperation: - """This function will translate a TIR call_extern - as produced by NPU Relay to TIR compilation. - - Parameters - ---------- - tir_call_extern : tvm.tir.Call - - Returns - ------- - ethosu.vela.api.NpuDmaOperation - The vela object containing the params of ethosu_copy - """ - # We skip the first element as it is the call_extern function name - serial_object = spec.create_serial_object(spec.SerialCopy, tir_call_extern.args[1:]) - return _create_npu_dma_op(serial_object) - - -def _convert_clip_bounds(npu_op: vapi.NpuBlockOperation): - """This function will convert the min and max value - of clip activations to non quantized floats as - expected by the API. 
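# Illustrative sketch, not part of the original file: _convert_clip_bounds maps quantized
# clip bounds back to real values using the OFM quantization parameters,
# real = (quant - zero_point) * scale. The same arithmetic in isolation:
def dequantize_clip_bounds(clip_min_q, clip_max_q, zero_point, scale_f32):
    if not scale_f32:  # a scale of 0/None means the bounds are already real-valued
        return clip_min_q, clip_max_q
    return (clip_min_q - zero_point) * scale_f32, (clip_max_q - zero_point) * scale_f32

# e.g. int8 bounds (-128, 127) with zero_point=-128, scale=0.1 -> (0.0, 25.5)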
- - Parameters - ---------- - npu_op : vapi.NpuBlockOperation - - """ - clip_min_quant = npu_op.activation.min - clip_max_quant = npu_op.activation.max - if npu_op.ofm.quantization.scale_f32: - clip_min_actual = ( - clip_min_quant - npu_op.ofm.quantization.zero_point - ) * npu_op.ofm.quantization.scale_f32 - clip_max_actual = ( - clip_max_quant - npu_op.ofm.quantization.zero_point - ) * npu_op.ofm.quantization.scale_f32 - else: - clip_min_actual = clip_min_quant - clip_max_actual = clip_max_quant - npu_op.activation.min = clip_min_actual - npu_op.activation.max = clip_max_actual - - -def translate_ethosu_conv2d(tir_call_extern: tvm.tir.Call) -> Tuple[vapi.NpuConv2DOperation, int]: - """This function will translate a TIR call_extern - as produced by NPU Relay to TIR compilation. - - Parameters - ---------- - tir_call_extern : tvm.tir.Call - This should be a TIR call_extern that has agreed upon ordering - for TIR Compiler. See Serial2DConvolution in - tvm/relay/backend/contrib/ethosu/tir/spec.py for the ordering. - - Returns - ------- - ethosu.vela.api.NpuConv2DOperation - The vela object containing the params of ethosu_conv2d - weights_zero_point : int - The zero point of the weights - """ - # We skip the first element as it is the call_extern function name - serial_object = spec.create_serial_object(spec.Serial2DConvolution, tir_call_extern.args[1:]) - return _create_npu_op_conv2d(serial_object) - - -def _create_npu_op_conv2d( - serial_2d_convolution: spec.Serial2DConvolution, -) -> Tuple[vapi.NpuConv2DOperation, int]: - """This is a helper function to capture a list - of arguments to create Vela NpuConv2DOperation object. - """ - has_two_weights = serial_2d_convolution.weight2.address != -1 - has_two_biases = serial_2d_convolution.scale_bias2.address != -1 - - npu_conv2d_op = vapi.NpuConv2DOperation() - npu_conv2d_op.ifm = _create_npu_feature_map(serial_2d_convolution.ifm) - npu_conv2d_op.ofm = _create_npu_feature_map(serial_2d_convolution.ofm) - npu_conv2d_op.kernel = _create_npu_kernel(serial_2d_convolution.kernel) - npu_conv2d_op.weights = ( - [ - _create_npu_address_range(serial_2d_convolution.weight), - _create_npu_address_range(serial_2d_convolution.weight2), - ] - if has_two_weights - else [_create_npu_address_range(serial_2d_convolution.weight)] - ) - weights_zero_point = np.int64(serial_2d_convolution.weight_zero_point.value) - npu_conv2d_op.biases = ( - [ - _create_npu_address_range(serial_2d_convolution.scale_bias), - _create_npu_address_range(serial_2d_convolution.scale_bias2), - ] - if has_two_biases - else [_create_npu_address_range(serial_2d_convolution.scale_bias)] - ) - npu_conv2d_op.padding = _create_npu_padding(serial_2d_convolution.padding) - - npu_conv2d_op.activation = _create_npu_activation(serial_2d_convolution.activation) - if ( - npu_conv2d_op.activation - and npu_conv2d_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU - ): - _convert_clip_bounds(npu_conv2d_op) - - npu_conv2d_op.rounding_mode = _create_npu_rounding_mode(serial_2d_convolution.rounding_mode) - npu_conv2d_op.ifm_upscale = _create_npu_resampling_mode(serial_2d_convolution.upscale) - weights_shape_ohwi = [ - npu_conv2d_op.ofm.shape.depth, - npu_conv2d_op.kernel.height, - npu_conv2d_op.kernel.width, - npu_conv2d_op.ifm.shape.depth, - ] - npu_conv2d_op.block_traversal = vela_api.calculate_block_traversal_mode( - is_depthwise=False, - weights_shape_ohwi=weights_shape_ohwi, - ifm_bitdepth=npu_conv2d_op.ifm.data_type.size_in_bits(), - ) - npu_conv2d_op.block_config = 
_create_npu_block_config(serial_2d_convolution.block_config) - - if not npu_conv2d_op.block_config: - target_accel_config = vela_api.get_accelerator_config() - block_config = vela_api.get_optimal_block_config(npu_conv2d_op, target_accel_config) - npu_conv2d_op.block_config = block_config - - return npu_conv2d_op, weights_zero_point - - -def translate_ethosu_depthwise_conv2d( - tir_call_extern: tvm.tir.Call, -) -> Tuple[vapi.NpuConvDepthWiseOperation, int]: - """This function will translate a TIR call_extern - as produced by NPU Relay to TIR compilation. - - Parameters - ---------- - tir_call_extern : tvm.tir.Call - This should be a TIR call_extern that has agreed upon ordering - for TIR Compiler. See Serial2DDepthwise in - tvm/relay/backend/contrib/ethosu/tir/spec.py for the ordering. - - Returns - ------- - ethosu.vela.api.NpuConvDepthWiseOperation - The vela object containing the params of ethosu_depthwise_conv2d - weights_zero_point : int - The zero point of the weights - """ - serial_object = spec.create_serial_object(spec.Serial2DDepthwise, tir_call_extern.args[1:]) - return _create_npu_op_depthwise_conv2d(serial_object) - - -def _create_npu_op_depthwise_conv2d(serial_2d_depthwise): - npu_depthwise_conv2d_op = vapi.NpuConvDepthWiseOperation() - - npu_depthwise_conv2d_op.ifm = _create_npu_feature_map(serial_2d_depthwise.ifm) - npu_depthwise_conv2d_op.ofm = _create_npu_feature_map(serial_2d_depthwise.ofm) - npu_depthwise_conv2d_op.kernel = _create_npu_kernel(serial_2d_depthwise.kernel) - npu_depthwise_conv2d_op.weights = [_create_npu_address_range(serial_2d_depthwise.weight)] - weights_zero_point = np.int64(serial_2d_depthwise.weight_zero_point.value) - npu_depthwise_conv2d_op.biases = [_create_npu_address_range(serial_2d_depthwise.scale_bias)] - npu_depthwise_conv2d_op.padding = _create_npu_padding(serial_2d_depthwise.padding) - - npu_depthwise_conv2d_op.activation = _create_npu_activation(serial_2d_depthwise.activation) - if ( - npu_depthwise_conv2d_op.activation - and npu_depthwise_conv2d_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU - ): - _convert_clip_bounds(npu_depthwise_conv2d_op) - - npu_depthwise_conv2d_op.rounding_mode = _create_npu_rounding_mode( - serial_2d_depthwise.rounding_mode - ) - npu_depthwise_conv2d_op.ifm_upscale = _create_npu_resampling_mode(serial_2d_depthwise.upscale) - npu_depthwise_conv2d_op.block_config = _create_npu_block_config( - serial_2d_depthwise.block_config - ) - - if not npu_depthwise_conv2d_op.block_config: - target_accel_config = vela_api.get_accelerator_config() - block_config = vela_api.get_optimal_block_config( - npu_depthwise_conv2d_op, target_accel_config - ) - npu_depthwise_conv2d_op.block_config = block_config - - return npu_depthwise_conv2d_op, weights_zero_point - - -def _create_npu_feature_map(serial_feature_map: spec.SerialFeatureMap) -> vapi.NpuFeatureMap: - """This is a helper function to capture a list - of arguments to create Vela NpuFeatureMap object. 
- """ - layout_map = {"NHWC": vapi.NpuLayout.NHWC, "NHCWB16": vapi.NpuLayout.NHCWB16} - datatype_map = { - "uint8": vapi.NpuDataType.UINT8, - "int8": vapi.NpuDataType.INT8, - "uint16": vapi.NpuDataType.UINT16, - "int16": vapi.NpuDataType.INT16, - "int32": vapi.NpuDataType.INT32, - } - layout = str(serial_feature_map.layout.value) - data_type = str(serial_feature_map.data_type.value) - date_type_bytes = np.iinfo(np.dtype(data_type)).bits // 8 - assert layout in layout_map.keys() - assert data_type in datatype_map.keys() - nfm = vapi.NpuFeatureMap() - nfm.data_type = datatype_map[data_type] - nfm.shape = vapi.NpuShape3D( - int(serial_feature_map.height), - int(serial_feature_map.width), - int(serial_feature_map.channels), - ) - nfm.tiles = vapi.NpuTileBox( - int(serial_feature_map.tile_height_0), - int(serial_feature_map.tile_height_1), - int(serial_feature_map.tile_width_0), - [ - serial_feature_map.tile_address_0, - serial_feature_map.tile_address_1, - serial_feature_map.tile_address_2, - serial_feature_map.tile_address_3, - ], - ) - nfm.quantization = _create_npu_quantization( - serial_feature_map.scale, serial_feature_map.zero_point - ) - nfm.layout = layout_map[layout] - nfm.strides = vapi.NpuShape3D( - int(serial_feature_map.stride_h.value) * date_type_bytes, - int(serial_feature_map.stride_w.value) * date_type_bytes, - int(serial_feature_map.stride_c.value) * date_type_bytes, - ) - return nfm - - -def _create_npu_kernel(serial_kernel: spec.SerialKernel) -> vapi.NpuKernel: - """This is a helper function to capture a list - of arguments to create Vela NpuKernel object. - """ - nknl = vapi.NpuKernel( - w=int(serial_kernel.width), - h=int(serial_kernel.height), - stride_x=int(serial_kernel.stride_w), - stride_y=int(serial_kernel.stride_h), - dilation_x=int(serial_kernel.dilation_w), - dilation_y=int(serial_kernel.dilation_h), - ) - return nknl - - -def _create_npu_address_range( - serial_address_range: spec.SerialAddressRange, -) -> vapi.NpuAddressRange: - """This is a helper function to capture a list - of arguments to create Vela NpuAddressRange object. - """ - addr_range = vapi.NpuAddressRange( - # region will be updated later - region=0, - address=serial_address_range.address, - length=int(serial_address_range.length), - ) - return addr_range - - -def _create_npu_quantization( - scale: Union[tvm.tir.FloatImm, float], - zero_point: Union[tvm.tir.IntImm, int], -) -> vapi.NpuQuantization: - """This is a helper function to capture a list - of arguments to create Vela NpuQuantization object. 
- """ - scale = float(scale) - if scale == 0.0: - scale = None - return vapi.NpuQuantization(scale_f32=scale, zero_point=int(zero_point)) - - -def _create_npu_weights_zero_point( - zero_point: Union[int, tvm.tir.IntImm], -) -> int: - """This is a helper function to capture the weights zero point.""" - return int(zero_point) - - -def _create_npu_padding(serial_padding: spec.SerialPadding) -> vapi.NpuPadding: - """This is a helper function to capture a list - of arguments to create Vela NpuPadding object.""" - padding = vapi.NpuPadding( - top=int(serial_padding.top), - left=int(serial_padding.left), - bottom=int(serial_padding.bottom), - right=int(serial_padding.right), - ) - return padding - - -def _create_npu_block_config(serial_block_config: spec.SerialBlockConfig) -> vapi.NpuShape3D: - """A helper function to convert a SerialBlockConfig into an NpuShape3D""" - if serial_block_config.height * serial_block_config.width * serial_block_config.depth == 0: - return None - - block_config = vapi.NpuShape3D( - height=int(serial_block_config.height), - width=int(serial_block_config.width), - depth=int(serial_block_config.depth), - ) - return block_config - - -def _create_npu_activation(serial_activation: spec.SerialActivation) -> vapi.NpuActivation: - """This is a helper function to capture a list - of arguments to create Vela NpuActivation object.""" - if serial_activation.op == "NONE": - return None - if ( - serial_activation.op == "CLIP" - and serial_activation.clip_min == 0 - and serial_activation.clip_max == 0 - ): - return None - op_map = { - "CLIP": vapi.NpuActivationOp.NONE_OR_RELU, - "TANH": vapi.NpuActivationOp.TABLE_LOOKUP, - "SIGMOID": vapi.NpuActivationOp.TABLE_LOOKUP, - "LUT": vapi.NpuActivationOp.TABLE_LOOKUP, - } - op = str(serial_activation.op.value) - assert op in op_map.keys() - act_op = vapi.NpuActivation(op_map[op]) - if serial_activation.op == "CLIP": - act_op.min = int(serial_activation.clip_min.value) - act_op.max = int(serial_activation.clip_max.value) - if op_map[op] == vapi.NpuActivationOp.TABLE_LOOKUP: - act_op.lookup_table_index = 0 - return act_op - - -def _create_npu_resampling_mode( - mode: str, -) -> vapi.NpuResamplingMode: - """This is a helper function to capture a list - of arguments to create Vela NpuResamplingMode object.""" - mode_map = { - "NONE": vapi.NpuResamplingMode.NONE, - "NEAREST": vapi.NpuResamplingMode.NEAREST, - "ZEROS": vapi.NpuResamplingMode.TRANSPOSE, - } - mode = str(mode.value) - assert mode in mode_map.keys() - return mode_map[mode] - - -def _create_npu_rounding_mode( - mode: str, -) -> vapi.NpuRoundingMode: - """This is a helper function to capture a list - of arguments to create Vela NpuRoundingMode object.""" - mode_map = { - "TFL": vapi.NpuRoundingMode.TFL, - "TRUNCATE": vapi.NpuRoundingMode.TRUNCATE, - "NATURAL": vapi.NpuRoundingMode.NATURAL, - } - mode = str(mode.value) - assert mode in mode_map.keys() - return mode_map[mode] - - -def _create_npu_dma_op(serial_copy): - """This is a helper function to capture the list of arguments - to create a NpuDmaOperation object""" - data_type_bytes = np.iinfo(np.dtype(serial_copy.read_address.dtype)).bits // 8 - length = int(serial_copy.length.value) * data_type_bytes - # The buffer size in bytes must be at least 16 bytes - length = max(length, 16) - src = vapi.NpuAddressRange( - # region will be updated later - region=0, - address=serial_copy.read_address, - length=length, - ) - dest = vapi.NpuAddressRange( - # region will be updated later - region=0, - address=serial_copy.write_address, - 
length=length, - ) - return vapi.NpuDmaOperation(src, dest) - - -def translate_ethosu_pooling(tir_call_extern: tvm.tir.Call) -> vapi.NpuPoolingOperation: - """This function will translate a TIR call_extern - as produced by NPU Relay to TIR compilation. - - Parameters - ---------- - tir_call_extern : tvm.tir.Call - This should be a TIR call_extern that has agreed upon ordering - for TIR Compiler. See SerialPooling in - tvm/relay/backend/contrib/ethosu/tir/spec.py for the ordering. - - Returns - ------- - ethosu.vela.api.NpuPoolingOperation - The vela object containing the params of ethosu_pooling - """ - serial_object = spec.create_serial_object(spec.SerialPooling, tir_call_extern.args[1:]) - return _create_npu_op_pooling(serial_object) - - -def _create_npu_op_pooling(serial_pooling: spec.SerialPooling): - pooling_type = serial_pooling.pooling_type - if pooling_type == "AVG": - npu_pooling_op = vapi.NpuPoolingOp.AVERAGE - elif pooling_type == "MAX": - npu_pooling_op = vapi.NpuPoolingOp.MAX - elif pooling_type == "SUM": - npu_pooling_op = vapi.NpuPoolingOp.REDUCE_SUM - - npu_pooling_op = vapi.NpuPoolingOperation(npu_pooling_op) - npu_pooling_op.ifm = _create_npu_feature_map(serial_pooling.ifm) - npu_pooling_op.ofm = _create_npu_feature_map(serial_pooling.ofm) - npu_pooling_op.kernel = _create_npu_kernel(serial_pooling.pool_shape) - npu_pooling_op.padding = _create_npu_padding(serial_pooling.padding) - - npu_pooling_op.activation = _create_npu_activation(serial_pooling.activation) - if ( - npu_pooling_op.activation - and npu_pooling_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU - ): - _convert_clip_bounds(npu_pooling_op) - - npu_pooling_op.rounding_mode = _create_npu_rounding_mode(serial_pooling.rounding_mode) - npu_pooling_op.ifm_upscale = _create_npu_resampling_mode(serial_pooling.upscale) - npu_pooling_op.block_config = _create_npu_block_config(serial_pooling.block_config) - - if not npu_pooling_op.block_config: - target_accel_config = vela_api.get_accelerator_config() - block_config = vela_api.get_optimal_block_config(npu_pooling_op, target_accel_config) - npu_pooling_op.block_config = block_config - - return npu_pooling_op - - -def translate_ethosu_binary_elementwise( - tir_call_extern: tvm.tir.Call, -) -> vapi.NpuElementWiseOperation: - """This function will translate a TIR call_extern - as produced by NPU Relay to TIR compilation. - - Parameters - ---------- - tir_call_extern : tvm.tir.Call - This should be a TIR call_extern that has agreed upon ordering - for TIR Compiler. See SerialBinaryElementwise in - tvm/relay/backend/contrib/ethosu/tir/spec.py for the ordering. 
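# Illustrative alternative, not part of the original file: _create_npu_op_pooling above
# selects the Vela enum with an if/elif chain over the pooling-type string; the same
# mapping can be written table-driven, which also makes unsupported strings fail loudly.
from ethosu.vela import api as vapi  # only importable when Vela is installed

_POOLING_OP_MAP = {
    "AVG": vapi.NpuPoolingOp.AVERAGE,
    "MAX": vapi.NpuPoolingOp.MAX,
    "SUM": vapi.NpuPoolingOp.REDUCE_SUM,
}

def lookup_pooling_op(pooling_type):
    return _POOLING_OP_MAP[pooling_type]  # KeyError instead of a silent fall-through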
- - Returns - ------- - ethosu.vela.api.NpuElementWiseOperation - The vela object containing the params of ethosu_binary_elementwise - """ - serial_object = spec.create_serial_object( - spec.SerialBinaryElementwise, tir_call_extern.args[1:] - ) - return _create_npu_op_binary_elementwise(serial_object) - - -def _create_npu_op_binary_elementwise(serial_binary_elementwise: spec.SerialBinaryElementwise): - operator_type = serial_binary_elementwise.operator_type - if operator_type == "ADD": - op = vapi.NpuElementWiseOp.ADD - elif operator_type == "SUB": - op = vapi.NpuElementWiseOp.SUB - elif operator_type == "MUL": - op = vapi.NpuElementWiseOp.MUL - elif operator_type == "MIN": - op = vapi.NpuElementWiseOp.MIN - elif operator_type == "MAX": - op = vapi.NpuElementWiseOp.MAX - elif operator_type == "SHR": - op = vapi.NpuElementWiseOp.SHR - elif operator_type == "SHL": - op = vapi.NpuElementWiseOp.SHL - - npu_binary_elementwise_op = vapi.NpuElementWiseOperation(op) - npu_binary_elementwise_op.ifm = _create_npu_feature_map(serial_binary_elementwise.ifm) - npu_binary_elementwise_op.ifm2 = _create_npu_feature_map(serial_binary_elementwise.ifm2) - npu_binary_elementwise_op.ofm = _create_npu_feature_map(serial_binary_elementwise.ofm) - npu_binary_elementwise_op.reversed_operands = serial_binary_elementwise.reversed_operands - if serial_binary_elementwise.rescale_config.use_rescale: - npu_binary_elementwise_op.rescale = ( - serial_binary_elementwise.rescale_config.scale.value, - serial_binary_elementwise.rescale_config.shift.value, - ) - - npu_binary_elementwise_op.activation = _create_npu_activation( - serial_binary_elementwise.activation - ) - if ( - npu_binary_elementwise_op.activation - and npu_binary_elementwise_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU - ): - _convert_clip_bounds(npu_binary_elementwise_op) - - npu_binary_elementwise_op.rounding_mode = _create_npu_rounding_mode( - serial_binary_elementwise.rounding_mode - ) - npu_binary_elementwise_op.block_config = _create_npu_block_config( - serial_binary_elementwise.block_config - ) - - if not npu_binary_elementwise_op.block_config: - target_accel_config = vela_api.get_accelerator_config() - block_config = vela_api.get_optimal_block_config( - npu_binary_elementwise_op, target_accel_config - ) - npu_binary_elementwise_op.block_config = block_config - - return npu_binary_elementwise_op - - -def translate_ethosu_unary_elementwise( - tir_extern_call: tvm.tir.Call, -) -> vapi.NpuElementWiseOperation: - """This function will translate a tir extern_call - as produced by Relay to TIR compilation. - Parameters - ---------- - tir_extern_call : tvm.tir.Call - This should be a tir external call that has a agreed upon ordering - for the NPU TIR Compiler. See SerialUnaryElementwise in - tvm/relay/backend/contrib/ethosu/tir/spec.py for the ordering. 
- - Returns - ------- - ethosu.vela.api.NpuElementWiseOperation - The vela object containing the params of ethosu_unary_elementwise - """ - serial_object = spec.create_serial_object(spec.SerialUnaryElementwise, tir_extern_call.args[1:]) - return _create_npu_op_unary_elementwise(serial_object) - - -def _create_npu_op_unary_elementwise(serial_unary_elementwise): - operator_type = serial_unary_elementwise.operator_type - if operator_type == "ABS": - op = vapi.NpuElementWiseOp.ABS - if operator_type == "CLZ": - op = vapi.NpuElementWiseOp.CLZ - - npu_unary_elementwise_op = vapi.NpuElementWiseOperation(op) - npu_unary_elementwise_op.ifm = _create_npu_feature_map(serial_unary_elementwise.ifm) - npu_unary_elementwise_op.ofm = _create_npu_feature_map(serial_unary_elementwise.ofm) - - npu_unary_elementwise_op.activation = _create_npu_activation( - serial_unary_elementwise.activation - ) - if ( - npu_unary_elementwise_op.activation - and npu_unary_elementwise_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU - ): - _convert_clip_bounds(npu_unary_elementwise_op) - - npu_unary_elementwise_op.rounding_mode = _create_npu_rounding_mode( - serial_unary_elementwise.rounding_mode - ) - npu_unary_elementwise_op.block_config = _create_npu_block_config( - serial_unary_elementwise.block_config - ) - - if not npu_unary_elementwise_op.block_config: - target_accel_type = vela_api.get_accelerator_config() - block_config = vela_api.get_optimal_block_config( - npu_unary_elementwise_op, target_accel_type - ) - npu_unary_elementwise_op.block_config = block_config - - return npu_unary_elementwise_op diff --git a/python/tvm/relay/backend/contrib/ethosu/util.py b/python/tvm/relay/backend/contrib/ethosu/util.py deleted file mode 100644 index a402604b4c11..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/util.py +++ /dev/null @@ -1,405 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -""" -Helper utility Enums and Functions used through out code generation. - -The rest of the utility functions are misc. -Refer to the description inside such functions -""" - -from inspect import signature -from enum import Enum -from typing import Union, Tuple, List -import numpy as np # type: ignore - -import tvm # type: ignore -from tvm import relay -from tvm._ffi import register_object -from tvm.runtime import Object -from . import _ffi_api - - -class QConv2DArgs(Enum): - """ - This is a helper enum to obtain the correct index - of qnn.conv2d arguments. - """ - - IFM = 0 - WEIGHTS = 1 - IFM_ZERO_POINT = 2 - WEIGHTS_ZERO_POINT = 3 - IFM_SCALE = 4 - WEIGHTS_SCALE = 5 - - -class QConv2DTransposeArgs(Enum): - """ - This is a helper enum to obtain the correct index - of qnn.conv2d_transpose arguments. 
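# Hypothetical usage sketch, not from the original file: the argument-index enums above
# simply name the positional arguments of a QNN call, so legalization code can avoid
# magic indices such as call.args[2].
def unpack_qnn_conv2d_args(call):
    return {
        "ifm": call.args[QConv2DArgs.IFM.value],
        "weights": call.args[QConv2DArgs.WEIGHTS.value],
        "ifm_zero_point": call.args[QConv2DArgs.IFM_ZERO_POINT.value],
        "weights_zero_point": call.args[QConv2DArgs.WEIGHTS_ZERO_POINT.value],
        "ifm_scale": call.args[QConv2DArgs.IFM_SCALE.value],
        "weights_scale": call.args[QConv2DArgs.WEIGHTS_SCALE.value],
    }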
- """ - - IFM = 0 - WEIGHTS = 1 - IFM_ZERO_POINT = 2 - WEIGHTS_ZERO_POINT = 3 - IFM_SCALE = 4 - WEIGHTS_SCALE = 5 - - -class RequantArgs(Enum): - """ - This is a helper enum to obtain the correct index - of qnn.requantize arguments. - """ - - IFM_SCALE = 1 - IFM_ZERO_POINT = 2 - OFM_SCALE = 3 - OFM_ZERO_POINT = 4 - - -class BiasAddArgs(Enum): - """ - This is a helper enums to obtain the correct index - of qnn.bias_add arguments. - """ - - BIASES = 1 - - -class ClipArgs(Enum): - """ - This is a helper enums to obtain the correct index - of clip arguments. - """ - - A_MIN = 1 - A_MAX = 2 - - -class BinaryElementwiseArgs(Enum): - """This is a helper enums to access the correct index - of binary elementwise arguments - """ - - IFM = 0 - IFM2 = 1 - IFM_SCALE = 2 - IFM_ZERO_POINT = 3 - IFM2_SCALE = 4 - IFM2_ZERO_POINT = 5 - OFM_SCALE = 6 - OFM_ZERO_POINT = 7 - - -class QuantizeArgs(Enum): - """ - This is a helper enums to access the correct index of - quantize arguments - """ - - IFM = 0 - OFM_SCALE = 1 - OFM_ZERO_POINT = 2 - - -class DequantizeArgs(Enum): - """ - This is a helper enums to access the correct index of - dequantize arguments - """ - - IFM = 0 - IFM_SCALE = 1 - IFM_ZERO_POINT = 2 - - -class QDenseArgs(Enum): - """ - This is a helper enum to access the correct index of - qnn.dense arguments - """ - - IFM = 0 - WEIGHTS = 1 - IFM_ZERO_POINT = 2 - WEIGHTS_ZERO_POINT = 3 - IFM_SCALE = 4 - WEIGHTS_SCALE = 5 - - -class QPadArgs(Enum): - """ - This is a helper enum to obtain the correct index - of nn.pad arguments. - """ - - IFM = 0 - IFM_ZERO_POINT = 1 - - -def is_npu_func(func: relay.Function) -> bool: - """Check if the given function is an NPU function.""" - return "Compiler" in func.attrs and func.attrs["Compiler"] == "ethos-u" - - -def is_composite_func(func: relay.Function, name: str) -> bool: - """ - This method checks whether the call is to - a composite function of a given name. - - Parameters - ---------- - func : relay.Function - The header to be displayed along with the dump. - - name : str - The candidate name to be checked - - Returns - -------- - a boolean - """ - - if not hasattr(func, "attrs"): - return False - if "Composite" not in func.attrs.keys(): - return False - composite_name = func.attrs["Composite"] - - return composite_name == name - - -def is_named_ethosu_op(expr: tvm.relay.Expr, name: str) -> bool: - """Checks whether a relay expression matches that of the - named operator. - - Parameters - ---------- - expr : tvm.relay.Expr - The expression to check. - name : str - The name of the expected operator - (without NPU prefix "contrib.ethosu"). - - Returns - ------- - bool - True if expression matches name, false if not. - """ - prefix = "contrib.ethosu." - return ( - isinstance(expr, tvm.relay.expr.Call) - and isinstance(expr.op, tvm.ir.op.Op) - and expr.op.name == prefix + name - ) - - -def get_range_for_dtype_str(dtype: str) -> Tuple[int, int]: - """ - Produce the min,max for a give data type. 
- - Parameters - ---------- - dtype : str - a type string (e.g., int8) - - Returns - ------- - type_info.min : int - the minimum of the range - type_info.max : int - the maximum of the range - """ - - try: - type_info = np.iinfo(dtype) - except ValueError: - type_info = np.finfo(dtype) - return type_info.min, type_info.max - - -def round_away_zero(f: Union[float, np.double, np.single, np.float32, np.float64]) -> np.float64: - """Round the number away from zero towards +inf / -inf""" - offset = -0.5 if (f < 0) else 0.5 - return np.trunc(f + offset) - - -def round_up(a: int, b: int) -> int: - """Round up to a multiple of b""" - return ((a + b - 1) // b) * b - - -def get_accelerator_config(): - """Get the variant of the accelerator to compile for""" - compiler_attrs = tvm.get_global_func("relay.ext.ethos-u.get_compiler_attrs")() - return compiler_attrs.accelerator_config - - -def is_cascader_enabled() -> bool: - """Determine whether the cascader is enabled""" - compiler_attrs = tvm.get_global_func("relay.ext.ethos-u.get_compiler_attrs")() - return bool(compiler_attrs.enable_cascader) - - -def is_copying_constants_disabled() -> bool: - """Determine whether copying constants is disabled for case without cascader""" - compiler_attrs = tvm.get_global_func("relay.ext.ethos-u.get_compiler_attrs")() - return bool(compiler_attrs.disable_copying_constants) - - -def is_striping_enabled() -> bool: - """Determine whether the cascader is enabled""" - compiler_attrs = tvm.get_global_func("relay.ext.ethos-u.get_compiler_attrs")() - return bool(compiler_attrs.enable_striping) - - -def get_arg_count(func): - """Helper function to get the number of - arguments in a python function""" - sig = signature(func) - return len(sig.parameters) - - -def get_dim_value(layout: str, dim: int): - """This is a helper function to retrieve the value - of the dimension given the shape and the layout - """ - assert isinstance(layout, str) - assert dim in list(layout) - for idx, dim_char in enumerate(layout): - if dim_char == dim: - return idx - return None - - -def calculate_size_bytes(expr): - """This is a helper function to calculate the number - of bytes required to hold the tensor/relay.expr""" - try: - type_info = np.iinfo(expr.checked_type.dtype) - except ValueError: - type_info = np.finfo(expr.checked_type.dtype) - element_size = type_info.bits // 8 - elements = np.prod(list(expr.checked_type.shape)) - return element_size * elements - - -@register_object("relay.ext.ethos-u.BaseAddress") -class BaseAddress(Object): - """ - This is a structure to hold base addresses for pointers - provided for the driver. - """ - - def __init__( - self, - name: str, - primfunc_param_idx: int, - region: int, - size: int, - is_runtime_allocation: bool = False, - ): - self.__init_handle_by_constructor__( - _ffi_api.BaseAddress, # type: ignore # pylint: disable=no-member - name, - primfunc_param_idx, - region, - size, - is_runtime_allocation, - ) - - -@register_object("relay.ext.ethos-u.CompilationArtifact") -class CompilationArtifact(Object): - """ - This is a structure to hold binary artifacts - for the microNPU. 
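# Spot-check of the small rounding helpers deleted above; the expected values follow
# directly from their definitions (round up to a multiple of b, round half away from zero).
import numpy as np

assert ((10 + 16 - 1) // 16) * 16 == 16   # round_up(10, 16)
assert ((32 + 16 - 1) // 16) * 16 == 32   # round_up(32, 16)
assert np.trunc(2.5 + 0.5) == 3.0         # round_away_zero(2.5)
assert np.trunc(-2.5 - 0.5) == -3.0       # round_away_zero(-2.5)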
- """ - - def __init__( - self, - function_name: str, - command_stream: str, - encoded_constants: str, - base_addresses: List[BaseAddress], - ): - self.__init_handle_by_constructor__( - _ffi_api.CompilationArtifact, # type: ignore # pylint: disable=no-member - function_name, - command_stream, - encoded_constants, - base_addresses, - ) - - -def create_npu_function_pass(opt_level: int, name: str = ""): - """ - A utility decorator that wraps a given class as an NPU function pass. That is, - a pass that behaves like a function pass and only traverses NPU external - functions. How each NPU function is mutated is defined by the - `transform_npu_function(global_variable, relay_function)` function which should - be created in the class that is to be decorated. See the example below. - - Example - ------- - This small example demonstrates a pass over NPU functions that performs no - mutation. - - @create_npu_function_pass(opt_level=1) - class MyPass: - def transform_npu_function(self, global_var, func): - return func - - mod = tvm.IRModule() - mod = MyPass()(mod) - - Parameters - ---------- - opt_level: int - Optimization level for the module pass. - name: str, optional - Name for the module pass. - - Returns - ------- - decorator - The npu_pass decorator. - """ - - def decorator(npu_pass_class): - @tvm.ir.transform.module_pass(name=name, opt_level=opt_level) - class ModulePassWrapper: - """The wrapper for the NPU pass.""" - - def __init__(self, *args, **kwargs): - self.args = args - self.kwargs = kwargs - - def transform_module(self, mod: tvm.ir.IRModule, _) -> tvm.ir.IRModule: - npu_functions = filter(lambda x: is_npu_func(x[1]), mod.functions.items()) - for global_var, func in npu_functions: - npu_pass = npu_pass_class(*self.args, **self.kwargs) - func = npu_pass.transform_npu_function(global_var, func) - mod.update_func(global_var, func) - return mod - - return ModulePassWrapper - - return decorator diff --git a/python/tvm/relay/backend/contrib/ethosu/vela_api.py b/python/tvm/relay/backend/contrib/ethosu/vela_api.py deleted file mode 100644 index 22f5cdd83b04..000000000000 --- a/python/tvm/relay/backend/contrib/ethosu/vela_api.py +++ /dev/null @@ -1,495 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -This is an adapter module for conversions between TVM and Vela. 
-The following conversion APIs are added : - *Obtaining the best block config - *Compressing weights - *Packing biases -""" -import logging -import math -from typing import List, Optional, Tuple - -import numpy as np # type: ignore -from ethosu.vela import api as vapi # type: ignore -from ethosu.vela.architecture_allocator import find_block_config -from ethosu.vela.architecture_features import Accelerator, create_default_arch -from ethosu.vela.operation import NpuBlockType -from ethosu.vela.register_command_stream_generator import resampling_mode_map -from ethosu.vela.register_command_stream_util import to_kernel -from ethosu.vela.shape4d import Shape4D - -import tvm -from tvm.relay.backend.contrib.ethosu import tir_to_cs_translator as tirtocs -from tvm.relay.backend.contrib.ethosu import util # type: ignore - -# pylint: disable=invalid-name -logger = logging.getLogger("Ethos-U") - -VELA_TO_NP_DTYPES = { - vapi.NpuDataType.UINT8: np.uint8, - vapi.NpuDataType.UINT16: np.uint16, - vapi.NpuDataType.INT8: np.int8, - vapi.NpuDataType.INT16: np.int16, - vapi.NpuDataType.INT32: np.int32, -} - -SCALE_BIAS_LENGTH = 10 - - -def get_optimal_block_config( - npu_op: vapi.NpuOperation, accel_config: vapi.NpuAccelerator -) -> vapi.NpuShape3D: - """ - "The NPU's unit of work is known as a block. It will fetch block(s) from Input - Feature Map (IFM) and a compute block for Output Feature Map (OFM). - Therefore, we need to pick an optimal block configuration considering bandwidth - to bring IFM blocks and the number of OFM block computes need to happen - to cover the OFM as indicated by the npu op. - For the case when cascader is enabled, the logic of choosing the optimal configuration block - from TVM will be used in other cases, the Vela's logic will be used except - the cases when dev_force_block_config option is specified. - - Parameters - ---------- - npu_op : ethosu.vela.api.NpuOperation - The NPU operation and its params - accel_config : ethosu.vela.api.NpuAccelerator - The NPU accelerator config - - Returns - ------- - ethosu.vela.api.NpuShape3D : - The optimal block config for the operator - """ - options = tvm.transform.PassContext.current().config.get("relay.ext.ethos-u.options", None) - if options and options.dev_force_block_config: - block_config = [int(v) for v in options.dev_force_block_config.split("x")] - return vapi.NpuShape3D(height=block_config[0], width=block_config[1], depth=block_config[2]) - elif options and options.enable_cascader: - all_valid_block_configs = vapi.npu_find_block_configs(npu_op, accel_config) - return _get_optimal_block_config(all_valid_block_configs) - else: - return _find_block_config_with_vela(npu_op, accel_config) - - -def _find_block_config_with_vela( - npu_op: vapi.NpuOperation, accelerator: vapi.NpuAccelerator -) -> vapi.NpuShape3D: - """An internal function to get block config using Vela's logic. 
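# Illustrative sketch, not part of the original file: get_optimal_block_config above
# honours the "dev_force_block_config" compiler option, given as an "HxWxD" string;
# parsing it reduces to:
def parse_forced_block_config(option):
    height, width, depth = (int(v) for v in option.split("x"))
    return height, width, depth

# e.g. parse_forced_block_config("16x8x32") -> (16, 8, 32)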
- - Parameters - ---------- - npu_op : ethosu.vela.api.NpuOperation - The NPU operation - accelerator : ethosu.vela.api.NpuAccelerator - The NPU accelerator - - Returns - ------- - ethosu.vela.api.NpuShape3D : - The optimal block config for the operator - """ - if isinstance(npu_op, vapi.NpuConv2DOperation): - block_type = NpuBlockType.ConvolutionMxN - elif isinstance(npu_op, vapi.NpuConvDepthWiseOperation): - block_type = NpuBlockType.ConvolutionDepthWise - elif isinstance(npu_op, vapi.NpuPoolingOperation): - block_type = ( - NpuBlockType.ReduceSum - if npu_op.sub_op_type == vapi.NpuPoolingOp.REDUCE_SUM - else NpuBlockType.Pooling - ) - elif isinstance(npu_op, vapi.NpuElementWiseOperation): - block_type = NpuBlockType.ElementWise - else: - assert 0, "Unsupported operation" - - ifm_shape = Shape4D(1, npu_op.ifm.shape.height, npu_op.ifm.shape.width, npu_op.ifm.shape.depth) - ifm2_shape = None - if npu_op.ifm2: - ifm2_shape = Shape4D( - 1, npu_op.ifm2.shape.height, npu_op.ifm2.shape.width, npu_op.ifm2.shape.depth - ) - ofm_shape = Shape4D(1, npu_op.ofm.shape.height, npu_op.ofm.shape.width, npu_op.ofm.shape.depth) - - ifm_resampling_mode = resampling_mode_map[npu_op.ifm_upscale] - ifm_bits = npu_op.ifm.data_type.size_in_bits() - lut_banks = 0 - if npu_op.activation: - lut_banks = 2 if npu_op.activation.op_type == vapi.NpuActivationOp.TABLE_LOOKUP else 0 - - has_scaling = True - for tensor in [npu_op.ifm, npu_op.ifm2, npu_op.ofm]: - if tensor and tensor.quantization is None: - has_scaling = False - break - - arch = create_default_arch(Accelerator.from_npu_accelerator(accelerator)) - - cfg = find_block_config( - arch, - block_type, - ofm_shape, - ifm_shape, - ifm2_shape, - npu_op.ifm2_scalar is not None, - ifm_bits, - to_kernel(npu_op.kernel), - lut_banks, - has_scaling, - ifm_resampling_mode, - ) - assert cfg is not None, f"There is no configuration suitable for {accelerator}" - return vapi.NpuShape3D(cfg.ofm_block.height, cfg.ofm_block.width, cfg.ofm_block.depth) - - -def _get_optimal_block_config(all_valid_block_configs: List[vapi.NpuShape3D]) -> vapi.NpuShape3D: - """An internal function to get block config with largest depth - and then highest volume/area""" - assert isinstance(all_valid_block_configs, list) - for block_cfg in all_valid_block_configs: - assert isinstance(block_cfg, vapi.NpuShape3D) - - # Getting the largest volume block for benchmarking - all_valid_block_configs.sort( - key=lambda _cfg: _cfg.depth * _cfg.height * _cfg.width, reverse=True - ) - largest_volume_block_config = all_valid_block_configs[0] - largest_volume = ( - largest_volume_block_config.depth - * largest_volume_block_config.height - * largest_volume_block_config.width - ) - - all_valid_block_configs.sort(key=lambda _cfg: _cfg.depth, reverse=True) - max_d = all_valid_block_configs[0].depth - max_depth_block_configs = [_cfg for _cfg in all_valid_block_configs if _cfg.depth == max_d] - max_depth_block_configs.sort(key=lambda _cfg: _cfg.height * _cfg.width, reverse=True) - max_area = max_depth_block_configs[0].height * max_depth_block_configs[0].width - max_area_depth_block_configs = [ - _cfg for _cfg in max_depth_block_configs if _cfg.height * _cfg.width == max_area - ] - # This to get a deterministic anwser everytime - max_area_depth_block_configs.sort(key=lambda _cfg: _cfg.height, reverse=True) - assert len(max_area_depth_block_configs) > 0 - current_volume = ( - max_area_depth_block_configs[0].depth - * max_area_depth_block_configs[0].height - * max_area_depth_block_configs[0].width - ) - logger.info("Using 
block config=%s", max_area_depth_block_configs[0]) - logger.info( - "Quality of the block config w.r.t. max volume block config=%s", - 100.0 * (current_volume / largest_volume), - ) - return max_area_depth_block_configs[0] - - -def encode_weights( - tir_extern_call: tvm.tir.Call, values: np.ndarray, accel_config: vapi.NpuAccelerator -): - """This is an API function to compress weights by passing - a tir_extern_call to NPU Convolution operation and values. - - Parameters - ---------- - tir_extern_call : tvm.tir.Call - tir_extern_call to NPU Convolution operation - values : numpy.ndarray - The constant flattened weight data in OHWI layout - accel_config : ethosu.vela.api.NpuAccelerator - The NPU accelerator config - - Returns - ------- - bytearray - Compressed weights - """ - supported_ops = { - "ethosu_conv2d": tirtocs.translate_ethosu_conv2d, - "ethosu_depthwise_conv2d": tirtocs.translate_ethosu_depthwise_conv2d, - } - op = str(tir_extern_call.args[0].value) - assert op in supported_ops.keys() - npu_op, weights_zero_point = supported_ops[op](tir_extern_call) - is_depthwise = op == "ethosu_depthwise_conv2d" - # Recover the original shape if we are dealing with a flattened tensor - if len(values.shape) == 1: - shape_ohwi = ( - npu_op.ofm.shape.depth, - npu_op.kernel.height, - npu_op.kernel.width, - 1 if is_depthwise else npu_op.ifm.shape.depth, - ) - assert values.size == np.prod(shape_ohwi) - values = np.reshape(values, shape_ohwi) - - return compress_weights( - weights=values, - weights_zp=weights_zero_point, - # The weight layout is assumed to be OHWI, always. - weights_layout="OHWI", - ifm_bitdepth=npu_op.ifm.data_type.size_in_bits(), - block_depth=npu_op.block_config.depth, - dilation=(npu_op.kernel.dilation_x, npu_op.kernel.dilation_y), - accel_config=accel_config, - is_depthwise=is_depthwise, - ) - - -def compress_weights( - weights: np.ndarray, - weights_zp: int, - weights_layout: str, - ifm_bitdepth: int, - block_depth: int, - dilation: Tuple[int, int], - accel_config: vapi.NpuAccelerator, - is_depthwise: Optional[bool] = False, -) -> bytearray: - """The NPU requires the weights to be compressed - to be executed. Therefore, this function calls into - the Vela APIs to compress the weights. 
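# Illustrative sketch, not part of the original file: encode_weights above recovers the
# OHWI shape of a flattened constant weight tensor from the translated NPU op before
# handing it to Vela. The same reshape in isolation, with hypothetical argument names:
import numpy as np

def reshape_flat_weights_ohwi(flat, ofm_depth, kernel_h, kernel_w, ifm_depth, is_depthwise):
    shape_ohwi = (ofm_depth, kernel_h, kernel_w, 1 if is_depthwise else ifm_depth)
    assert flat.size == np.prod(shape_ohwi)
    return np.reshape(flat, shape_ohwi)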
- - Parameters - ---------- - weights : numpy.ndarray - The raw weights - weights_zp : int - The zero point of the weights - weights_layout : str - A string literal indicating the layout - Supported values : HWIO, HWOI, OHWI - ifm_bitdepth : int - The bit depth of the ifm the weights are used with - block_depth : int - The depth of the optimal block config for the operator - dilation : tuple - A tuple of 2 elements indicating dilation in h and w - accel_config : ethosu.vela.api.NpuAccelerator - The NPU accelerator config - is_depthwise : bool, Optional - This indicates whether the weights are compressed for depthwise convolution - - Returns - ------- - compressed_weights : bytearray - Compressed weights - """ - layout_transform_indices = {"HWIO": (3, 0, 1, 2), "HWOI": (2, 0, 1, 3), "OHWI": (0, 1, 2, 3)} - assert weights_layout in layout_transform_indices.keys() - assert isinstance(weights_zp, np.int64) - weights = weights.astype(np.int16) - weights_zp - # Vela needs the weights in OHWI layout - weights_ohwi = np.transpose(weights, layout_transform_indices[weights_layout]) - shape_ohwi = [ - weights.shape[layout_transform_indices[weights_layout][0]], - weights.shape[layout_transform_indices[weights_layout][1]], - weights.shape[layout_transform_indices[weights_layout][2]], - weights.shape[layout_transform_indices[weights_layout][3]], - ] - block_traversal = calculate_block_traversal_mode(is_depthwise, shape_ohwi, ifm_bitdepth) - - compressed_weights = vapi.npu_encode_weights( - accelerator=accel_config, - weights_volume=weights_ohwi, - dilation_xy=dilation, - ifm_bitdepth=ifm_bitdepth, - ofm_block_depth=block_depth, - is_depthwise=is_depthwise, - block_traversal=block_traversal, - ) - return compressed_weights - - -def calculate_block_traversal_mode( - is_depthwise: bool, weights_shape_ohwi: List[int], ifm_bitdepth: int -) -> vapi.NpuBlockTraversal: - """Calculate a block traversal mode given whether the op is depthwise convolution, - shape of weights and bit-depth of the ifm. - """ - - if is_depthwise: - return vapi.NpuBlockTraversal.DEPTH_FIRST - # Determine which block traversal strategy has better DPU utilization - kernel_size = weights_shape_ohwi[1] * weights_shape_ohwi[2] - depth_utilization = weights_shape_ohwi[3] / util.round_up( - weights_shape_ohwi[3], 32 if ifm_bitdepth == 8 else 16 - ) - part_kernel_utilization = (weights_shape_ohwi[3] / util.round_up(weights_shape_ohwi[3], 8)) * ( - kernel_size / util.round_up(kernel_size, 4 if ifm_bitdepth == 8 else 2) - ) - if part_kernel_utilization >= depth_utilization or weights_shape_ohwi[3] <= 8: - # Part-kernel first is always better for ifm depths <= 8 - return vapi.NpuBlockTraversal.PART_KERNEL_FIRST - return vapi.NpuBlockTraversal.DEPTH_FIRST - - -def pack_biases( - biases: np.ndarray, - ifm_scale: float, - ifm_dtype: np.dtype, - weight_scales: np.ndarray, - ofm_scale: float, - is_activation_tanh_or_sigmoid: bool = False, -) -> np.ndarray: - """ - The NPU requires the each bias value to be packed with - output scale parameters in a 80-bit format (that is returned - via npu_encode_bias API). This function will pack such values - to a binary artifact that the NPU will use in the execution. - - - Parameters - ---------- - biases : numpy.ndarray - The values of biases - ifm_scale : float - The quantization scale parameter of input feature map - ifm_dtype : numpy.dtype - The data type of input feature map data. 
- weight_scales : numpy.ndarray - The quantization scale parameter of weight feature map - This could be a tuple if per-channel quantization is present. - ofm_scale : float - The quantization scale parameter of output feature map. - is_activation_tanh_or_sigmoid : bool - Indicates whether the fused activation function is tanh or sigmoid. - - Returns - ------- - scale_bias : numpy.ndarray - Packed scales/biases as the hardware requires them. - """ - # The BYOC infra should not partition anything else. - supported_ifm_dtypes = (np.uint8, np.int8, np.int16) - assert ifm_dtype in supported_ifm_dtypes - - if weight_scales.size == 1: - weight_scales = [weight_scales] * biases.size - - hw_bias_scales = _calculate_hw_bias_scales( - ifm_scale, weight_scales, ofm_scale, ifm_dtype, is_activation_tanh_or_sigmoid - ) - assert len(hw_bias_scales) == biases.size - biases = biases.astype("int64") - packed_biases = bytearray() - for idx, scale in enumerate(hw_bias_scales): - packed_biases.extend(vapi.npu_encode_bias(biases[idx], *scale)) - scale_bias = np.frombuffer(packed_biases, dtype=np.uint8) - scale_bias = np.reshape(scale_bias, (-1, 10)) - return scale_bias - - -def _quantize_scale(scale: float) -> Tuple[int, int]: - """Quantize floating point scale into 32-bit int scale with a 6-bit shift. - This is to be used with 8-bit data. - """ - mantissa, exponent = math.frexp(scale) - mantissa_scaled = mantissa * (1 << 31) - mantissa_scaled = int(util.round_away_zero(mantissa_scaled)) - required_shift = 31 - exponent - if required_shift < 0 or required_shift >= (1 << 6): - # Shift outside of valid range, set scale to 0 - return 0, 16 - - return mantissa_scaled, required_shift - - -def _reduced_quantize_scale(scale: float) -> Tuple[int, int]: - """A reduction of precision is required for 16 bit data.""" - mantissa_scaled, required_shift = _quantize_scale(scale) - # This is max a signed 16-bit number could represent - max_reduced_mantissa_scaled = (1 << 15) - 1 - # if the current value is larger than pre-scaled max_reduced_mantissa_scaled - # we need to saturate the anwser to max_reduced_mantissa_scaled - if mantissa_scaled >= max_reduced_mantissa_scaled << 16: - reduced_mantissa_scaled = max_reduced_mantissa_scaled - else: - reduced_mantissa_scaled = (mantissa_scaled + (1 << 15)) >> 16 - reduced_shift = required_shift - 16 - - if required_shift < 0 or required_shift >= (1 << 6): - # Shift outside of valid range, set scale to 0 - return 0, 16 - - return reduced_mantissa_scaled, reduced_shift - - -def _calculate_hw_bias_scales( - ifm_scale: float, - weight_scales: List[float], - ofm_scale: float, - ifm_dtype: np.dtype, - is_faf_tanh_sigmoid: bool = False, -) -> List[Tuple[int, int]]: - """This function will produce a scale that is calculated using scales of ifm, - weights and ofm. 
It is also important to note that if per-channel / per-value - quantization required they should go into hw bias scales""" - if is_faf_tanh_sigmoid: - ifm_scale = ifm_scale * 0x3000 - if ifm_dtype == np.uint8: - bias_scales = [np.double(ifm_scale * ws) / np.double(ofm_scale) for ws in weight_scales] - else: - assert ifm_dtype in (np.int8, np.int16) - ifm_scale_dbl = np.double(ifm_scale) - ofm_scale_dbl = np.double(ofm_scale) - bias_scales = [ifm_scale_dbl * np.double(ws) / ofm_scale_dbl for ws in weight_scales] - - if ifm_dtype == np.int16: - hw_bias_scales = [_reduced_quantize_scale(bs) for bs in bias_scales] - else: - assert ifm_dtype in (np.uint8, np.int8) - hw_bias_scales = [_quantize_scale(bs) for bs in bias_scales] - - return hw_bias_scales - - -def get_accelerator_config() -> vapi.NpuAccelerator: - """Get the configuration of the NPU accelerator. - - The configuration string provided as a compiler option is converted into - an NpuAccelerator object. Valid configuration strings: - - 'ethos-u55-256' - - 'ethos-u55-128' - - 'ethos-u55-64' - - 'ethos-u55-32' - - """ - npu_accel_str_map = { - "ethos-u55-256": vapi.NpuAccelerator.Ethos_U55_256, - "ethos-u55-128": vapi.NpuAccelerator.Ethos_U55_128, - "ethos-u55-64": vapi.NpuAccelerator.Ethos_U55_64, - "ethos-u55-32": vapi.NpuAccelerator.Ethos_U55_32, - "ethos-u65-256": vapi.NpuAccelerator.Ethos_U65_256, - "ethos-u65-512": vapi.NpuAccelerator.Ethos_U65_512, - } - compiler_attrs = tvm.get_global_func("relay.ext.ethos-u.get_compiler_attrs")() - accel_config_str = compiler_attrs.accelerator_config - assert accel_config_str in npu_accel_str_map.keys(), f"{accel_config_str} is not supported" - return npu_accel_str_map[accel_config_str] - - -def get_max_copy_movements() -> int: - """Get maximum copy movements for CopyComputeReordering pass. - max_outstanding_dma from architecture features indicates how many - DMA operations can be in-progress. - """ - arch = create_default_arch(Accelerator.from_npu_accelerator(get_accelerator_config())) - return arch.max_outstanding_dma diff --git a/python/tvm/relay/op/contrib/__init__.py b/python/tvm/relay/op/contrib/__init__.py index 3a7b8db55f4c..33cf449db0ab 100644 --- a/python/tvm/relay/op/contrib/__init__.py +++ b/python/tvm/relay/op/contrib/__init__.py @@ -22,7 +22,6 @@ from .dnnl import * from .bnns import * from .coreml import * -from .ethosn import * from .libtorch import * from .tensorrt import * from .cutlass import * diff --git a/python/tvm/relay/op/contrib/_ethosn.py b/python/tvm/relay/op/contrib/_ethosn.py deleted file mode 100644 index 9c7c922fdfb0..000000000000 --- a/python/tvm/relay/op/contrib/_ethosn.py +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
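# Illustrative sketch, not part of the original sources: the bias-packing helpers deleted
# above encode a float rescale factor as a 31-bit mantissa plus a right-shift, so that
# scale ~= mantissa_scaled / 2**shift (the removed helper additionally saturates shifts
# outside [0, 64) and rounds away from zero).
import math

def quantize_scale_sketch(scale):
    mantissa, exponent = math.frexp(scale)            # scale == mantissa * 2**exponent
    mantissa_scaled = int(round(mantissa * (1 << 31)))
    shift = 31 - exponent
    return mantissa_scaled, shift

assert quantize_scale_sketch(0.25) == (1 << 30, 32)   # 2**30 / 2**32 == 0.25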
- -"""Expose 'is supported' functions to Python.""" - -import tvm._ffi - -tvm._ffi._init_api("relay.ethos-n.support", __name__) -tvm._ffi._init_api("relay.backend.contrib.ethos-n", __name__) diff --git a/python/tvm/relay/op/contrib/cmsisnn.py b/python/tvm/relay/op/contrib/cmsisnn.py deleted file mode 100644 index ed620f0ff125..000000000000 --- a/python/tvm/relay/op/contrib/cmsisnn.py +++ /dev/null @@ -1,394 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Arm(R) CMSIS-NN supported operators for Cortex-M.""" -import tvm.ir -from tvm.target import Target -from tvm.relay import transform -from tvm.relay.build_module import bind_params_by_name - -from ...dataflow_pattern import is_constant, is_op, wildcard -from .register import register_pattern_table - -tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__) - - -def enabled(): - return "cmsis-nn" in Target.list_kinds() - - -def partition_for_cmsisnn(mod, params=None, mod_name="default", **opts): - """Partition the graph greedily offloading supported - operators on Cortex-M using CMSIS-NN - - Parameters - ---------- - mod : Module - The module to run passes on. - params : Optional[Dict[str, NDArray]] - Constant input parameters. - mod_name: str, optional - The module name - - Returns - ------- - ret : Module - annotated and partitioned module. 
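# Minimal usage sketch of the removed partition_for_cmsisnn entry point documented above.
# `mod`, `params` and `target` are placeholders for a real Relay module, its constants and
# a Cortex-M C target; only the call pattern is illustrated.
from tvm import relay
from tvm.relay.op.contrib.cmsisnn import partition_for_cmsisnn

def build_with_cmsisnn(mod, params, target):
    mod = partition_for_cmsisnn(mod, params)               # annotate + partition
    return relay.build(mod, target=target, params=params)  # then build as usual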
- """ - if params: - mod["main"] = bind_params_by_name(mod["main"], params) - - seq = tvm.transform.Sequential( - [ - transform.InferType(), - transform.MergeComposite(pattern_table()), - transform.AnnotateTarget("cmsis-nn"), - transform.PartitionGraph(mod_name=mod_name), - GenerateCMSISNNConstants(), - CMSISNNFusePads(), - ScalarToTensorConstants(), - ExtractConstantsFromPartitionedFunction(), - transform.InferType(), - ] - ) - return seq(mod) - - -@register_pattern_table("cmsis-nn") -def pattern_table(): - """Get the CMSIS-NN compiler pattern table.""" - - def qnn_softmax_pattern(): - """Create pattern for quantized softmax""" - pattern = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant()) - pattern = is_op("nn.softmax")(pattern) - pattern = is_op("qnn.quantize")(pattern, is_constant(), is_constant()) - return pattern - - def check_qnn_softmax(pattern): - """Check if softmax is supported by CMSIS-NN.""" - dequantize_call = pattern.args[0].args[0] - scale = pattern.args[1].data.numpy().item(0) - zero_point = pattern.args[2].data.numpy().item(0) - - # check for dtypes of quantize and dequantize - if ( - (scale == 1.0 / 256 and zero_point == -128) - and pattern.attrs.out_dtype == "int8" - and dequantize_call.args[0].checked_type.dtype == "int8" - ): - return True - - if ( - (scale == 1.0 / 32768 and zero_point == 0) - and pattern.attrs.out_dtype == "int16" - and dequantize_call.args[0].checked_type.dtype == "int16" - ): - return True - - return False - - def qnn_conv2d_pattern(with_pad): - """Create pattern for qnn.conv2D with optional pad and/or optional fused relu.""" - conv2d_input = wildcard() - if with_pad: - conv2d_input = is_op("nn.pad")(wildcard(), is_constant()) - qnn_conv2d = is_op("qnn.conv2d")( - conv2d_input, - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - ) - bias_add = is_op("nn.bias_add")(qnn_conv2d, is_constant()) - req = is_op("qnn.requantize")( - qnn_conv2d | bias_add, is_constant(), is_constant(), is_constant(), is_constant() - ) - clip_or_req = req.optional(is_op("clip")) - return clip_or_req - - def check_qnn_conv2d(pattern): - """Check if the Conv2D is supported by CMSIS-NN.""" - if str(pattern.op.name) == "clip": - relu = pattern - requantize = relu.args[0] - else: - requantize = pattern - requantize_input = requantize.args[0] - bias_add = None - if str(requantize_input.op.name) == "nn.bias_add": - bias_add = requantize_input - conv2d = bias_add.args[0] - else: - conv2d = requantize_input - conv2d_input = conv2d.args[0] - conv2d_weight = conv2d.args[1] - - # check if depthwise Conv2D - kernel_layout = conv2d.attrs.kernel_layout - pos_o = kernel_layout.index("O") - groups = conv2d.attrs.groups - is_depthwise = False - if groups == int(conv2d_input.checked_type.shape[3]) and groups == int( - conv2d_weight.checked_type.shape[pos_o] - ): - is_depthwise = True - - # check if dtypes are supported for the following entities - # (input_dtype, weight_dtype, bias_dtype, out_dtype, pattern_dtype) - are_dtypes_valid = False - conv2d_input_dtype = conv2d_input.checked_type.dtype - if bias_add: - bias_dtype = bias_add.args[1].checked_type.dtype - else: - # this is only to enable to following check that validates all sorts of dtypes - bias_dtype = "int32" if conv2d_input_dtype == "int8" else "int64" - valid_dtypes = None - if conv2d_input_dtype == "int8": - valid_dtypes = ("int8", "int8", "int32", "int32", "int8") - elif conv2d_input_dtype == "int16": - valid_dtypes = ("int16", "int8", "int64", "int64", "int16") - - if ( - 
conv2d_input_dtype, - conv2d_weight.checked_type.dtype, - bias_dtype, - conv2d.attrs.out_dtype, - pattern.checked_type.dtype, - ) == valid_dtypes: - are_dtypes_valid = True - - # input_zero_point should be 0 when int16 - valid_input_zp = True - if conv2d_input_dtype == "int16" and conv2d.args[2].data.numpy().item(0) != 0: - valid_input_zp = False - - # kernel zero_point should be 0 - kernel_zp = conv2d.args[3].data.numpy() - kernel_zp = [kernel_zp] if kernel_zp.ndim == 0 else kernel_zp - - # combination of all checks to decide if pattern is eligible for partitioning - ret = ( - are_dtypes_valid - and valid_input_zp - and all([zp == 0 for zp in kernel_zp]) - and (not is_depthwise or bias_add is not None) - ) - return ret - - def check_qnn_conv2d_pad(pattern): - """Check if the Pad followed by Conv2D is supported by CMSIS-NN.""" - if str(pattern.op.name) == "clip": - relu = pattern - requantize = relu.args[0] - else: - requantize = pattern - requantize_input = requantize.args[0] - if str(requantize_input.op.name) == "nn.bias_add": - bias_add = requantize_input - conv2d = bias_add.args[0] - else: - conv2d = requantize_input - conv2d_input = conv2d.args[0] - - # check if sum of paddings from pad() and conv2d() satisfies CMSIS-NN constraints - can_pad_be_fused = True - if isinstance(conv2d_input, tvm.relay.expr.Call) and str(conv2d_input.op.name) == "nn.pad": - pad_top, pad_left, pad_bottom, pad_right = GetEffectiveConv2DPadding( - conv2d, conv2d_input - ) - # check if difference in the side paddings is 1 along each dimension - pad_w_diff = int(pad_right - pad_left) - pad_h_diff = int(pad_bottom - pad_top) - can_pad_be_fused = pad_w_diff in [0, 1] and pad_h_diff in [0, 1] - - ret = check_qnn_conv2d(pattern) and can_pad_be_fused - return ret - - def qnn_fully_connected_pattern(): - """Create pattern for qnn.dense with optional Relu.""" - qnn_fc = is_op("qnn.dense")( - wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant() - ) - bias_add = is_op("nn.bias_add")(qnn_fc, is_constant()) - req = is_op("qnn.requantize")( - qnn_fc | bias_add, is_constant(), is_constant(), is_constant(), is_constant() - ) - clip_or_req = req.optional(is_op("clip")) - return clip_or_req - - def check_qnn_fully_connected(pattern): - """Check if the fully connected is supported by CMSIS-NN.""" - if str(pattern.op.name) == "clip": - relu = pattern - requantize = relu.args[0] - else: - requantize = pattern - requantize_input = requantize.args[0] - bias_add = None - if str(requantize_input.op.name) == "nn.bias_add": - bias_add = requantize_input - fc = bias_add.args[0] - else: - fc = requantize_input - fc_input = fc.args[0] - fc_weight = fc.args[1] - - are_dtypes_valid = False - fc_input_dtype = fc_input.checked_type.dtype - if bias_add: - bias_dtype = bias_add.args[1].checked_type.dtype - else: - bias_dtype = "int32" if fc_input_dtype == "int8" else "int64" - - valid_dtypes = None - if fc_input_dtype == "int8": - valid_dtypes = ("int8", "int8", "int32", "int32", "int8") - elif fc_input_dtype == "int16": - valid_dtypes = ("int16", "int8", "int64", "int64", "int16") - - if ( - fc_input_dtype, - fc_weight.checked_type.dtype, - bias_dtype, - fc.attrs.out_dtype, - pattern.checked_type.dtype, - ) == valid_dtypes: - are_dtypes_valid = True - - # kernel zero_point should be 0 - kernel_zp = fc.args[3].data.numpy().item(0) - - return are_dtypes_valid and kernel_zp == 0 - - def qnn_avg_pool2d_pattern(): - """Matches average pooling with optional Relu""" - pattern = is_op("cast")(wildcard()) - pattern = 
is_op("nn.avg_pool2d")(pattern) - pattern = is_op("cast")(pattern) - pattern = pattern.optional(is_op("clip")) - return pattern - - def check_qnn_avg_pool2d(pattern): - """Check if avg pool2d is supported by CMSIS-NN.""" - output = pattern - - if str(pattern.op.name) == "clip": - pooling = pattern.args[0].args[0] - else: - pooling = pattern.args[0] - input_op = pooling.args[0].args[0] - - return ( - pooling.attrs.layout == "NHWC" - and int(input_op.checked_type.shape[0]) == 1 - and ( - (input_op.checked_type.dtype == "int8" and output.checked_type.dtype == "int8") - or (input_op.checked_type.dtype == "int16" and output.checked_type.dtype == "int16") - ) - ) - - def qnn_max_pool2d_pattern(): - """Matches max pool2d with optional Relu""" - pattern = is_op("nn.max_pool2d")(wildcard()) - pattern = pattern.optional(is_op("clip")) - return pattern - - def check_qnn_max_pool2d(pattern): - """Check if max pool2d is supported by CMSIS-NN.""" - output = pattern - - if str(pattern.op.name) == "clip": - pooling = pattern.args[0] - else: - pooling = pattern - input_op = pooling.args[0] - - return ( - pooling.attrs.layout == "NHWC" - and int(input_op.checked_type.shape[0]) == 1 - and ( - (input_op.checked_type.dtype == "int8" and output.checked_type.dtype == "int8") - or (input_op.checked_type.dtype == "int16" and output.checked_type.dtype == "int16") - ) - ) - - def binary_op_pattern(op): - """Matches QNN binary operation""" - pattern = is_op(f"qnn.{op}")( - wildcard(), - wildcard(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - ) - return pattern.optional(is_op("clip")) - - def check_qnn_binary_op(pattern): - """Check if binary op is supported by CMSIS-NN.""" - binary_op = pattern - if str(pattern.op.name) == "clip": - binary_op = pattern.args[0] - - arg0 = binary_op.args[0] - arg1 = binary_op.args[1] - - # Check arguments are not scalar. - if ( - isinstance(arg0, tvm.relay.expr.Constant) - and len(arg0.checked_type.shape) == 0 - and isinstance(arg1, tvm.relay.expr.Constant) - and len(arg1.checked_type.shape) == 0 - ): - return False - - arg0_type = arg0.checked_type.dtype - arg1_type = arg1.checked_type.dtype - - # Check arguments are of valid type. - if arg0_type not in ["int8", "int16"]: - return False - - # Check arguments are the same type. - if arg0_type != arg1_type: - return False - - # Check zero points are non-zero (arm_elementwise_(add|mul)_s16 does not - # handle non-zero zero points). 
- if arg0_type == "int16" and str(binary_op.op.name) in ["qnn.add", "qnn.mul"]: - arg_0_zero_point = binary_op.args[3].data.numpy() - arg_1_zero_point = binary_op.args[5].data.numpy() - output_zero_point = binary_op.args[7].data.numpy() - if any([arg_0_zero_point, arg_1_zero_point, output_zero_point]): - return False - - return True - - return [ - ("cmsis-nn.qnn_conv2d", qnn_conv2d_pattern(with_pad=True), check_qnn_conv2d_pad), - ("cmsis-nn.qnn_conv2d", qnn_conv2d_pattern(with_pad=False), check_qnn_conv2d), - ("cmsis-nn.qnn_fully_connected", qnn_fully_connected_pattern(), check_qnn_fully_connected), - ("cmsis-nn.qnn_avg_pool2d", qnn_avg_pool2d_pattern(), check_qnn_avg_pool2d), - ("cmsis-nn.qnn_max_pool2d", qnn_max_pool2d_pattern(), check_qnn_max_pool2d), - ("cmsis-nn.qnn_mul", binary_op_pattern("mul"), check_qnn_binary_op), - ("cmsis-nn.qnn_add", binary_op_pattern("add"), check_qnn_binary_op), - ("cmsis-nn.qnn_softmax", qnn_softmax_pattern(), check_qnn_softmax), - ] diff --git a/python/tvm/relay/op/contrib/ethosn.py b/python/tvm/relay/op/contrib/ethosn.py deleted file mode 100644 index c1e87ad5d90b..000000000000 --- a/python/tvm/relay/op/contrib/ethosn.py +++ /dev/null @@ -1,463 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Arm(R) Ethos(TM)-N NPU supported operators.""" -from enum import Enum -from packaging.version import parse - -import tvm.ir -from tvm.relay import transform -from tvm.relay.build_module import bind_params_by_name - -from ...dataflow_pattern import is_constant, is_op, wildcard -from . import _ethosn -from .register import register_pattern_table - - -class Available(Enum): - UNAVAILABLE = 0 - SW_ONLY = 1 - SW_AND_HW = 2 - - def __bool__(self): - return self != Available.UNAVAILABLE - - -def ethosn_available(): - """Return whether Ethos-N software and hardware support is available""" - if not tvm.get_global_func("relay.ethos-n.query", True): - print("skip because Ethos-N module is not available") - return Available.UNAVAILABLE - hw = tvm.get_global_func("relay.ethos-n.query")() - return Available.SW_AND_HW if hw else Available.SW_ONLY - - -def ethosn_api_version() -> str: - """ - Returns the semantic version of the driver stack api that is - being used. - - Returns - ------- - str - Semantic version string (e.g. 3.0.1). - """ - return tvm.get_global_func("relay.ethos-n.api.version")() - - -def ConvertEquivalents() -> tvm.ir.IRModule: # pylint: disable=invalid-name - """Converts operations into a numerically equivalent form - that can be understood by the NPU codegen. - - Returns - ------- - Pass - The module pass. 
- """ - return _ethosn.ConvertEquivalents() - - -def InlineNonComputeIntensivePartitions() -> tvm.ir.IRModule: # pylint: disable=invalid-name - """This pass checks whether functions partitioned for the NPU are considered - non-compute intensive. If they are not, they will be unpartitioned and passed onto - other backends to consider. - - A partitioned function is currently considered non-compute intensive if it contains - no multiply accumulate operations. - - Returns - ------- - Pass - The module pass. - """ - return _ethosn.InlineNonComputeIntensivePartitions() - - -def is_inline_non_compute_intensive_partitions_enabled() -> bool: - """ - Determine whether to inline none-compute-intensive partitions. - - Returns - ------- - True if inlining should happen, False if not. - """ - compiler_attrs = tvm.get_global_func("relay.ext.ethos-n.get_compiler_attrs")() - if not compiler_attrs: - return False - return compiler_attrs.inline_non_compute_intensive_partitions - - -def partition_for_ethosn(mod, params=None, **opts): - """Partition the graph greedily offloading supported - operators to Arm Ethos-N NPU. - - Parameters - ---------- - mod : Module - The module to run passes on. - params : Optional[Dict[str, NDArray]] - Constant input parameters. - - Returns - ------- - ret : annotated and partitioned module. - """ - api_version = ethosn_api_version() - supported_api_versions = ["3.2.0"] - if all(parse(api_version) != parse(exp_ver) for exp_ver in supported_api_versions): - raise ValueError( - f"Driver stack version {api_version} is unsupported. " - f"Please use version in {supported_api_versions}." - ) - - if params: - mod["main"] = bind_params_by_name(mod["main"], params) - - passes = [ - transform.InferType(), - transform.FoldConstant(fold_qnn=True), - transform.MergeComposite(pattern_table()), - transform.AnnotateTarget("ethos-n"), - transform.MergeCompilerRegions(), - transform.PartitionGraph(), - ConvertEquivalents(), - ] - if is_inline_non_compute_intensive_partitions_enabled(): - passes.append(InlineNonComputeIntensivePartitions()) - - return tvm.transform.Sequential(passes)(mod) - - -@register_pattern_table("ethos-n") -def pattern_table(): - """Get the Ethos-N compiler pattern table.""" - - def qnn_conv_pattern(): - pattern = is_op("nn.pad")(wildcard(), wildcard()) | wildcard() - pattern = is_op("qnn.conv2d")( - pattern, is_constant(), is_constant(), is_constant(), is_constant(), is_constant() - ) - pattern = is_op("nn.bias_add")(pattern, is_constant()) - pattern = is_op("qnn.requantize")( - pattern, is_constant(), is_constant(), is_constant(), is_constant() - ) - return pattern - - def qnn_fc_pattern(): - pattern = is_op("qnn.dense")( - wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant() - ) - pattern = is_op("nn.bias_add")(pattern, is_constant()) - pattern = is_op("qnn.requantize")( - pattern, is_constant(), is_constant(), is_constant(), is_constant() - ) - return pattern - - def qnn_avg_pool2d_pattern(): - pattern = is_op("cast")(wildcard()) - pattern = is_op("nn.avg_pool2d")(pattern) - pattern = is_op("cast")(pattern) - return pattern - - def qnn_sigmoid_pattern(): - pattern = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant()) - pattern = is_op("sigmoid")(pattern) - pattern = is_op("qnn.quantize")(pattern, is_constant(), is_constant()) - return pattern - - def qnn_mean_pattern(): - pattern = is_op("cast")(wildcard()) - pattern = is_op("mean")(pattern) - pattern = is_op("qnn.requantize")( - pattern, is_constant(), is_constant(), 
is_constant(), is_constant() - ) - return pattern - - def qnn_tanh_pattern(): - pattern = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant()) - pattern = is_op("tanh")(pattern) - pattern = is_op("qnn.quantize")(pattern, is_constant(), is_constant()) - return pattern - - def qnn_leaky_relu_pattern(): - pattern = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant()) - pattern = is_op("nn.leaky_relu")(pattern) - pattern = is_op("qnn.quantize")(pattern, is_constant(), is_constant()) - return pattern - - def qnn_requantize_pattern(): - pattern = is_op("qnn.requantize")( - wildcard(), is_constant(), is_constant(), is_constant(), is_constant() - ) - return pattern - - def qnn_resize_pattern(): - return is_op("image.resize2d")(wildcard()).has_attr({"method": "nearest_neighbor"}) - - def qnn_mul_pattern(): - """ - Multiply is supported when one input is a constant of shape [1, ..., C], - where C matches the number of channels of the other input. - """ - mul_op = is_op("qnn.mul") - gen_mul_inputs = lambda x, y: mul_op( - x, - y, - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - ) - input_is_left = gen_mul_inputs(wildcard(), is_constant()) - input_is_right = gen_mul_inputs(is_constant(), wildcard()) - return input_is_left | input_is_right - - def qnn_add_pattern(has_constant_input=False): - add_op = is_op("qnn.add") - gen_add_inputs = lambda x, y: add_op( - x, - y, - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - ) - - if has_constant_input: - input_is_left = gen_add_inputs(wildcard(), is_constant()) - input_is_right = gen_add_inputs(is_constant(), wildcard()) - return input_is_left | input_is_right - else: - return gen_add_inputs(wildcard(), wildcard()) - - def qnn_conv2d_transpose_pattern(): - pattern = is_op("qnn.conv2d_transpose")( - wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant() - ).has_attr({"data_layout": "NHWC"}) - pattern = pattern.optional(lambda x: is_op("nn.bias_add")(x, is_constant())) - pattern = is_op("qnn.requantize")( - pattern, is_constant(), is_constant(), is_constant(), is_constant() - ) - return pattern - - def check_conv2d(extract): - """Check if a conv2d is supported by Ethos-N.""" - if not ethosn_available(): - return False - - return _ethosn.conv2d(extract) - - def check_fc(extract): - """Check if a fully connected is supported by Ethos-N.""" - if not ethosn_available(): - return False - - return _ethosn.fc(extract) - - def check_avg_pool2d(extract): - """Check if a avg pool2d is supported by Ethos-N.""" - if not ethosn_available(): - return False - - return _ethosn.avg_pool2d(extract) - - def check_mean(extract): - """Check if mean is supported by Ethos-N.""" - if not ethosn_available(): - return False - - return _ethosn.mean(extract) - - def check_conv2d_transpose(extract): - """Check if conv2d_transpose is supported by Ethos-N.""" - if not ethosn_available(): - return False - - return _ethosn.conv2d_transpose(extract) - - def check_sigmoid(extract): - """Check if a sigmoid is supported by Ethos-N.""" - if not ethosn_available(): - return False - - return _ethosn.sigmoid(extract) - - def check_tanh(extract): - """Check if tanh is supported by Ethos-N.""" - if not ethosn_available(): - return False - - return _ethosn.tanh(extract) - - def check_leaky_relu(extract): - """Check if Leaky ReLU is supported.""" - if not ethosn_available(): - return False - - return _ethosn.leaky_relu(extract) - - def 
check_mul_to_reinterpret_quantize(extract): - """Check if Mul is supported by converting to reinterpret quantize""" - if not ethosn_available(): - return False - - converted_extract = _ethosn.ConvertQnnMultiplyToReinterpretQuantize(extract) - if converted_extract: - return _ethosn.reinterpret_quantize(converted_extract) - return False - - def check_mul_to_depthwise(extract): - """Check if Mul is supported by converting to a depthwise operation.""" - if not ethosn_available(): - return False - converted_extract = _ethosn.ConvertQnnMultiplyToDepthwise(extract) - if converted_extract: - return _ethosn.conv2d(converted_extract) - return False - - def check_requantize(extract): - """Check if requantize is supported.""" - if not ethosn_available(): - return False - - return _ethosn.requantize(extract) - - def check_resize(extract): - """Check if resize (nearest neighbor) is supported.""" - if not ethosn_available(): - return False - - return _ethosn.resize(extract) - - def check_add(extract): - """Check if an addition is supported by Ethos-N.""" - if not ethosn_available(): - return False - - return _ethosn.addition(extract) - - def check_add_to_reinterpret_quantize(extract): - """Check if addition can be converted to a reinterpret quantize operation.""" - if not ethosn_available(): - return False - converted_extract = _ethosn.ConvertQnnAddToReinterpretQuantize(extract) - if converted_extract: - return _ethosn.reinterpret_quantize(converted_extract) - return False - - def check_add_to_depthwise(extract): - """Check if addition can be converted to a depthwise operation.""" - if not ethosn_available(): - return False - converted_extract = _ethosn.ConvertQnnAddToDepthwise(extract) - if converted_extract: - return _ethosn.conv2d(converted_extract) - return False - - return [ - ( - "ethos-n.qnn_mul_to_reinterpret_quantize", - qnn_mul_pattern(), - check_mul_to_reinterpret_quantize, - ), - ("ethos-n.qnn_mul_to_depthwise", qnn_mul_pattern(), check_mul_to_depthwise), - ( - "ethos-n.qnn_add_to_reinterpret_quantize", - qnn_add_pattern(True), - check_add_to_reinterpret_quantize, - ), - ("ethos-n.qnn_add_to_depthwise", qnn_add_pattern(True), check_add_to_depthwise), - ("ethos-n.qnn_add", qnn_add_pattern(), check_add), - ("ethos-n.qnn_conv2d", qnn_conv_pattern(), check_conv2d), - ("ethos-n.qnn_conv2d_transpose", qnn_conv2d_transpose_pattern(), check_conv2d_transpose), - ("ethos-n.qnn_avg_pool2d", qnn_avg_pool2d_pattern(), check_avg_pool2d), - ("ethos-n.qnn_sigmoid", qnn_sigmoid_pattern(), check_sigmoid), - ("ethos-n.qnn_fc", qnn_fc_pattern(), check_fc), - ("ethos-n.qnn_mean", qnn_mean_pattern(), check_mean), - ("ethos-n.qnn_tanh", qnn_tanh_pattern(), check_tanh), - ("ethos-n.qnn_leaky_relu", qnn_leaky_relu_pattern(), check_leaky_relu), - ("ethos-n.qnn_resize", qnn_resize_pattern(), check_resize), - ("ethos-n.qnn_requantize", qnn_requantize_pattern(), check_requantize), - ] - - -@tvm.ir.register_op_attr("nn.max_pool2d", "target.ethos-n") -def max_pool2d(expr): - """Check if a max pool2d is supported by Ethos-N.""" - if not ethosn_available(): - return False - - return _ethosn.max_pool2d(expr) - - -@tvm.ir.register_op_attr("reshape", "target.ethos-n") -def reshape(expr): - """Check if a reshape is supported by Ethos-N.""" - if not ethosn_available(): - return False - - return _ethosn.reshape(expr) - - -@tvm.ir.register_op_attr("qnn.concatenate", "target.ethos-n") -def qnn_concatenate(expr): - """Check if a concatenate is supported by Ethos-N.""" - if not ethosn_available(): - return False - if not 
_ethosn.concatenate(expr): - return False - - return True - - -@tvm.ir.register_op_attr("split", "target.ethos-n") -def split(expr): - """Check if a split is supported by Ethos-N.""" - if not ethosn_available(): - return False - if parse(ethosn_api_version()) == parse("3.0.1"): - return False - if not _ethosn.split(expr): - return False - - return True - - -@tvm.ir.register_op_attr("nn.depth_to_space", "target.ethos-n") -def depth_to_space(expr): - """Check if a depth_to_space is supported by Ethos-N.""" - if not ethosn_available(): - return False - if not _ethosn.depth_to_space(expr): - return False - - return True - - -@tvm.ir.register_op_attr("clip", "target.ethos-n") -def clip(expr): - """Check if a clip is supported by Ethos-N.""" - if not ethosn_available(): - return False - if not _ethosn.relu(expr): - return False - - return True diff --git a/python/tvm/relay/op/contrib/ethosu.py b/python/tvm/relay/op/contrib/ethosu.py deleted file mode 100644 index c4eff3fcc9e0..000000000000 --- a/python/tvm/relay/op/contrib/ethosu.py +++ /dev/null @@ -1,2536 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=ungrouped-imports, import-outside-toplevel -"""Arm(R) Ethos(TM)-U NPU supported operators.""" -import functools -from typing import Callable, Dict, List, Optional, Tuple - -import numpy as np # type: ignore - -import tvm # type: ignore -from tvm import relay -from tvm.ir import Op -from tvm.relay.build_module import bind_params_by_name # type: ignore -from tvm.relay.dataflow_pattern import ( # type: ignore - is_constant, - is_op, - is_tuple, - wildcard, -) -from tvm.relay.expr import Call, Constant # type: ignore -from tvm.relay.op.contrib.register import register_pattern_table # type: ignore - -try: - # As ethos-u-vela package is an optional TVM dependency, we want to lazy load it - # and check whether it is installed or not. - # - # In order to show the appropriate error messages when we try to invoke code that - # rely on imports from ethos-u-vela, we protect them with the decorator @requires_vela - # implemented below. - from ethosu.vela import api as vapi # type: ignore -except ImportError: - vapi = None - - -def requires_vela(func): - """Decorator to check whether we have the required dependency ethos-u-vela - installed as a python package""" - - @functools.wraps(func) - def wrapper(*args, **kwargs): - if not vapi: - raise ImportError( - "The 'ethos-u-vela' python package is required for the Arm(R) Ethos(TM)-U NPU " - "backend. Please install the dependency using your Python package manager." 
- ) from None - return func(*args, **kwargs) - - return wrapper - - -class TensorParams: - """ - This class will parse a tvm Expr along with quantization scale - and zero point to populate parameters that are required - for the creation of tensors in Vela. - """ - - @requires_vela - def __init__(self, tensor, layout=None, scale=None, zero_point=None): - self.tensor = tensor - if isinstance(tensor, Constant): - self.values = tensor.data.asnumpy() - else: - self.values = None - self.dtype = tensor.checked_type.dtype - self.shape = [int(i) for i in tensor.checked_type.shape] - self.layout = layout - - if scale is not None and zero_point is not None: - self.q_params = vapi.NpuQuantization( - scale.data.asnumpy().astype("float32"), zero_point.data.asnumpy().astype(self.dtype) - ) - else: - # put default values - self.q_params = vapi.NpuQuantization(1.0, 0) - - -def check_strides(strides: List[int], stride_range=None) -> bool: - """This function checks whether strides are within the limits supported by the NPU""" - if stride_range is None: - stride_range = (1, 3) - smin, smax = stride_range - if not smax >= strides[0] >= smin: - return False - if not smax >= strides[1] >= smin: - return False - return True - - -def check_same_ifm_and_kernel_shape(padding, ifm_shape, pool_shape): - """ - This function checks whether AvgPool2D or MaxPool2D could be legalized as ethosu_pooling - supported by the NPU. - We consider only specific case: when there is no AvgPool2D padding, the spatial - dimensions of ifm and the shape of pooling are equal, but stride size exceed 3 - by any of dimensions, e.g: - ifm: (1, 8, 8, _), strides: (8, 8), pool_shape: (8, 8) - ifm: (1, 25, 5, _), strides: (25, 5), pool_shape: (25, 5) - """ - if list(padding) != [0, 0, 0, 0]: - return False - if [ifm_shape[1], ifm_shape[2]] != list(pool_shape): - return False - return True - - -def check_valid_dtypes(tensor_params: List[TensorParams], supported_dtypes: List[type]) -> bool: - """This function checks whether dtypes are supported by the NPU""" - for tep in tensor_params: - # Check for dtypes - if np.dtype(tep.dtype) not in supported_dtypes: - return False - # Check for shape sizes - if any(dimlen > 65536 for dimlen in tep.shape): - return False - return True - - -def check_weights(weights: TensorParams, dilation: List[int]): - """This function checks whether weight tensor is compatible with the NPU""" - from tvm.relay.backend.contrib.ethosu.util import get_dim_value - - dilated_height_range = (1, 64) - dilated_hxw_range = (1, 64 * 64) - weights_limit = 127 * 65536 - dilated_width = (weights.shape[get_dim_value(weights.layout, "W")] - 1) * dilation[0] + 1 - dilated_height = (weights.shape[get_dim_value(weights.layout, "H")] - 1) * dilation[1] + 1 - dh_min, dh_max = dilated_height_range - if not dh_min <= dilated_height <= dh_max: - return False - dilated_hxw = dilated_height * dilated_width - dhxw_min, dhxw_max = dilated_hxw_range - if not dhxw_min <= dilated_hxw <= dhxw_max: - return False - # A saturation upper bound check for accumulators - weights.values = weights.values - weights.q_params.zero_point - axis = ( - get_dim_value(weights.layout, "H"), - get_dim_value(weights.layout, "W"), - get_dim_value(weights.layout, "I"), - ) - sum_weights = np.amax(np.sum(np.absolute(weights.values), axis=axis)) - return sum_weights <= weights_limit - - -def check_bias(bias: TensorParams): - """This function checks whether the bias values fit in 40 bits""" - if bias and bias.dtype == np.dtype("int64"): - valid = all(len(bin(bias_value)[2:]) <= 
40 for bias_value in bias.values) - return valid - return True - - -def check_batch_size(ifm: TensorParams): - """This function checks for the number of batches vela currently supports""" - return ifm.shape[0] == 1 - - -def check_dilation(dilation: List[int], dilation_range=None): - """This function checks whether dilation is within the limits supported by the NPU""" - if dilation_range is None: - dilation_range = (1, 2) - dmin, dmax = dilation_range - if not dmin <= dilation[0] <= dmax: - return False - if not dmin <= dilation[1] <= dmax: - return False - return True - - -def check_padding(padding: List[int], bounds: List[int]): - """This function checks whether padding is within the limits supported by the NPU""" - if len(padding) != 4 or len(bounds) != 4: - return False - top, left, bottom, right = padding - topb, leftb, bottomb, rightb = bounds - return not (top > topb or left > leftb or bottom > bottomb or right > rightb) - - -def check_pool_shape(pool_shape: tvm.ir.container.Array) -> bool: - if len(pool_shape) != 2: - return False - if pool_shape[1] > 256: - return False - if pool_shape[0] * pool_shape[1] > 256 * 256: - return False - return True - - -def check_dimensions(tensor: TensorParams): - """This function checks that the tensor has no more than 4 dimensions""" - return len(tensor.shape) <= 4 - - -class QnnConv2DParams: - """ - This class will parse a Call to a ethosu.qnn_conv2d composite function - and extract quantization information of all the associated tensors. - """ - - composite_name = "ethos-u.qnn_conv2d" - # The NPU only supports padding upto the numbers as follows - padding_bounds = [31, 31, 32, 32] - activation_map = {"clip": "CLIP"} - - @requires_vela - def __init__(self, func_body: tvm.relay.Function): - from tvm.relay.backend.contrib.ethosu.util import QConv2DArgs # type: ignore - from tvm.relay.backend.contrib.ethosu.util import BiasAddArgs, RequantArgs - - activation = None - separate_padding = None - - if str(func_body.op.name) in self.activation_map.keys(): - activation = func_body - requantize_op = activation.args[0] - else: - requantize_op = func_body - bias_add = requantize_op.args[0] - qnn_conv2d = bias_add.args[0] - if ( - isinstance(qnn_conv2d.args[0], relay.Call) - and isinstance(qnn_conv2d.args[0].op, Op) - and str(qnn_conv2d.args[0].op.name) == "nn.pad" - ): - separate_padding = qnn_conv2d.args[0] - data_layout = qnn_conv2d.attrs.data_layout - self.kernel_layout = qnn_conv2d.attrs.kernel_layout - - # We consider the weights & biases as params as it should be a Constant - self.weights = TensorParams( - qnn_conv2d.args[QConv2DArgs.WEIGHTS.value], - self.kernel_layout, - qnn_conv2d.args[QConv2DArgs.WEIGHTS_SCALE.value], - qnn_conv2d.args[QConv2DArgs.WEIGHTS_ZERO_POINT.value], - ) - - self.biases = TensorParams( - bias_add.args[BiasAddArgs.BIASES.value], - data_layout, - requantize_op.args[RequantArgs.IFM_SCALE.value], - requantize_op.args[RequantArgs.IFM_ZERO_POINT.value], - ) - ifm_tensor = ( - separate_padding.args[0] if separate_padding else qnn_conv2d.args[QConv2DArgs.IFM.value] - ) - self.ifm = TensorParams( - ifm_tensor, - data_layout, - qnn_conv2d.args[QConv2DArgs.IFM_SCALE.value], - qnn_conv2d.args[QConv2DArgs.IFM_ZERO_POINT.value], - ) - self.ofm = TensorParams( - func_body, - data_layout, - requantize_op.args[RequantArgs.OFM_SCALE.value], - requantize_op.args[RequantArgs.OFM_ZERO_POINT.value], - ) - attrs = qnn_conv2d.attrs - - pad_value = int(qnn_conv2d.args[QConv2DArgs.IFM_ZERO_POINT.value].data.asnumpy()) - self.padding = 
self.extract_padding(attrs.padding, separate_padding, pad_value) - - self.strides = attrs.strides - self.dilation = attrs.dilation - self.activation = activation - self.channels = attrs.channels - - # If groups are equal to channel, its a depthwise_conv2d - self.groups = attrs.groups - self.is_depthwise = False - channels_axis = {"HWIO": 3, "HWOI": 2} - if self.groups == self.weights.shape[channels_axis[self.kernel_layout]]: - self.is_depthwise = True - - @staticmethod - def extract_padding( - operator_padding: Tuple[int, int, int, int], - separate_padding: relay.Call, - pad_value: int, - ) -> Optional[Tuple[int, int, int, int]]: - """ - Convolution operations can sometimes have padding represented as a separate - padding operation before the convolution operation itself. Here we can check - whether these representations can be combined into a single padding attribute - as part of the NPU convolution itself. If the padding specified by the separate - nn.pad operation is not supported, None will be returned. This will cause the - nn.pad to be offloaded separately. - """ - if separate_padding is None: - return operator_padding - if pad_value != int(separate_padding.args[1].data.asnumpy()): - return None - pad_width = separate_padding.attrs["pad_width"] - if len(pad_width) != 4: - return None - if list(pad_width[0]) != [0, 0] or list(pad_width[3]) != [0, 0]: - return None - top, left, bottom, right = operator_padding - return [ - top + pad_width[1][0], - left + pad_width[2][0], - bottom + pad_width[1][1], - right + pad_width[2][1], - ] - - def is_valid(self) -> bool: - """ - This function checks whether QnnConv2D has compatible attributes with the NPU - """ - tensor_params = [self.weights, self.ifm, self.ofm] - if not check_valid_dtypes(tensor_params, supported_dtypes=[np.uint8, np.int8]): - return False - if not check_weights(self.weights, self.dilation): - return False - if not check_bias(self.biases): - return False - if not check_strides(self.strides): - return False - if not check_batch_size(self.ifm): - return False - if not check_dilation(self.dilation): - return False - if not self.padding or not check_padding(self.padding, self.padding_bounds): - return False - legal_groups = [1, self.ofm.shape[3]] - if self.groups not in legal_groups: - return False - # This should be a valid QnnDepthwiseConv2DParams, not QnnConv2DParams - return not self.is_depthwise - - -class QnnConv2DTransposeParams: - """ - This class will parse a Call to a ethosu.qnn_conv2d_transpose composite - function and extract quantization information of all the associated tensors. 
- """ - - composite_name = "ethos-u.qnn_conv2d_transpose" - # The NPU only supports padding upto the numbers as follows - padding_bounds = [31, 31, 32, 32] - - @requires_vela - def __init__(self, func_body: tvm.relay.Function): - from tvm.relay.backend.contrib.ethosu.util import ( - QConv2DTransposeArgs, # type: ignore - ) - from tvm.relay.backend.contrib.ethosu.util import BiasAddArgs, RequantArgs - - requantize = func_body - call = func_body.args[0] - if str(call.op.name) == "nn.bias_add": - bias_add = call - call = call.args[0] - else: - bias_add = None - qnn_conv2d_transpose = call - - data_layout = qnn_conv2d_transpose.attrs.data_layout - self.kernel_layout = qnn_conv2d_transpose.attrs.kernel_layout - - self.weights = TensorParams( - qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS.value], - self.kernel_layout, - qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS_SCALE.value], - qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS_ZERO_POINT.value], - ) - self.biases = ( - TensorParams( - bias_add.args[BiasAddArgs.BIASES.value], - data_layout, - requantize.args[RequantArgs.IFM_SCALE.value], - requantize.args[RequantArgs.IFM_ZERO_POINT.value], - ) - if bias_add - else None - ) - self.ifm = TensorParams( - qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM.value], - data_layout, - qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM_SCALE.value], - qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM_ZERO_POINT.value], - ) - self.ofm = TensorParams( - func_body, - data_layout, - requantize.args[RequantArgs.OFM_SCALE.value], - requantize.args[RequantArgs.OFM_ZERO_POINT.value], - ) - - attrs = qnn_conv2d_transpose.attrs - self.strides = attrs.strides - self.dilation = attrs.dilation - self.padding = attrs.padding - self.channels = attrs.channels - self.groups = attrs.groups - self.output_padding = attrs.output_padding - - kernel_size_map = { - "IOHW": self.weights.shape[2:4], - } - self.kernel_shape = kernel_size_map[str(self.weights.layout)] - - # Different padding is used in the legalization from conv2d_transpose - # to conv2d, so we to calculate it here to check that the new size fits - # within the bounds of the NPU before offloading. 
- pad_top = int(self.kernel_shape[0]) - 1 - int(self.padding[0]) - pad_left = int(self.kernel_shape[1]) - 1 - int(self.padding[1]) - pad_bottom = int(self.kernel_shape[0]) - 1 - int(self.padding[2]) - pad_right = int(self.kernel_shape[1]) - 1 - int(self.padding[3]) - if self.strides == [2, 2]: - pad_bottom -= 1 - pad_right -= 1 - self.legalize_padding = [pad_top, pad_left, pad_bottom, pad_right] - - def is_valid(self) -> bool: - """ - This function checks whether QnnConv2D has compatible attributes with the NPU - """ - - def check_compatible_output_size(ifm_shape, ofm_shape, padding, strides, kernel_shape): - is_valid_padding = padding == [0, 0, 0, 0] - if is_valid_padding: - expected_height = ifm_shape[1] * strides[0] + (kernel_shape[0] - strides[0]) - expected_width = ifm_shape[2] * strides[1] + (kernel_shape[1] - strides[1]) - else: - expected_height = ifm_shape[1] * strides[0] - expected_width = ifm_shape[2] * strides[1] - return ofm_shape[1] == expected_height and ofm_shape[2] == expected_width - - tensor_params = [self.weights, self.ifm, self.ofm] - if not check_valid_dtypes(tensor_params, supported_dtypes=[np.int8]): - return False - if not check_weights(self.weights, self.dilation): - return False - if self.biases and not check_bias(self.biases): - return False - if not check_strides(self.strides, stride_range=(2, 2)): - return False - if not check_batch_size(self.ifm): - return False - if not check_dilation(self.dilation, dilation_range=(1, 1)): - return False - if not check_compatible_output_size( - self.ifm.shape, - self.ofm.shape, - [int(x) for x in self.padding], - self.strides, - self.kernel_shape, - ): - return False - if not check_padding(self.legalize_padding, self.padding_bounds): - return False - if self.kernel_shape[0] - 2 - int(self.padding[2]) < 0: - return False - if self.kernel_shape[1] - 2 - int(self.padding[3]) < 0: - return False - if self.groups != 1: - return False - if list(self.output_padding) != [0, 0]: - return False - return True - - -class QnnDepthwiseConv2DParams(QnnConv2DParams): - """ - This class will parse a call to a ethosu.depthwise_conv2d composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.depthwise_conv2d" - # The hardware only supports padding upto the numbers as follows - padding_bounds = [31, 31, 32, 32] - - def __init__(self, func_body: tvm.relay.expr.Call): - QnnConv2DParams.__init__(self, func_body) - - def is_valid(self): - """ - Checks whether QnnDepthwiseConv2D + activation function has compatible attributes with HW - """ - tensor_params = [self.weights, self.ifm, self.ofm] - if not check_valid_dtypes(tensor_params, supported_dtypes=[np.uint8, np.int8]): - return False - if not check_weights(self.weights, self.dilation): - return False - if not check_bias(self.biases): - return False - if not check_strides(self.strides): - return False - if not check_batch_size(self.ifm): - return False - if not check_dilation(self.dilation): - return False - if not self.padding or not check_padding(self.padding, self.padding_bounds): - return False - if self.weights.layout != "HWOI": - return False - # only depth multiplier of size 1 is supported - if self.weights.shape[3] != 1: - return False - if not self.is_depthwise: - return False - return True - - -def qnn_conv2d_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for qnn.conv2D with optional fused RELU activation. 
- """ - optional_pad = is_op("nn.pad")(wildcard(), is_constant()) - qnn_conv2d = is_op("qnn.conv2d")( - optional_pad | wildcard(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - ).has_attr({"kernel_layout": "HWIO"}) - bias_add = is_op("nn.bias_add")(qnn_conv2d, is_constant()) - req = is_op("qnn.requantize")( - bias_add, is_constant(), is_constant(), is_constant(), is_constant() - ) - clip_or_req = req.optional(is_op("clip")) - return clip_or_req - - -def qnn_depthwise_conv2d_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for depthwise qnn.conv2D with optional fused RELU activation. - """ - optional_pad = is_op("nn.pad")(wildcard(), is_constant()) - qnn_conv2d = is_op("qnn.conv2d")( - optional_pad | wildcard(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - ).has_attr({"kernel_layout": "HWOI"}) - bias_add = is_op("nn.bias_add")(qnn_conv2d, is_constant()) - req = is_op("qnn.requantize")( - bias_add, is_constant(), is_constant(), is_constant(), is_constant() - ) - clip_or_req = req.optional(is_op("clip")) - return clip_or_req - - -def qnn_conv2d_transpose_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for qnn.conv2d_transpose. - """ - qnn_conv2d_transpose = is_op("qnn.conv2d_transpose")( - wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant() - ).has_attr({"kernel_layout": "IOHW"}) - optional_bias_add = ( - is_op("nn.bias_add")(qnn_conv2d_transpose, is_constant()) | qnn_conv2d_transpose - ) - req = is_op("qnn.requantize")( - optional_bias_add, is_constant(), is_constant(), is_constant(), is_constant() - ) - return req - - -class MaxPool2DParams: - """ - This class will parse a call to a ethos-u.maxpool2d composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.maxpool2d" - # The hardware only supports padding upto the numbers as follows - padding_bounds = [127, 127, 128, 128] - - def __init__(self, func_body: Call): - clip = None - if str(func_body.op.name) == "clip": - clip = func_body - pool_op = clip.args[0] - else: - pool_op = func_body - - attrs = pool_op.attrs - self.ifm = TensorParams(pool_op.args[0], attrs.layout) - self.ofm = TensorParams(pool_op, attrs.layout) - self.pool_shape = attrs.pool_size - self.strides = attrs.strides - self.padding = attrs.padding - self.activation = clip - self.pooling_type = "MAX" - - def is_valid(self): - """ - This function checks whether MaxPool2D has compatible attributes with the NPU - """ - tensor_params = [self.ifm, self.ofm] - if not check_valid_dtypes(tensor_params, supported_dtypes=[np.uint8, np.int8]): - return False - if self.ifm.dtype != self.ofm.dtype: - return False - if not check_strides(self.strides) and not check_same_ifm_and_kernel_shape( - self.padding, self.ifm.shape, self.pool_shape - ): - return False - if not check_batch_size(self.ifm): - return False - if not check_padding(self.padding, self.padding_bounds): - return False - if not check_pool_shape(self.pool_shape): - return False - return True - - -def qnn_maxpool2d_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for nn.max_pool2d with optional fused RELU activation. 
- """ - pattern = is_op("nn.max_pool2d")(wildcard()) - pattern = pattern.optional(is_op("clip")) - return pattern - - -class AvgPool2DParams: - """ - This class will parse a call to a ethos-u.avgpool2d composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.avgpool2d" - # The hardware only supports padding upto the numbers as follows - padding_bounds = [3, 3, 4, 4] - - def __init__(self, func_body: Call): - clip = None - if str(func_body.op.name) == "clip": - clip = func_body - cast2 = clip.args[0] - else: - cast2 = func_body - - avgpool = cast2.args[0] - cast1 = avgpool.args[0] - - attrs = avgpool.attrs - self.ifm = TensorParams(cast1.args[0], attrs.layout) - self.ofm = TensorParams(cast2, attrs.layout) - self.pool_shape = attrs.pool_size - self.strides = attrs.strides - self.padding = attrs.padding - self.count_include_pad = attrs.count_include_pad - self.activation = clip - self.pooling_type = "AVG" - - def is_valid(self): - """ - This function checks whether AvgPool2D has compatible attributes with the NPU - """ - tensor_params = [self.ifm, self.ofm] - if not check_valid_dtypes(tensor_params, supported_dtypes=[np.uint8, np.int8]): - return False - if self.ifm.dtype != self.ofm.dtype: - return False - if not check_strides(self.strides) and not check_same_ifm_and_kernel_shape( - self.padding, self.ifm.shape, self.pool_shape - ): - return False - if not check_batch_size(self.ifm): - return False - if self.count_include_pad: - return False - if not check_padding(self.padding, self.padding_bounds): - return False - if not check_pool_shape(self.pool_shape): - return False - # Average pool with padding only supports 1 <= pool_shape <= 8 - if list(self.padding) != [0, 0, 0, 0] and ( - self.pool_shape[0] > 8 or self.pool_shape[1] > 8 - ): - return False - return True - - -def qnn_avgpool2d_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for nn.avg_pool2d with optional fused RELU activation. - """ - pattern = is_op("cast")(wildcard()) - pattern = is_op("nn.avg_pool2d")(pattern) - pattern = is_op("cast")(pattern) - pattern = pattern.optional(is_op("clip")) - return pattern - - -class BinaryElementwiseParams: - """ - This class will parse a call to a ethosu.binary_elementwise composite function - and extract the parameter information. 
- """ - - def __init__(self, func_body: Call, operator_type: str, is_quantized_operation: bool): - from tvm.relay.backend.contrib.ethosu.util import ( - BinaryElementwiseArgs, - RequantArgs, - ) - - current_call = func_body - clip = None - requantize = None - - if str(current_call.op.name) == "clip": - clip = current_call - current_call = clip.args[0] - elif str(current_call.op.name) == "qnn.requantize": - requantize = current_call - clip = current_call.args[0] - current_call = clip.args[0] - binary_op = current_call - - layout = "NHWC" - - if is_quantized_operation: - self.ifm = TensorParams( - binary_op.args[BinaryElementwiseArgs.IFM.value], - layout, - binary_op.args[BinaryElementwiseArgs.IFM_SCALE.value], - binary_op.args[BinaryElementwiseArgs.IFM_ZERO_POINT.value], - ) - self.ifm2 = TensorParams( - binary_op.args[BinaryElementwiseArgs.IFM2.value], - layout, - binary_op.args[BinaryElementwiseArgs.IFM2_SCALE.value], - binary_op.args[BinaryElementwiseArgs.IFM2_ZERO_POINT.value], - ) - self.ofm = TensorParams( - binary_op, - layout, - binary_op.args[BinaryElementwiseArgs.OFM_SCALE.value], - binary_op.args[BinaryElementwiseArgs.OFM_ZERO_POINT.value], - ) - else: - self.ifm = TensorParams( - binary_op.args[BinaryElementwiseArgs.IFM.value], - layout, - requantize.args[RequantArgs.IFM_SCALE.value] if requantize else None, - requantize.args[RequantArgs.IFM_ZERO_POINT.value] if requantize else None, - ) - self.ifm2 = TensorParams( - binary_op.args[BinaryElementwiseArgs.IFM2.value], - layout, - requantize.args[RequantArgs.IFM_SCALE.value] if requantize else None, - requantize.args[RequantArgs.IFM_ZERO_POINT.value] if requantize else None, - ) - self.ofm = TensorParams( - func_body, - layout, - requantize.args[RequantArgs.OFM_SCALE.value] if requantize else None, - requantize.args[RequantArgs.OFM_ZERO_POINT.value] if requantize else None, - ) - self.activation = clip - self.operator_type = operator_type - - def can_broadcast(ifm, ifm2): - if len(ifm.shape) < len(ifm2.shape): - return False - for m, n in zip(ifm.shape[::-1], ifm2.shape[::-1]): - if m != n and m == 1: - return False - return True - - if can_broadcast(self.ifm, self.ifm2): - self.reversed_operands = False - self.valid_broadcast = True - elif can_broadcast(self.ifm2, self.ifm): - self.reversed_operands = True - self.ifm, self.ifm2 = self.ifm2, self.ifm - self.valid_broadcast = True - else: - self.valid_broadcast = False - - def is_valid(self): - """ - This function checks whether BinaryElementwise has compatible attributes with the NPU - """ - if np.dtype(self.ofm) == np.int32 and self.activation is not None: - return False - # Due to identity operator requiring ofm != int32 for now - if np.dtype(self.ofm) == np.int32 and len(self.ofm.shape) < 4: - return False - if len(self.ifm.shape) > 4 or len(self.ifm2.shape) > 4: - return False - if len(self.ifm.shape) == 4 and self.ifm.shape[0] != 1: - return False - if len(self.ifm2.shape) == 4 and self.ifm2.shape[0] != 1: - return False - if not self.valid_broadcast: - return False - return True - - -class AddParams(BinaryElementwiseParams): - """ - This class will parse a call to a ethosu.binary_elementwise Add composite function - and extract the parameter information. 
- """ - - composite_name = "ethos-u.add" - - def __init__(self, func_body: Call): - BinaryElementwiseParams.__init__(self, func_body, "ADD", True) - - def is_valid(self): - """ - This function checks whether Add has compatible attributes with the NPU - """ - if not super().is_valid(): - return False - if not check_valid_dtypes( - [self.ifm, self.ifm2, self.ofm], supported_dtypes=[np.uint8, np.int8, np.int32] - ): - return False - return True - - -def qnn_add_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for qnn.add with optional fused RELU activation. - """ - pattern = is_op("qnn.add")( - wildcard(), - wildcard(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - ) - pattern = pattern.optional(is_op("clip")) - return pattern - - -class SubParams(BinaryElementwiseParams): - """ - This class will parse a call to a ethosu.binary_elementwise Sub composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.sub" - - def __init__(self, func_body: Call): - BinaryElementwiseParams.__init__(self, func_body, "SUB", True) - - def is_valid(self): - """ - This function checks whether Sub has compatible attributes with the NPU - """ - if not super().is_valid(): - return False - if not check_valid_dtypes( - [self.ifm, self.ifm2, self.ofm], supported_dtypes=[np.uint8, np.int8, np.int32] - ): - return False - return True - - -def qnn_subtract_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for qnn.subtract with optional fused RELU activation. - """ - pattern = is_op("qnn.subtract")( - wildcard(), - wildcard(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - ) - pattern = pattern.optional(is_op("clip")) - return pattern - - -class MulParams(BinaryElementwiseParams): - """ - This class will parse a call to a ethosu.binary_elementwise Mul composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.mul" - - def __init__(self, func_body: Call): - BinaryElementwiseParams.__init__(self, func_body, "MUL", True) - - def is_valid(self): - """ - This function checks whether Mul has compatible attributes with the NPU - """ - if not super().is_valid(): - return False - if not check_valid_dtypes( - [self.ifm, self.ifm2, self.ofm], supported_dtypes=[np.uint8, np.int8, np.int32] - ): - return False - return True - - -def qnn_mul_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for qnn.mul with optional fused RELU activation. - """ - pattern = is_op("qnn.mul")( - wildcard(), - wildcard(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - is_constant(), - ) - pattern = pattern.optional(is_op("clip")) - return pattern - - -class MinParams(BinaryElementwiseParams): - """ - This class will parse a call to a ethosu.binary_elementwise Min composite function - and extract the parameter information. 
- """ - - composite_name = "ethos-u.min" - - def __init__(self, func_body: Call): - BinaryElementwiseParams.__init__(self, func_body, "MIN", False) - - def is_valid(self): - """ - This function checks whether Min has compatible attributes with the NPU - """ - if not super().is_valid(): - return False - if self.ifm.dtype != self.ifm2.dtype: - return False - if not check_valid_dtypes( - [self.ifm, self.ifm2, self.ofm], supported_dtypes=[np.uint8, np.int8] - ): - return False - # MIN with different scales is not supported on NPU - # (please look at NPU_SET_OFM_SCALE register description - # https://developer.arm.com/documentation/102420/0200/Programmers-model/Command-stream/cmd1-commands-). - if self.ifm.q_params.scale_f32 != self.ofm.q_params.scale_f32: - return False - return True - - -# This pattern is for case when there are different scales for requantize and -# minimum + clip + qnn.requantize can't be offloaded to NPU by one operation -# due to hardware constraints. -# It's offloaded by two operations ethosu_binary_elementwise + ethosu_identity. -def minimum_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for minimum with optional fused RELU activation without - requantize. - """ - minimum = is_op("minimum")(wildcard(), wildcard()) - optional_min_clip = is_op("clip")(minimum) - return minimum | optional_min_clip - - -def minimum_clip_requantize_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for minimum with fused RELU activation with requantize. - """ - pattern = is_op("minimum")(wildcard(), wildcard()) - pattern = is_op("clip")(pattern) - pattern = is_op("qnn.requantize")( - pattern, is_constant(), is_constant(), is_constant(), is_constant() - ) - return pattern - - -class MaxParams(BinaryElementwiseParams): - """ - This class will parse a call to a ethosu.binary_elementwise Max composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.max" - - def __init__(self, func_body: Call): - BinaryElementwiseParams.__init__(self, func_body, "MAX", False) - - def is_valid(self): - """ - This function checks whether Max has compatible attributes with the NPU - """ - if not super().is_valid(): - return False - if self.ifm.dtype != self.ifm2.dtype: - return False - if not check_valid_dtypes( - [self.ifm, self.ifm2, self.ofm], supported_dtypes=[np.uint8, np.int8] - ): - return False - # MAX with different scales is not supported on NPU - # (please look at NPU_SET_OFM_SCALE register description - # https://developer.arm.com/documentation/102420/0200/Programmers-model/Command-stream/cmd1-commands-). - if self.ifm.q_params.scale_f32 != self.ofm.q_params.scale_f32: - return False - return True - - -# This pattern is for case when there are different scales for requantize and -# maximum + clip + qnn.requantize can't be offloaded to NPU by one operation due to -# hardware constraints. -# It's offloaded by two operations ethosu_binary_elementwise + ethosu_identity. -def maximum_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for maximum with optional fused RELU activation without - requantize. - """ - maximum = is_op("maximum")(wildcard(), wildcard()) - optional_max_clip = is_op("clip")(maximum) - return maximum | optional_max_clip - - -def maximum_clip_requantize_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for maximum with fused RELU activation with requantize. 
- """ - pattern = is_op("maximum")(wildcard(), wildcard()) - pattern = is_op("clip")(pattern) - pattern = is_op("qnn.requantize")( - pattern, is_constant(), is_constant(), is_constant(), is_constant() - ) - return pattern - - -class ShlParams(BinaryElementwiseParams): - """ - This class will parse a call to a ethosu.binary_elementwise Shl composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.shl" - - def __init__(self, func_body: Call): - BinaryElementwiseParams.__init__(self, func_body, "SHL", False) - - def is_valid(self): - """ - This function checks whether Shl has compatible attributes with the NPU - """ - if not super().is_valid(): - return False - if not check_valid_dtypes([self.ifm, self.ifm2, self.ofm], supported_dtypes=[np.int32]): - return False - return True - - -def shl_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for left_shift with optional fused RELU activation. - """ - pattern = is_op("left_shift")(wildcard(), wildcard()) - pattern = pattern.optional(is_op("clip")) - return pattern - - -class ReshapeParams: - """ - This class will parse a call to a ethosu.reshape composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.reshape" - - def __init__(self, func_body: Call): - self.new_shape = func_body.attrs.newshape - self.ifm = TensorParams(func_body.args[0]) - self.ofm = TensorParams(func_body) - - def is_valid(self): - """ - This function checks whether reshape has compatible attributes with the NPU - """ - if not check_dimensions(self.ifm) or not check_dimensions(self.ofm): - return False - if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int8]): - return False - return True - - -def reshape_pattern(): - """Create pattern for reshape""" - pattern = is_op("reshape")(wildcard()) - return pattern - - -class StridedSliceParams: - """ - This class will parse a call to a ethosu.strided_slice composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.strided_slice" - - def __init__(self, func_body: Call): - self.ifm = TensorParams(func_body.args[0]) - self.ofm = TensorParams(func_body) - - attrs = func_body.attrs - # The indices where we begin the slice - self.begin = attrs.begin - # The indices where we end the slice - self.end = attrs.end - self.strides = attrs.strides - self.axes = attrs.axes - self.slice_mode = attrs.slice_mode - - def is_valid(self): - """ - This function checks whether reshape has compatible attributes with the NPU - """ - if not check_dimensions(self.ifm) or not check_dimensions(self.ofm): - return False - if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int8]): - return False - if len(self.begin) != len(self.end): - return False - - for begin_idx, end_idx in zip(self.begin, self.end): - if begin_idx > end_idx: - return False - - # Only strides of 1 are supported - if self.strides: - if not all([i == 1 for i in self.strides]): - return False - return True - - -def strided_slice_pattern(): - """Create pattern for strided_slice""" - pattern = is_op("strided_slice")(wildcard()) - return pattern - - -class AbsParams: - """ - This class will parse a call to a ethosu.unary_elementwise Abs composite function - and extract the parameter information. 
- """ - - composite_name = "ethos-u.abs" - - def __init__(self, func_body: Call): - from tvm.relay.backend.contrib.ethosu.util import DequantizeArgs, QuantizeArgs - - quantize = func_body - abs_op = quantize.args[0] - dequantize = abs_op.args[0] - - layout = "NHWC" - - self.ifm = TensorParams( - dequantize.args[DequantizeArgs.IFM.value], - layout, - dequantize.args[DequantizeArgs.IFM_SCALE.value], - dequantize.args[DequantizeArgs.IFM_ZERO_POINT.value], - ) - self.ofm = TensorParams( - quantize, - layout, - quantize.args[QuantizeArgs.OFM_SCALE.value], - quantize.args[QuantizeArgs.OFM_ZERO_POINT.value], - ) - - self.operator_type = "ABS" - self.activation = None - - def is_valid(self): - """Checks whether Abs has compatible attributes with HW""" - tensor_params = [self.ifm, self.ofm] - if not check_valid_dtypes(tensor_params, supported_dtypes=[np.int8, np.uint8]): - return False - if self.ifm.dtype != self.ofm.dtype: - return False - if not check_dimensions(self.ifm): - return False - if len(self.ifm.shape) == 4 and self.ifm.shape[0] != 1: - return False - if self.ifm.shape != self.ofm.shape: - return False - return True - - -def abs_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """Create pattern for abs""" - pattern = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant()) - pattern = is_op("abs")(pattern) - pattern = is_op("qnn.quantize")(pattern, is_constant(), is_constant()) - return pattern - - -class LutActivationParams: - """ - A parent class for LUT based activation functions that extract the input and - output tensors and check whether they are valid. - """ - - def __init__(self, func_body: Call): - from tvm.relay.backend.contrib.ethosu.util import DequantizeArgs, QuantizeArgs - - layout = "NHWC" - - quantize = func_body - activation = quantize.args[0] - dequantize = activation.args[0] - in_var = dequantize.args[0] - - self.ifm = TensorParams( - in_var, - layout=layout, - scale=dequantize.args[DequantizeArgs.IFM_SCALE.value], - zero_point=dequantize.args[DequantizeArgs.IFM_ZERO_POINT.value], - ) - self.ofm = TensorParams( - quantize, - layout=layout, - scale=quantize.args[QuantizeArgs.OFM_SCALE.value], - zero_point=quantize.args[QuantizeArgs.OFM_ZERO_POINT.value], - ) - - def is_valid(self): - """ - This function checks whether activation has compatible attributes with the NPU - """ - if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int8, np.int16]): - return False - return True - - -class TanhParams(LutActivationParams): - - composite_name = "ethos-u.tanh" - - -def tanh_pattern(): - """Create pattern for tanh""" - dequant = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant()) - tanh = is_op("tanh")(dequant) - quant = is_op("qnn.quantize")(tanh, is_constant(), is_constant()) - return quant - - -class TanhFixedPointParams: - """ - This class will parse a call to a ethos-u.tanh_fixed_point composite function - and extract the parameter information. 
- """ - - composite_name = "ethos-u.tanh_fixed_point" - - @requires_vela - def __init__(self, func_body): - layout = "NHWC" - - tanh_fixed_point = func_body.args[0] - tanh = tanh_fixed_point.args[0] - # fixed_point_multiply relay operation uses multiplier with 31 fractional bits - # so to determine the size of the fraction use the formula: 31 - shift - self.fraction_size = 31 - tanh_fixed_point.attrs.shift - fract_scale = tvm.relay.Constant(tvm.nd.array(np.array(1 / 2**self.fraction_size))) - fract_zero_point = tvm.relay.Constant(tvm.nd.array(np.array(0, dtype="int32"))) - - self.ifm = TensorParams( - tanh.args[0].args[0].args[0], - layout=layout, - scale=fract_scale, - zero_point=fract_zero_point, - ) - self.ofm = TensorParams( - func_body, - layout=layout, - scale=fract_scale, - zero_point=fract_zero_point, - ) - - def is_valid(self) -> bool: - """ - This function checks whether activation has compatible attributes with the NPU - """ - - if self.fraction_size < 0 or self.fraction_size > 16: - return False - if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int8, np.int16]): - return False - return True - - -def tanh_fixed_point_pattern(): - """Create pattern for fixed point tanh""" - ifm = is_op("cast")(wildcard()) - ifm = is_op("fixed_point_multiply")(ifm) - tanh = is_op("tanh")(ifm) - tanh = is_op("fixed_point_multiply")(tanh) - return is_op("cast")(tanh) - - -class SigmoidParams(LutActivationParams): - """ - This class will parse a call to a ethos-u.sigmoid composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.sigmoid" - - -def sigmoid_pattern(): - """Create pattern for sigmoid""" - dequant = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant()) - sigmoid = is_op("sigmoid")(dequant) - quant = is_op("qnn.quantize")(sigmoid, is_constant(), is_constant()) - return quant - - -class LeakyReLUParams(LutActivationParams): - """ - This class will parse a call to ethos-u.leaky_relu composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.leaky_relu" - - def __init__(self, func_body: Call): - super().__init__(func_body) - self.alpha = func_body.args[0].attrs.alpha - - -def leaky_relu_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for leaky relu. - """ - dequantize = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant()) - leaky_relu = is_op("nn.leaky_relu")(dequantize) - return is_op("qnn.quantize")(leaky_relu, is_constant(), is_constant()) - - -class MeanParams: - """ - This class will parse a call to ethosu.mean composite function - and extract the parameter information. 
- """ - - composite_name = "ethos-u.mean" - - def __init__(self, func_body: Call): - from tvm.relay.backend.contrib.ethosu.util import RequantArgs - - requantize = func_body - mean_op = requantize.args[0] - attrs = mean_op.attrs - cast = mean_op.args[0] - - layout = "NHWC" - self.ifm = TensorParams( - cast.args[0], - layout, - requantize.args[RequantArgs.IFM_SCALE.value], - requantize.args[RequantArgs.IFM_ZERO_POINT.value], - ) - self.ofm = TensorParams( - requantize, - layout, - requantize.args[RequantArgs.OFM_SCALE.value], - requantize.args[RequantArgs.OFM_ZERO_POINT.value], - ) - - ifm_shape = self.ifm.shape - self.height = ifm_shape[0] if len(ifm_shape) in (2, 3) else ifm_shape[1] - self.width = ifm_shape[1] if len(ifm_shape) in (2, 3) else ifm_shape[2] - self.keepdims = attrs.keepdims - - self.axis = list(sorted(attrs.axis)) - if attrs.exclude: - self.axis = [i for i in range(len(self.ifm.shape)) if i not in self.axis] - - def is_valid(self) -> bool: - """ - Checks whether Mean has compatible attributes with HW. - """ - - def check_axis(num_dims, axis): - if num_dims in (2, 3): - return axis in ([0], [1], [0, 1]) - return axis in ([1], [2], [1, 2]) - - def check_single_axis_across_height(num_dims, axis): - return len(axis) == 1 and (num_dims in (2, 3) and axis == [0] or axis == [1]) - - same_quantization = ( - self.ifm.q_params.scale_f32 == self.ofm.q_params.scale_f32 - and self.ifm.q_params.zero_point == self.ofm.q_params.zero_point - ) - - # IFM must be int8 or uint8 - if not check_valid_dtypes([self.ifm], [np.int8, np.uint8]): - return False - # OFM must be int8, uint8 or int16 - if not check_valid_dtypes([self.ofm], [np.int8, np.uint8, np.int16]): - return False - # Input tensor must be at least 2D - if not len(self.ifm.shape) in [2, 3, 4]: - return False - # Axis indices must correspond to height and width axes - if not check_axis(len(self.ifm.shape), self.axis): - return False - - input_size = self.height * self.width - - # Product of height and width must be no greater than 65536 - if input_size > 65536: - return False - # Product of height and width must be no greater than 4096 when: - # IFM and OFM have different scale or zero point; or - # 'keep_dims' is True - if input_size > 4096 and (not same_quantization or self.keepdims): - return False - # For single axis averages across the height dimension: - if check_single_axis_across_height(len(self.ifm.shape), self.axis): - # IFM height must be no greater than 256 if the IFM and OFM scale and zero point match - if self.height > 256 and same_quantization: - return False - # IFM height must be no greater than 64 if the IFM and OFM scale or zero point - # do not match - if self.height > 64 and not same_quantization: - return False - return True - - -def mean_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for mean. - """ - pattern = is_op("cast")(wildcard()) - pattern = is_op("mean")(pattern) - pattern = is_op("qnn.requantize")( - pattern, is_constant(), is_constant(), is_constant(), is_constant() - ) - return pattern - - -class SumParams: - """ - This class will parse a call to ethosu.sum composite function - and extract the parameter information. 
- """ - - composite_name = "ethos-u.sum" - - def __init__(self, func_body: Call): - from tvm.relay.backend.contrib.ethosu.util import RequantArgs - - clip = None - if str(func_body.op.name) == "clip": - clip = func_body - requantize = clip.args[0] - else: - requantize = func_body - - sum_op = requantize.args[0] - attrs = sum_op.attrs - cast = sum_op.args[0] - - layout = "NHWC" - self.ifm = TensorParams( - cast.args[0], - layout, - requantize.args[RequantArgs.IFM_SCALE.value], - requantize.args[RequantArgs.IFM_ZERO_POINT.value], - ) - self.ofm = TensorParams( - requantize, - layout, - requantize.args[RequantArgs.OFM_SCALE.value], - requantize.args[RequantArgs.OFM_ZERO_POINT.value], - ) - - self.activation = clip - - ifm_shape = self.ifm.shape - self.height = ifm_shape[0] if len(ifm_shape) in (2, 3) else ifm_shape[1] - self.width = ifm_shape[1] if len(ifm_shape) in (2, 3) else ifm_shape[2] - self.keepdims = attrs.keepdims - - self.axis = list(sorted(attrs.axis)) - if attrs.exclude: - self.axis = [i for i in range(len(self.ifm.shape)) if i not in self.axis] - - def is_valid(self) -> bool: - """ - Checks whether Sum has compatible attributes with HW. - """ - - ifm_shape_len = len(self.ifm.shape) - - if not check_valid_dtypes([self.ifm], [np.uint8, np.int8, np.int16, np.int32]): - return False - if not check_valid_dtypes([self.ofm], [np.int8]): - return False - if not ifm_shape_len in (3, 4): - return False - if ifm_shape_len == 3 and self.axis not in [[2]]: - return False - if ifm_shape_len == 4 and self.axis not in [[3]]: - return False - - return True - - -def sum_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for sum. - """ - pattern = is_op("cast")(wildcard()) - pattern = is_op("sum")(pattern) - pattern = is_op("qnn.requantize")( - pattern, - is_constant(), - is_constant(), - is_constant(), - is_constant(), - ) - pattern = pattern.optional(is_op("clip")) - return pattern - - -class ConcatParams: - """ - This class will parse a call to a ethos-u.concat composite function - and extract the parameter information. 
- """ - - composite_name = "ethos-u.concat" - - def __init__(self, func_body): - self.concat = func_body - self.is_qnn_variant = self.concat.op.name == "qnn.concatenate" - self.input_tensors = [TensorParams(tensor) for tensor in list(func_body.args[0])] - self.axis = func_body.attrs.axis - - if self.is_qnn_variant: - self.input_scales = [s.data.asnumpy() for s in list(func_body.args[1])] - self.input_zero_points = [zp.data.asnumpy() for zp in list(func_body.args[2])] - - def is_valid(self): - """Checks whether Concatenate has compatible attributes with the hardware""" - if not check_valid_dtypes(self.input_tensors, supported_dtypes=[np.int8]): - return False - # Check that the scales and zero points of input tensors are the same - if self.is_qnn_variant and not all(self.input_scales == self.input_scales[0]): - return False - if self.is_qnn_variant and not all(self.input_zero_points == self.input_zero_points[0]): - return False - - input_dim = len(self.input_tensors[0].shape) - for tensor in self.input_tensors: - if len(tensor.shape) != input_dim: - return False - - if self.axis is None: - return False - if self.axis < 0: - return False - if self.axis >= input_dim: - return False - - output_shape = self.concat.checked_type.shape - if len(output_shape) != input_dim: - return False - if len(output_shape) > 3 and output_shape[0] != 1: - return False - return True - - -def concat_pattern(): - """Create pattern for concat""" - tensors = is_tuple(None) - scales = is_tuple(None) - zero_points = is_tuple(None) - qnn_concat = is_op("qnn.concatenate")( - tensors, scales, zero_points, is_constant(), is_constant() - ) - concat = is_op("concatenate")(tensors) - return concat | qnn_concat - - -class SplitParams: - """ - This class will parse a call to a ethos-u.split composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.split" - - def __init__(self, func_body): - self.split = func_body - self.input = TensorParams(func_body.args[0]) - self.axis = func_body.attrs.axis - self.indices_or_sections = self.convert_indices_or_sections( - func_body.attrs.indices_or_sections - ) - - def convert_indices_or_sections(self, indices_or_sections): - # split_v - if isinstance(indices_or_sections, tvm.ir.container.Array): - values = [int(i) for i in indices_or_sections] - # split - else: - values = int(indices_or_sections) - return values - - def is_valid(self): - """Checks whether split has compatible attributes with the hardware""" - if not check_valid_dtypes([self.input], supported_dtypes=[np.int8]): - return False - return True - - -def split_pattern(): - "Create the pattern for split" - split = is_op("split")(wildcard()) - return split - - -class RequantizeParams: - """ - This class will parse a call to ethos-u.requantize composite function - and extract the parameter information. 
- """ - - composite_name = "ethos-u.requantize" - - def __init__(self, func_body: Call): - from tvm.relay.backend.contrib.ethosu.util import RequantArgs - - layout = "NHWC" - in_var = func_body.args[0] - requantize = func_body - - self.ifm = TensorParams( - in_var, - layout=layout, - scale=requantize.args[RequantArgs.IFM_SCALE.value], - zero_point=requantize.args[RequantArgs.IFM_ZERO_POINT.value], - ) - self.ofm = TensorParams( - requantize, - layout=layout, - scale=requantize.args[RequantArgs.OFM_SCALE.value], - zero_point=requantize.args[RequantArgs.OFM_ZERO_POINT.value], - ) - - attrs = requantize.attrs - self.out_dtype = attrs.out_dtype - - def is_valid(self) -> bool: - """ - Checks whether qnn.requantize has compatible attributes with HW. - """ - tensor_params = [self.ifm, self.ofm] - if not check_valid_dtypes(tensor_params, supported_dtypes=[np.int8]): - return False - if not check_dimensions(self.ifm) or not check_dimensions(self.ofm): - return False - if self.out_dtype and self.out_dtype != "int8": - return False - return True - - -def requantize_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for qnn.requantize. - """ - return is_op("qnn.requantize")( - wildcard(), is_constant(), is_constant(), is_constant(), is_constant() - ) - - -class Resize2dParams: - """ - This class will parse a call to ethos-u.resize2d composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.resize2d" - - def __init__(self, func_body: Call): - layout = "NHWC" - - resize_2d = func_body - in_var = func_body.args[0] - if ( - isinstance(resize_2d, tvm.relay.expr.Call) - and isinstance(resize_2d.op, tvm.ir.Op) - and resize_2d.op.name == "qnn.quantize" - ): - resize_2d = resize_2d.args[0] - in_var = in_var.args[0].args[0] - out_var = func_body - - self.ifm = TensorParams(in_var, layout=layout) - self.ofm = TensorParams(out_var, layout=layout) - - attrs = resize_2d.attrs - self.size = attrs.size - self.method = attrs.method - self.roi = attrs.roi - self.coordinate_transformation_mode = attrs.coordinate_transformation_mode - self.rounding_method = attrs.rounding_method - self.out_dtype = attrs.out_dtype - - def is_valid(self) -> bool: - """ - Checks whether image.resize2d has compatible attributes with HW. - """ - - def check_compatible_size(mode, method, upscale_size, ifm_size): - """Checking the provided upscale_size is compatible with the NPU. The NPU only - supports upsampling when the upsampling size is 2 * input_size, or when there is - no upsampling to be done, so check that this is the case. 
In the special case of - resize_bilinear with align_corners=True, the NPU only supports an upsampling - size of 2 * input_size - 1.""" - delta = 1 if mode == "align_corners" and method == "linear" else 0 - upscale_size = np.array(upscale_size) - ifm_size = np.array(ifm_size) - ifm_upscaled = ifm_size * 2 - delta - return (ifm_upscaled == upscale_size).all() or (ifm_size == upscale_size).all() - - tensor_params = [self.ifm, self.ofm] - if not check_valid_dtypes(tensor_params, supported_dtypes=[np.int8]): - return False - if len(self.ifm.shape) != 4 or len(self.ofm.shape) != 4: - return False - if list(float(x) for x in self.roi) != [0.0] * 4: - return False - if self.method not in ("nearest_neighbor", "linear"): - return False - if self.coordinate_transformation_mode not in ( - "asymmetric", - "align_corners", - "half_pixel", - ): - return False - if ( - self.coordinate_transformation_mode == "half_pixel" - and self.rounding_method != "round_prefer_ceil" - or self.coordinate_transformation_mode != "half_pixel" - and self.rounding_method != "" - ): - return False - if not check_compatible_size( - self.coordinate_transformation_mode, - self.method, - self.size, - self.ifm.shape[1:3], - ): - return False - if self.out_dtype and self.out_dtype != "int8": - return False - return True - - -def resize2d_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for image.resize2d. - """ - dequant = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant()) - resize_2d = is_op("image.resize2d")(dequant).has_attr({"method": "linear"}) - quant = is_op("qnn.quantize")(resize_2d, is_constant(), is_constant()) - return quant | is_op("image.resize2d")(wildcard()).has_attr({"method": "nearest_neighbor"}) - - -class ExpandDimsParams: - """ - This class will parse a call to a ethos-u.expand_dims composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.expand_dims" - - def __init__(self, func_body): - self.expand_dims = func_body - self.input = TensorParams(func_body.args[0]) - self.output = TensorParams(func_body) - - def is_valid(self): - """Checks whether expand_dims has compatible attributes with the hardware.""" - if not check_dimensions(self.input) or not check_dimensions(self.output): - return False - if not check_valid_dtypes([self.input, self.output], supported_dtypes=[np.int8]): - return False - return True - - -def expand_dims_pattern(): - """Create the pattern for expand_dims.""" - return is_op("expand_dims")(wildcard()) - - -class SqueezeParams: - """ - This class will parse a call to a ethos-u.squeeze composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.squeeze" - - def __init__(self, func_body): - self.squeeze = func_body - self.input = TensorParams(func_body.args[0]) - self.output = TensorParams(func_body) - - def is_valid(self): - """Checks whether squeeze has compatible attributes with the hardware.""" - if not check_dimensions(self.output): - return False - if not check_valid_dtypes([self.input, self.output], supported_dtypes=[np.int8]): - return False - return True - - -def squeeze_pattern(): - """Create the pattern for squeeze.""" - return is_op("squeeze")(wildcard()) - - -class FullyConnectedParams: - """ - This class will parse a call to an ethos-u.fully_connected composite - function and extract the parameter information. 
- """ - - composite_name = "ethos-u.fully_connected" - - @requires_vela - def __init__(self, func_body): - from tvm.relay.backend.contrib.ethosu.util import QDenseArgs # type: ignore - from tvm.relay.backend.contrib.ethosu.util import BiasAddArgs, RequantArgs - - self.activation = None - if str(func_body.op.name) == "clip": - self.activation = func_body - requantize_op = self.activation.args[0] - else: - requantize_op = func_body - - call = requantize_op.args[0] - if str(requantize_op.args[0].op.name) == "nn.bias_add": - bias_add = call - qnn_dense = call.args[0] - else: - bias_add = None - qnn_dense = call - - # weights & biases are params as they should be constant - self.weights = TensorParams( - qnn_dense.args[QDenseArgs.WEIGHTS.value], - None, - qnn_dense.args[QDenseArgs.WEIGHTS_SCALE.value], - qnn_dense.args[QDenseArgs.WEIGHTS_ZERO_POINT.value], - ) - self.biases = ( - TensorParams( - bias_add.args[BiasAddArgs.BIASES.value], - None, - requantize_op.args[RequantArgs.IFM_SCALE.value], - requantize_op.args[RequantArgs.IFM_ZERO_POINT.value], - ) - if bias_add - else None - ) - self.ifm = TensorParams( - qnn_dense.args[QDenseArgs.IFM.value], - None, - qnn_dense.args[QDenseArgs.IFM_SCALE.value], - qnn_dense.args[QDenseArgs.IFM_ZERO_POINT.value], - ) - self.ofm = TensorParams( - func_body, - None, - requantize_op.args[RequantArgs.OFM_SCALE.value], - requantize_op.args[RequantArgs.OFM_ZERO_POINT.value], - ) - - def is_valid(self) -> bool: - """ - Checks whether Fully Connected has compatible attributes with HW - """ - - def check_weights_fc(weights): - """Checks whether weight tensor is compatible with HW""" - weights_limit = 127 * 65536 - # A saturation upper bound check for accumulators - weights.values = weights.values - weights.q_params.zero_point - axis = 1 - sum_weights = np.amax(np.sum(np.absolute(weights.values), axis=axis)) - if sum_weights > weights_limit: - return False - return True - - if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int8]): - return False - if not check_weights_fc(self.weights): - return False - if not check_bias(self.biases): - return False - if not check_batch_size(self.ifm): - return False - # Check input shape - if not len(self.ifm.shape) == 2: - return False - # Check output shape - if not len(self.ofm.shape) == 2: - return False - return True - - -def qnn_fc_pattern(): - dense = is_op("qnn.dense")( - wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant() - ) - optional_bias_add = is_op("nn.bias_add")(dense, is_constant()) - req = is_op("qnn.requantize")( - dense | optional_bias_add, is_constant(), is_constant(), is_constant(), is_constant() - ) - optional_clip = req.optional(is_op("clip")) - return optional_clip - - -class MatMulParams(FullyConnectedParams): - """ - This class will parse a call to an ethos-u.matmul composite - function and extract the parameter information. 
- """ - - composite_name = "ethos-u.matmul" - - @requires_vela - def __init__(self, func_body): - FullyConnectedParams.__init__(self, func_body) - - def is_valid(self) -> bool: - """ - Checks whether matrix multiplication has compatible attributes with HW - """ - - if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int8, np.int16]): - return False - if not len(self.ifm.shape) == 2: - return False - if not len(self.ofm.shape) == 2: - return False - # The weights must be transposed - if self.ifm.shape[1] != self.weights.shape[1]: - return False - return True - - -def matmul_pattern(): - dense = is_op("qnn.dense")( - wildcard(), wildcard(), is_constant(), is_constant(), is_constant(), is_constant() - ) - req = is_op("qnn.requantize")(dense, is_constant(), is_constant(), is_constant(), is_constant()) - optional_clip = req.optional(is_op("clip")) - return optional_clip - - -class MatMulFixedPointParams: - """ - This class will parse a call to an ethos-u.matmul_fixed_point composite - function and extract the parameter information. - """ - - composite_name = "ethos-u.matmul_fixed_point" - - @requires_vela - def __init__(self, func_body): - from tvm.relay.backend.contrib.ethosu.util import QDenseArgs - - dense_fixed_point = func_body.args[0] - dense = dense_fixed_point.args[0] - # fixed_point_multiply relay operation uses multiplier with 31 fractional bits - # so to determine the size of the fraction use the formula: 31 - shift - self.fraction_size = 31 - dense_fixed_point.attrs.shift - fract_scale = tvm.relay.Constant(tvm.nd.array(np.array(1 / 2**self.fraction_size))) - fract_zero_point = tvm.relay.Constant(tvm.nd.array(np.array(0, dtype="int32"))) - - self.activation = None - self.weights = TensorParams( - dense.args[QDenseArgs.WEIGHTS.value].args[0].args[0], - None, - fract_scale, - fract_zero_point, - ) - self.ifm = TensorParams( - dense.args[QDenseArgs.IFM.value].args[0].args[0], - None, - fract_scale, - fract_zero_point, - ) - self.ofm = TensorParams( - func_body, - None, - fract_scale, - fract_zero_point, - ) - - def is_valid(self) -> bool: - """ - Checks whether matrix multiplication has compatible attributes with HW - """ - - if self.fraction_size < 0 or self.fraction_size > 16: - return False - if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int16]): - return False - if not len(self.ifm.shape) == 2: - return False - if not len(self.ofm.shape) == 2: - return False - # The weights must be transposed - if self.ifm.shape[1] != self.weights.shape[1]: - return False - return True - - -def matmul_fixed_point_pattern(): - ifm = is_op("cast")(wildcard()) - ifm2 = is_op("cast")(wildcard()) - ifm = is_op("fixed_point_multiply")(ifm) - ifm2 = is_op("fixed_point_multiply")(ifm2) - dense = is_op("nn.dense")(ifm, ifm2) - dense = is_op("fixed_point_multiply")(dense) - return is_op("cast")(dense) - - -class HardSwishParams: - """ - This class will parse a call to a ethos-u.hard_swish composite function - and extract the parameter information. 
- """ - - composite_name = "ethos-u.hard_swish" - - def __init__(self, func_body): - from tvm.relay.backend.contrib.ethosu.util import DequantizeArgs, QuantizeArgs - - quantize = func_body - divide = quantize.args[0] - multiply = divide.args[0] - clip = multiply.args[1] - add = clip.args[0] - dequantize = add.args[0] - - self.ifm = TensorParams( - dequantize.args[0], - scale=dequantize.args[DequantizeArgs.IFM_SCALE.value], - zero_point=dequantize.args[DequantizeArgs.IFM_ZERO_POINT.value], - ) - self.ofm = TensorParams( - quantize, - scale=quantize.args[QuantizeArgs.OFM_SCALE.value], - zero_point=quantize.args[QuantizeArgs.OFM_ZERO_POINT.value], - ) - - def is_valid(self): - tensor_params = [self.ifm, self.ofm] - if not check_valid_dtypes(tensor_params, supported_dtypes=[np.int8]): - return False - return True - - -def hard_swish_pattern(): - """Create the pattern for hard swish.""" - dequantize = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant()) - add = is_op("add")(dequantize, is_constant()) - clip = is_op("clip")(add) - multiply = is_op("multiply")(dequantize, clip) - divide = is_op("divide")(multiply, is_constant()) - quantize = is_op("qnn.quantize")(divide, is_constant(), is_constant()) - return quantize - - -class PadParams: - """ - This class will parse a call to a ethosu.pad2d composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.pad2d" - # The ethos-u.pad2d composite function will be transformed to the - # ethosu_depthwise_conv2d operator. - # For the ethosu_depthwise_conv2d the hardware only supports padding - # upto the numbers as follows, so we define such padding limits - padding_bounds = [31, 31, 32, 32] - - def __init__(self, func_body: Call): - from tvm.relay.backend.contrib.ethosu.util import QPadArgs - - # there is no 'layout' attribute in nn.pad - layout = "NHWC" - self.ifm = TensorParams( - tensor=func_body.args[QPadArgs.IFM.value], - layout=layout, - scale=tvm.relay.Constant(tvm.nd.array(np.array(1.0, dtype="float32"))), - zero_point=func_body.args[QPadArgs.IFM_ZERO_POINT.value], - ) - - self.padding = self.extract_padding(func_body) - self.ofm = TensorParams( - tensor=func_body, - layout=layout, - scale=tvm.relay.Constant(tvm.nd.array(np.array(1.0, dtype="float32"))), - zero_point=func_body.args[QPadArgs.IFM_ZERO_POINT.value], - ) - - @staticmethod - def extract_padding( - padding: relay.Call, - ) -> Optional[Tuple[int, int, int, int]]: - """ - Here we check whether a separate spatial-dimension padding operation can be - rewritten as NPU depthwise convolution. If the padding specified by the - separate nn.pad operation is not supported by NPU depthwise convolution, - None will be returned. This will cause the nn.pad not to be offloaded to NPU. 
- """ - pad_width = padding.attrs["pad_width"] - if len(pad_width) != 4: - return None - if list(pad_width[0]) != [0, 0] or list(pad_width[3]) != [0, 0]: - return None - return [ - pad_width[1][0], - pad_width[2][0], - pad_width[1][1], - pad_width[2][1], - ] - - def is_valid(self): - """ - This function checks whether pad has compatible attributes - with the NPU depthwise convolution - """ - tensor_params = [self.ifm, self.ofm] - if not check_valid_dtypes(tensor_params, supported_dtypes=[np.uint8, np.int8]): - return False - if self.ifm.dtype != self.ofm.dtype: - return False - if not check_batch_size(self.ifm): - return False - if not self.padding or not check_padding(self.padding, self.padding_bounds): - return False - if not check_dimensions(self.ifm) or not check_dimensions(self.ofm): - return False - return True - - -class ChannelPadParams: - """ - This class will parse a call to a ethos-u.channel-pad composite function - and extract the parameter information. - """ - - composite_name = "ethos-u.channel-pad" - # The ethos-u.channel-pad composite function will be transformed - # to the Relay concatenate operation. - - def __init__(self, func_body: Call): - from tvm.relay.backend.contrib.ethosu.util import QPadArgs - - # there is no 'layout' attribute in nn.pad - layout = "NHWC" - self.ifm = TensorParams( - tensor=func_body.args[QPadArgs.IFM.value], - layout=layout, - scale=tvm.relay.Constant(tvm.nd.array(np.array(1.0, dtype="float32"))), - zero_point=func_body.args[QPadArgs.IFM_ZERO_POINT.value], - ) - - self.ch_padding = self.extract_ch_padding(func_body) - self.ofm = TensorParams( - tensor=func_body, - layout=layout, - scale=tvm.relay.Constant(tvm.nd.array(np.array(1.0, dtype="float32"))), - zero_point=func_body.args[QPadArgs.IFM_ZERO_POINT.value], - ) - - @staticmethod - def extract_ch_padding( - padding: relay.Call, - ) -> Optional[Tuple[int, int]]: - """ - Here we check whether a separate channel-dimension padding operation can be - rewritten as Relay concatenate operation. If the padding specified by the - separate nn.pad operation is not supported by NPU, None will be returned. - This will cause the nn.pad not to be offloaded to NPU. - """ - pad_width = padding.attrs["pad_width"] - if len(pad_width) != 4: - return None - if ( - list(pad_width[0]) != [0, 0] - or list(pad_width[1]) != [0, 0] - or list(pad_width[2]) != [0, 0] - ): - return None - return [ - pad_width[3][0], - pad_width[3][1], - ] - - def is_valid(self): - """ - This function checks whether pad has compatible attributes - with the Relay concatenate operation - """ - tensor_params = [self.ifm, self.ofm] - if not check_valid_dtypes(tensor_params, supported_dtypes=[np.uint8, np.int8]): - return False - if self.ifm.dtype != self.ofm.dtype: - return False - if not check_batch_size(self.ifm): - return False - if not self.ch_padding: - return False - if not check_dimensions(self.ifm) or not check_dimensions(self.ofm): - return False - return True - - -def pad_pattern(): - """Create pattern for pad""" - pattern = is_op("nn.pad")(wildcard(), is_constant()) - return pattern - - -class SoftMaxParams: - """ - This class will parse a call to a ethos-u.softmax composite function - and extract the parameter information. 
- """ - - composite_name = "ethos-u.softmax" - - def __init__(self, func_body: Call): - from tvm.relay.backend.contrib.ethosu.util import QuantizeArgs - from tvm.relay.backend.contrib.ethosu.util import DequantizeArgs - - quantize = func_body - softmax_op = quantize.args[0] - dequantize = softmax_op.args[0] - - layout = "NHWC" - - self.ifm = TensorParams( - dequantize.args[DequantizeArgs.IFM.value], - layout, - dequantize.args[DequantizeArgs.IFM_SCALE.value], - dequantize.args[DequantizeArgs.IFM_ZERO_POINT.value], - ) - self.ofm = TensorParams( - quantize, - layout, - quantize.args[QuantizeArgs.OFM_SCALE.value], - quantize.args[QuantizeArgs.OFM_ZERO_POINT.value], - ) - - self.operator_type = "SOFTMAX" - - def is_valid(self): - """Checks whether Softmax has compatible attributes with HW""" - tensor_params = [self.ifm, self.ofm] - if not check_valid_dtypes(tensor_params, supported_dtypes=[np.int8]): - return False - if self.ifm.dtype != self.ofm.dtype: - return False - if not check_dimensions(self.ifm): - return False - if self.ifm.shape != self.ofm.shape: - return False - return True - - -def softmax_pattern() -> tvm.relay.dataflow_pattern.DFPattern: - """ - This function creates the pattern for Softmax. - """ - pattern = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant()) - pattern = is_op("nn.softmax")(pattern) - pattern = is_op("qnn.quantize")(pattern, is_constant(), is_constant()) - return pattern - - -@register_pattern_table("ethos-u") -def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Callable]]: - return [ - ( - ChannelPadParams.composite_name, - pad_pattern(), - lambda pat: ChannelPadParams(pat).is_valid(), - ), - ( - QnnConv2DParams.composite_name, - qnn_conv2d_pattern(), - lambda pat: QnnConv2DParams(pat).is_valid(), - ), - ( - QnnDepthwiseConv2DParams.composite_name, - qnn_depthwise_conv2d_pattern(), - lambda pat: QnnDepthwiseConv2DParams(pat).is_valid(), - ), - ( - QnnConv2DTransposeParams.composite_name, - qnn_conv2d_transpose_pattern(), - lambda pat: QnnConv2DTransposeParams(pat).is_valid(), - ), - ( - FullyConnectedParams.composite_name, - qnn_fc_pattern(), - lambda pat: FullyConnectedParams(pat).is_valid(), - ), - ( - MatMulParams.composite_name, - matmul_pattern(), - lambda pat: MatMulParams(pat).is_valid(), - ), - ( - MatMulFixedPointParams.composite_name, - matmul_fixed_point_pattern(), - lambda pat: MatMulFixedPointParams(pat).is_valid(), - ), - ( - MaxPool2DParams.composite_name, - qnn_maxpool2d_pattern(), - lambda pat: MaxPool2DParams(pat).is_valid(), - ), - ( - AvgPool2DParams.composite_name, - qnn_avgpool2d_pattern(), - lambda pat: AvgPool2DParams(pat).is_valid(), - ), - ( - PadParams.composite_name, - pad_pattern(), - lambda pat: PadParams(pat).is_valid(), - ), - ( - AddParams.composite_name, - qnn_add_pattern(), - lambda pat: AddParams(pat).is_valid(), - ), - ( - SubParams.composite_name, - qnn_subtract_pattern(), - lambda pat: SubParams(pat).is_valid(), - ), - ( - MulParams.composite_name, - qnn_mul_pattern(), - lambda pat: MulParams(pat).is_valid(), - ), - ( - MinParams.composite_name, - minimum_clip_requantize_pattern(), - lambda pat: MinParams(pat).is_valid(), - ), - ( - MinParams.composite_name, - minimum_pattern(), - lambda pat: MinParams(pat).is_valid(), - ), - ( - MaxParams.composite_name, - maximum_clip_requantize_pattern(), - lambda pat: MaxParams(pat).is_valid(), - ), - ( - MaxParams.composite_name, - maximum_pattern(), - lambda pat: MaxParams(pat).is_valid(), - ), - ( - ShlParams.composite_name, - shl_pattern(), 
- lambda pat: ShlParams(pat).is_valid(), - ), - ( - ReshapeParams.composite_name, - reshape_pattern(), - lambda pat: ReshapeParams(pat).is_valid(), - ), - ( - StridedSliceParams.composite_name, - strided_slice_pattern(), - lambda pat: StridedSliceParams(pat).is_valid(), - ), - ( - AbsParams.composite_name, - abs_pattern(), - lambda pat: AbsParams(pat).is_valid(), - ), - (TanhParams.composite_name, tanh_pattern(), lambda pat: TanhParams(pat).is_valid()), - ( - TanhFixedPointParams.composite_name, - tanh_fixed_point_pattern(), - lambda pat: TanhFixedPointParams(pat).is_valid(), - ), - ( - MeanParams.composite_name, - mean_pattern(), - lambda pat: MeanParams(pat).is_valid(), - ), - ( - SumParams.composite_name, - sum_pattern(), - lambda pat: SumParams(pat).is_valid(), - ), - ( - SoftMaxParams.composite_name, - softmax_pattern(), - lambda pat: SoftMaxParams(pat).is_valid(), - ), - ( - LeakyReLUParams.composite_name, - leaky_relu_pattern(), - lambda pat: LeakyReLUParams(pat).is_valid(), - ), - (ConcatParams.composite_name, concat_pattern(), lambda pat: ConcatParams(pat).is_valid()), - ( - SigmoidParams.composite_name, - sigmoid_pattern(), - lambda pat: SigmoidParams(pat).is_valid(), - ), - ( - SplitParams.composite_name, - split_pattern(), - lambda pat: SplitParams(pat).is_valid(), - ), - ( - RequantizeParams.composite_name, - requantize_pattern(), - lambda pat: RequantizeParams(pat).is_valid(), - ), - ( - Resize2dParams.composite_name, - resize2d_pattern(), - lambda pat: Resize2dParams(pat).is_valid(), - ), - ( - ExpandDimsParams.composite_name, - expand_dims_pattern(), - lambda pat: ExpandDimsParams(pat).is_valid(), - ), - ( - SqueezeParams.composite_name, - squeeze_pattern(), - lambda pat: SqueezeParams(pat).is_valid(), - ), - ( - HardSwishParams.composite_name, - hard_swish_pattern(), - lambda pat: HardSwishParams(pat).is_valid(), - ), - ] - - -# pylint: disable=unused-argument -@requires_vela -def partition_for_ethosu( - mod: tvm.ir.IRModule, - params: Optional[Dict[str, tvm.runtime.NDArray]] = None, - mod_name: str = "default", - **opts, -): - """This helper function partition the relay graph as produced by the - relay frontend for a given model into external functions - to be presented to the codegen. - - Parameters - ---------- - mod : tvm.ir.IRModule - The IRModule that gets generated from a relay frontend - params : Optional[Dict[str, tvm.runtime.NDArray]] - Constant input parameters. 
- mod_name: str, optional - The module name - - Returns - ------- - mod : IRModule - The partitioned IRModule with external global functions - """ - from tvm.relay.backend.contrib.ethosu import preprocess, codegen - - if params: - mod["main"] = bind_params_by_name(mod["main"], params) - - pattern = relay.op.contrib.get_pattern_table("ethos-u") - mod = relay.transform.InferType()(mod) - mod = codegen.replicate_pads(mod) - mod = relay.transform.InferType()(mod) - mod = relay.transform.MergeComposite(pattern)(mod) - mod = relay.transform.AnnotateTarget("ethos-u")(mod) - mod = relay.transform.MergeCompilerRegions()(mod) - mod = relay.transform.InferType()(mod) - mod = relay.transform.PartitionGraph(mod_name)(mod) - mod = relay.transform.InferType()(mod) - mod = preprocess.preprocess_ext_io()(mod) - return mod diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py index 35ebaf46f067..81baa57f9eec 100644 --- a/python/tvm/target/target.py +++ b/python/tvm/target/target.py @@ -511,29 +511,6 @@ def intel_graphics(model="unknown", options=None): } -def micro(model="unknown", options=None): - """Returns a microTVM target. - - Parameters - ---------- - model : str - Canonically identifies the target device. This is typically a device board level name. - The allowed values are MICRO_SUPPORTED_MODELS.keys(). - options : str or list of str - Additional options - """ - if model not in MICRO_SUPPORTED_MODELS: - raise ValueError(f"Model {model} not supported by tvm.target.micro.") - opts = _merge_opts( - MICRO_SUPPORTED_MODELS[model] + [f"-model={model}"], - options, - ) - - # NOTE: in the future, the default micro target will be LLVM except when - # external dependencies are present. - return Target(" ".join(["c"] + opts)) - - def arm_cpu(model="unknown", options=None): """Returns a ARM CPU target. This function will also download pre-tuned op parameters when there is none. diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py deleted file mode 100644 index 36fdad789d96..000000000000 --- a/python/tvm/testing/aot.py +++ /dev/null @@ -1,1117 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
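For context, a minimal usage sketch of the `partition_for_ethosu` helper removed above. It assumes a pre-removal TVM checkout with the ethos-u-vela package installed; the placeholder graph and argument values are illustrative only and are not taken from this diff.

import tvm
from tvm import relay
from tvm.relay.op.contrib.ethosu import partition_for_ethosu  # removed by this change

# Any Relay module produced by a frontend will do; a tiny placeholder graph
# keeps the sketch self-contained.
x = relay.var("x", shape=(1, 16, 16, 8), dtype="int8")
mod = tvm.IRModule.from_expr(relay.Function([x], relay.abs(x)))

# Composite patterns registered in the "ethos-u" pattern table are merged,
# annotated, and partitioned into external functions for the NPU codegen;
# anything that fails its is_valid() check stays on the host.
mod = partition_for_ethosu(mod, params=None)
print(mod)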
-# pylint: disable=use-list-literal, consider-using-with, f-string-without-interpolation -"""Common functions for AOT test cases""" -import contextlib -import datetime -import os -import pathlib -import re -import subprocess -import tarfile -import logging -from typing import Any, NamedTuple, Union, Tuple, Optional, List, Dict, Callable -import numpy as np - -import tvm -from tvm import relay -from tvm import autotvm -from tvm.contrib import utils, graph_executor -from tvm.relay.backend import Executor, Runtime -from tvm.relay.backend.utils import mangle_module_name -from tvm.micro import export_model_library_format -from tvm.micro.testing.utils import mlf_extract_workspace_size_bytes - -_LOG = logging.getLogger(__name__) - -NP_TYPE_TO_C = { - "int8": "int8_t", - "uint8": "uint8_t", - "int16": "int16_t", - "uint16": "uint16_t", - "int32": "int32_t", - "uint32": "uint32_t", - # See: https://gcc.gnu.org/onlinedocs/gcc/Half-Precision.html - "float16": "_Float16", - "float32": "float", -} - -AOT_SUCCESS_TOKEN = "AOT_TEST_SUCCESS" -AOT_FAILURE_TOKEN = "AOT_TEST_FAILURE" - - -class AOTTestModel(NamedTuple): - """Class to describe a model under test - - Parameters - ---------- - module: tvm.IRModule - IRModule to generate AOT executor for - inputs: Dict[str, np.array] - Dict of input names to value arrays - outputs: List[np.array] - Dict of output names to value arrays - output_tolerance: Optional[Union[int, float]] - Allowed tolerance of the output - name: str - Name to use for this model - params: Optional[Dict[str, np.array]] - Dict of parameter names to value arrays - extra_memory_in_bytes: int - Extra memory to allocate after planned memory - """ - - module: tvm.IRModule - inputs: Dict[str, np.array] - outputs: Dict[str, np.array] - output_tolerance: Optional[Union[int, float]] = None - name: str = "default" - params: Optional[Dict[str, np.array]] = None - extra_memory_in_bytes: int = 0 - - -class AOTCompiledTestModel(NamedTuple): - """A compiled AOTTestModel with associated module - - Parameters - ---------- - model: AOTTestModel - Input model to be compiled - module: tvm.runtime.Module - The compiled Module for the associated AOTTestModel - """ - - model: AOTTestModel - executor_factory: tvm.relay.backend.executor_factory.AOTExecutorFactoryModule - - -class AOTDataLinkage(NamedTuple): - """A compiled AOTTestModel with associated module - - Parameters - ---------- - section: str - Named section to place data into - alignment: int - Section alignment - """ - - section: str - alignment: int - - -class AOTTestRunner(NamedTuple): - """Class to describe a test runner for AOT code - - Parameters - ---------- - makefile: str - Premade Makefile to use from the AOT test folder - prologue: str - Code to prepend to the main function - epilogue: str - Code to append to the main function - includes: List[str] - Additional includes required to run the AOT test runner - parameters: Dict[str, str] - Additional parameters to pass to the make command - pass_config: Dict[str, Any] - Additional pass configuration when building the model - """ - - makefile: str = "default" - prologue: str = "" - epilogue: str = "" - includes: List[str] = [] - parameters: Dict[str, str] = {} - pass_config: Dict[str, Any] = {} - - -def _subprocess_check_log_output(cmd, cwd, logfile): - """ - This method runs a process and logs the output to both a log file and stdout - """ - _LOG.info("Execute (%s): %s", cwd, cmd) - cmd_base = cmd[0] if isinstance(cmd, (list, tuple)) else cmd.split(" ", 1)[0] - proc = subprocess.Popen( - cmd, - 
cwd=cwd, - shell=True, - bufsize=0, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - encoding="utf-8", - ) - stdout = "" - with open(logfile, "a") as f: - msg = ( - "\n" - + "-" * 80 - + f"{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}: Execute ({cwd}): {cmd}\n" - + "-" * 80 - ) - f.write(msg) - stdout += msg + "\n" - while True: - data = proc.stdout.readline() - stdout += data - _LOG.debug("%s: %s", cmd_base, data.rstrip("\n")) - f.write(data) - - # process is done if there is no data and the result is valid - if not data: # EOF - break - - proc.wait() - if proc.returncode != 0: - raise RuntimeError(f"Subprocess failed: {cmd}\nstdout:\n{stdout}") - - -def _get_entrypoint_suffix(target): - # LLVM modules don't use the same entrypoint suffix - # as C source generated modules. - if target.kind.name == "llvm": - return "__tvm_main__" - else: - return "run" - - -def _mangle_name(mod_name, name): - mod_name = mangle_module_name(mod_name) - return mod_name + "_" + name - - -# TODO: Move to linker script with list of symbols rather than coding into source -def _emit_data_linkage(output_file, data_linkage): - if data_linkage is not None: - output_file.write( - f'__attribute__((section("{data_linkage.section}"), ' - f"aligned({data_linkage.alignment}))) " - ) - - -def _emit_main_prologue( - main_file, - custom_prologue, - workspace_bytes, - data_linkage, - compiled_models, - interface_api, - use_stack_allocator=True, - debug_last_error=False, -): - if use_stack_allocator: - workspace_define = f"#define WORKSPACE_SIZE ({workspace_bytes}" - if interface_api == "c": - for compiled_model in compiled_models: - model = compiled_model.model - workspace_define += f" + TVMGEN_{model.name.upper()}_WORKSPACE_SIZE" - # Add TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES because of memory alignment. - workspace_define += " + TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)\n" - main_file.write(workspace_define) - _emit_data_linkage(main_file, data_linkage) - main_file.write("static uint8_t g_aot_memory[WORKSPACE_SIZE];\n") - main_file.write("tvm_workspace_t app_workspace;\n") - main_file.write( - """\n -tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { - return StackMemoryManager_Allocate(&app_workspace, num_bytes, out_ptr); -} -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { - return StackMemoryManager_Free(&app_workspace,ptr); -} - """ - ) - else: - # An implementation is not needed for these if the stack allocator is not used - main_file.write( - """\n -tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { - return kTvmErrorFunctionCallNotImplemented; -} -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { - return kTvmErrorFunctionCallNotImplemented; -} - """ - ) - main_file.write( - """\n -void TVMPlatformAbort(tvm_crt_error_t code) { exit(-1); } -void TVMLogf(const char* msg, ...) 
{ - va_list args; - va_start(args, msg); - vfprintf(stdout, msg, args); - va_end(args); -} - """ - ) - if debug_last_error: - main_file.write( - """\n -tvm_crt_error_t TVMPlatformTimerStart() { - return kTvmErrorFunctionCallNotImplemented; -} -tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) { - return kTvmErrorFunctionCallNotImplemented; -} -const TVMModule* TVMSystemLibEntryPoint(void) { return NULL; } -""" - ) - else: - main_file.write( - """\n -TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) {} -""" - ) - main_file.write("\nint main(){\n") - main_file.write(custom_prologue) - - -def _emit_main_data(main_file, input_map, output_map, mod_name): - for key in input_map: - sanitized_tensor_name = re.sub(r"\W", "_", key) - main_file.write( - f'#include "{_mangle_name(mod_name,"input_data")}_{sanitized_tensor_name}.h"\n' - ) - - for key in output_map: - sanitized_tensor_name = re.sub(r"\W", "_", key) - main_file.write( - f'#include "{_mangle_name(mod_name,"expected_output_data")}_' - f'{sanitized_tensor_name}.h"\n' - f'#include "{_mangle_name(mod_name,"output_data")}_' - f'{sanitized_tensor_name}.h"\n' - ) - - -def _emit_main_device_structs(main_file, devices, mod_name): - if devices: - main_file.write( - f"struct {_mangle_name(mod_name, 'devices')} {_mangle_name(mod_name, 'devices')} = {{" - ) - for device in devices: - main_file.write(f"\t.{device} = {device},\n") - main_file.write("};\n") - - -def _emit_main_workspace_pool_structs(main_file, workspace_pool_names, mod_name): - if workspace_pool_names and len(workspace_pool_names) > 0: - main_file.write( - f"struct {_mangle_name(mod_name, 'workspace_pools')} " - f"{_mangle_name(mod_name, 'workspace_pools')} = {{" - ) - for workspace_pool_name in workspace_pool_names.keys(): - main_file.write( - f"\t.{workspace_pool_name} = {workspace_pool_names[workspace_pool_name]}" - f"{workspace_pool_name},\n" - ) - main_file.write("};\n") - - -def _emit_main_data_structs(main_file, input_map, output_map, mod_name): - main_file.write( - f"struct {_mangle_name(mod_name, 'inputs')} {_mangle_name(mod_name, 'inputs')} = {{" - ) - for key in input_map: - sanitized_tensor_name = re.sub(r"\W", "_", key) - main_file.write( - f"\t.{sanitized_tensor_name} = " - f"{_mangle_name(mod_name, 'input_data')}_{sanitized_tensor_name},\n" - ) - main_file.write("};\n") - - main_file.write( - f"struct {_mangle_name(mod_name, 'outputs')} {_mangle_name(mod_name, 'outputs')} = {{" - ) - for key in output_map: - sanitized_tensor_name = re.sub(r"\W", "_", key) - main_file.write( - f"\t.{sanitized_tensor_name} = {_mangle_name(mod_name, 'output_data')}_" - f"{sanitized_tensor_name},\n" - ) - main_file.write("};\n") - - -def _emit_main_data_setup(main_file, input_map, output_map, mod_name): - num_outputs = len(output_map) - num_inputs = len(input_map) - main_file.write(f'void* {_mangle_name(mod_name,"inputs")}[{num_inputs}] = {{ ') - for key in input_map: - sanitized_tensor_name = re.sub(r"\W", "_", key) - main_file.write(f'{_mangle_name(mod_name,"input_data")}_{sanitized_tensor_name}, ') - main_file.write("};\n") - main_file.write(f'void* {_mangle_name(mod_name,"outputs")}[{num_outputs}] = {{ ') - for key in output_map: - sanitized_tensor_name = re.sub(r"\W", "_", key) - main_file.write(f'{_mangle_name(mod_name, "output_data")}_{sanitized_tensor_name}, ') - main_file.write("};\n") - - -def _emit_main_c_interface_call( - main_file, devices, workspace_pool_names, mod_name, use_workspace_io, debug_last_error -): - sub_strings = 
list() - sub_strings.append(f'if ({_mangle_name(mod_name,"run")}(') - if not use_workspace_io: - sub_strings.append(f'&{_mangle_name(mod_name,"inputs")}, ') - sub_strings.append(f'&{_mangle_name(mod_name,"outputs")}, ') - if workspace_pool_names: - sub_strings.append(f'&{_mangle_name(mod_name,"workspace_pools")}, ') - if devices: - sub_strings.append(f'&{_mangle_name(mod_name,"devices")}, ') - # Removing the last two characters that is a comma and a space - sub_strings[-1] = sub_strings[-1][:-2] - # Adding brackets and newline instead - sub_strings[-1] = sub_strings[-1] + ") == -1) {\n" - main_file_string = "".join(sub_strings) - main_file.write(main_file_string) - if debug_last_error: - main_file.write(f'\tprintf("ERROR: %s\\n", TVMGetLastError());\n') - main_file.write(f'\tprintf("{AOT_FAILURE_TOKEN}\\n");\n') - main_file.write("\treturn -1;\n") - main_file.write("}\n") - - -def _emit_main_fake_packed_values(main_file): - main_file.write( - """ - static DLDevice fake_device = {kDLCPU, 0}; - static int64_t fake_dims = 0; - static int64_t fake_shape = {0}; - """ - ) - - -def _emit_entry_function_forward_declaration(main_file, mod_name, entrypoint_suffix): - main_file.write( - f"int {_mangle_name(mod_name, entrypoint_suffix)}" - f"(TVMValue[], int32_t[], int32_t, void*, int32_t, void*);\n" - ) - - -def _emit_main_packed_call(main_file, input_map, output_list, mod_name, entrypoint_suffix): - tensors_name = _mangle_name(mod_name, "tensors") - values_name = _mangle_name(mod_name, "values") - typeids_name = _mangle_name(mod_name, "typeids") - - def fake_tensor(source, source_index, packed_index): - main_file.write( - f""" - {tensors_name}[{packed_index}].device = fake_device; - {tensors_name}[{packed_index}].data = {source}[{source_index}]; - {tensors_name}[{packed_index}].shape = &fake_shape; - {tensors_name}[{packed_index}].ndim = fake_dims; - {tensors_name}[{packed_index}].byte_offset = 0; - {tensors_name}[{packed_index}].strides = NULL; - {values_name}[{packed_index}].v_handle = &{tensors_name}[{packed_index}]; - """ - ) - - num_outputs = len(output_list) - num_inputs = len(input_map) - num_tensors = num_inputs + num_outputs - main_file.write( - f""" - DLTensor {tensors_name}[{num_tensors}]; - TVMValue {values_name}[{num_tensors}]; - int32_t {typeids_name}[{num_tensors}]; - """ - ) - - for i in range(0, num_inputs): - fake_tensor(_mangle_name(mod_name, "inputs"), i, i) - for i in range(0, num_outputs): - fake_tensor(_mangle_name(mod_name, "outputs"), i, i + num_inputs) - - main_file.write( - f"{_mangle_name(mod_name, entrypoint_suffix)}" - f"({values_name}, {typeids_name}, 0, NULL, 0, NULL);\n" - ) - main_file.write("\n") - - -def _emit_main_compare( - main_file, - outputs, - output_tolerance, - mod_name, - use_interface_c=False, - print_output_on_mismatch=False, -): - for key in outputs: - sanitized_tensor_name = re.sub(r"\W", "_", key) - expected_data_name = _mangle_name(mod_name, f"expected_output_data_{sanitized_tensor_name}") - is_float_dtype = outputs[key].dtype == "float32" - - comparison_function = "abs" - tolerance = output_tolerance or 0 - value_format_specifier = "%d" - if is_float_dtype: - comparison_function = "fabs" - tolerance = output_tolerance or 0.001 - value_format_specifier = "%f" - - data_length_var_name = ( - _mangle_name(mod_name, f"output_data_{sanitized_tensor_name}") + "_len" - ) - if use_interface_c: - c_type = NP_TYPE_TO_C[str(outputs[key].dtype)] - actual_data_name = f"(({c_type}*)" + _mangle_name( - mod_name, f"outputs.{sanitized_tensor_name})" - ) - else: - 
actual_data_name = _mangle_name(mod_name, f"output_data_{sanitized_tensor_name}") - - if print_output_on_mismatch: - main_file.write( - f""" - {{ - int mismatch = 0; - int out_ndim = {outputs[key].ndim}; - int out_shape[] = {{{','.join(map(str, outputs[key].shape))}}}; - int out_indices[out_ndim]; - printf("Element [Position]: Actual, Reference\\n"); - printf("-------------------------------------\\n"); - for (int i = 0; i<{data_length_var_name}; i++) {{ - if ({comparison_function}({actual_data_name}[i] - - {expected_data_name}[i]) > {tolerance}) {{ - int flat_index = i; - for (int j = out_ndim - 1; j >= 0; j--){{ - out_indices[j] = flat_index % out_shape[j]; - flat_index /= out_shape[j]; - }} - printf("Element [%d", out_indices[0]); - for (int j = 1; j < out_ndim; j++) - printf(", %d", out_indices[j]); - printf("]: {value_format_specifier}, {value_format_specifier}\\n", - {actual_data_name}[i], {expected_data_name}[i]); - mismatch += 1; - }} - }} - if (mismatch >= 1) {{ - float percent_mismatched = - ((float) mismatch) / ((float) {data_length_var_name}) * 100; - printf("\\nMismatched elements: %d / %zu (%.2f%%)\\n", - mismatch, {data_length_var_name}, percent_mismatched); - printf("{AOT_FAILURE_TOKEN}\\n"); - return -1; - }} - }} - """ - ) - else: - main_file.write( - f"for (int i = 0; i<{data_length_var_name}; i++) {{\n" - f"\tif ({comparison_function}({actual_data_name}[i]-" - f"{expected_data_name}[i]) > {tolerance}) {{\n" - f'\t\tprintf("{AOT_FAILURE_TOKEN}\\n");\n' - f"\t\treturn -1;\n" - f"\t}}\n" - f"}}" - ) - - -def _emit_main_init_memory_manager(main_file): - main_file.write("StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE);") - main_file.write("\n") - - -def _emit_main_epilogue(main_file, custom_epilogue): - main_file.write(custom_epilogue) - main_file.write(f'printf("{AOT_SUCCESS_TOKEN}\\n");') - main_file.write("return 0;") - main_file.write("}\n") - - -def _emit_main_common_includes(main_file, custom_includes, debug_last_error): - main_file.write("#include \n") - main_file.write("#include \n") - main_file.write("#include \n") - main_file.write("#include \n") - main_file.write('#include "tvm/runtime/c_runtime_api.h"\n') - main_file.write('#include "tvm/runtime/crt/stack_allocator.h"\n') - if debug_last_error: - main_file.write('#include "tvm/runtime/crt/module.h"\n') - for include in custom_includes: - main_file.write(f'#include "{include}"\n') - - -def _emit_main_micro_include(main_file, mod_name): - main_file.write(f"#include <{mangle_module_name(mod_name)}.h>\n") - - -def _create_main( - test_name, - compiled_models, - output_path, - custom_includes, - custom_prologue, - custom_epilogue, - data_linkage, - interface_api, - workspace_bytes, - use_stack_allocator=True, - use_workspace_io=False, - debug_last_error=False, - print_output_on_mismatch=False, -): - file_path = pathlib.Path(f"{output_path}/" + test_name).resolve() - # create header file - raw_path = file_path.with_suffix(".c").resolve() - with open(raw_path, "w") as main_file: - _emit_main_common_includes(main_file, custom_includes, debug_last_error) - - if interface_api == "c": - for compiled_model in compiled_models: - model = compiled_model.model - _emit_main_micro_include(main_file, model.name) - for compiled_model in compiled_models: - model = compiled_model.model - _emit_main_data(main_file, model.inputs, model.outputs, model.name) - - if interface_api == "packed": - for compiled_model in compiled_models: - entrypoint_suffix = _get_entrypoint_suffix( - 
compiled_model.executor_factory.target[0] - ) - _emit_entry_function_forward_declaration( - main_file, compiled_model.model.name, entrypoint_suffix - ) - - _emit_main_prologue( - main_file, - custom_prologue, - workspace_bytes, - data_linkage, - compiled_models, - interface_api, - use_stack_allocator, - debug_last_error, - ) - if use_stack_allocator: - _emit_main_init_memory_manager(main_file) - - if interface_api == "c": - for compiled_model in compiled_models: - model = compiled_model.model - executor_codegen_metadata = ( - compiled_model.executor_factory.executor_codegen_metadata - ) - devices = compiled_model.executor_factory.get_devices() - workspace_pool_names = {} - if executor_codegen_metadata.pool_inputs: - workspace_pool_names = { - allocated_pool.pool_info.pool_name: "&" - if isinstance( - allocated_pool.pool_info, tvm.ir.memory_pools.ConstantPoolInfo - ) - else "" - for allocated_pool in dict(executor_codegen_metadata.pool_inputs).values() - if not allocated_pool.pool_info.is_internal - } - _emit_main_device_structs(main_file, devices, model.name) - if not use_workspace_io: - _emit_main_workspace_pool_structs(main_file, workspace_pool_names, model.name) - _emit_main_data_structs(main_file, model.inputs, model.outputs, model.name) - _emit_main_c_interface_call( - main_file, - devices, - list(workspace_pool_names.keys()), - model.name, - use_workspace_io, - debug_last_error, - ) - else: - _emit_main_fake_packed_values(main_file) - for compiled_model in compiled_models: - model = compiled_model.model - _emit_main_data_setup(main_file, model.inputs, model.outputs, model.name) - entrypoint_suffix = _get_entrypoint_suffix( - compiled_model.executor_factory.target[0] - ) - _emit_main_packed_call( - main_file, model.inputs, model.outputs, model.name, entrypoint_suffix - ) - - for compiled_model in compiled_models: - model = compiled_model.model - _emit_main_compare( - main_file, - model.outputs, - model.output_tolerance, - model.name, - interface_api == "c", - print_output_on_mismatch, - ) - _emit_main_epilogue(main_file, custom_epilogue) - - -def _create_header_file(tensor_name, npy_data, output_path, data_linkage): - """ - This method generates a header file containing the data contained in the numpy array provided. - It is used to capture the tensor data (for both inputs and expected outputs) - to be bundled into the standalone application. 
- """ - file_path = pathlib.Path(f"{output_path}/" + tensor_name).resolve() - # create header file - raw_path = file_path.with_suffix(".h").resolve() - with open(raw_path, "w") as header_file: - header_file.write("#include \n") - header_file.write("#include \n") - header_file.write("#include \n") - header_file.write(f"const size_t {tensor_name}_len = {npy_data.size};\n") - - _emit_data_linkage(header_file, data_linkage) - - header_file.write(f"{NP_TYPE_TO_C[str(npy_data.dtype)]} {tensor_name}[] =") - - header_file.write("{") - for i in np.ndindex(npy_data.shape): - header_file.write(f"{npy_data[i]}, ") - header_file.write("};\n\n") - - -def convert_to_relay(tflite_model_buf, bind_params_by_name=True): - """Convert a tflite model buffer in a Relay module""" - # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1 - try: - import tflite.Model # pylint: disable=import-outside-toplevel - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0) - except AttributeError: - import tflite # pylint: disable=import-outside-toplevel - - tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0) - except ImportError: - raise ImportError("The tflite package must be installed") - - mod, params = relay.frontend.from_tflite(tflite_model) - if bind_params_by_name: - mod["main"] = relay.build_module.bind_params_by_name(mod["main"], params) - return mod, params - - -def compile_models( - models: Union[List[AOTTestModel], AOTTestModel], - interface_api: str, - use_unpacked_api: bool, - workspace_byte_alignment: int = 8, - constant_byte_alignment: int = 8, - enable_op_fusion: bool = True, - pass_config: Dict[str, Any] = None, - use_runtime_executor: bool = True, - target: tvm.target.Target = tvm.target.Target("c"), - workspace_memory_pools=None, - constant_memory_pools=None, - schedule_name: str = None, - runtime: tvm.relay.backend.Runtime = Runtime("crt"), -) -> List[AOTCompiledTestModel]: - """ - This method generates runtime.Modules for the tests - """ - if not isinstance(models, list): - models = [models] - - assert ( - runtime.name == "crt" - ), f"Currently only 'crt' is supported by the test framework, but got {runtime.name}" - - executor = Executor( - "aot", - { - "workspace-byte-alignment": workspace_byte_alignment, - "constant-byte-alignment": constant_byte_alignment, - "interface-api": interface_api, - "unpacked-api": use_unpacked_api, - }, - ) - - config = {"tir.disable_vectorize": True} - if pass_config: - config = {**config, **pass_config} - if not enable_op_fusion: - config["relay.FuseOps.max_depth"] = 1 - - compiled_mods = list() - for model in models: - with contextlib.ExitStack() as context_stack: - if schedule_name: - # Testing with deterministic schedule - task_list = autotvm.task.extract_from_program( - model.module, target=target, params=model.params - ) - context_stack.enter_context( - tvm.autotvm.apply_fixed_config(task_list, schedule_name) - ) - - context_stack.enter_context(tvm.transform.PassContext(opt_level=3, config=config)) - - build_kwargs = dict( - ir_mod=model.module, - params=model.params, - mod_name=model.name, - ) - - # TODO(Mousius) - Remove once executor/runtime are fully removed from Target - if use_runtime_executor: - build_kwargs.update( - dict( - target=target, - executor=executor, - runtime=runtime, - workspace_memory_pools=workspace_memory_pools, - constant_memory_pools=constant_memory_pools, - ) - ) - else: - build_kwargs.update(dict(target=tvm.target.Target(target, host=target))) - - executor_factory = tvm.relay.build(**build_kwargs) - 
compiled_mods.append( - AOTCompiledTestModel(model=model, executor_factory=executor_factory) - ) - return compiled_mods - - -def run_and_check( - models: List[AOTCompiledTestModel], - runner: AOTTestRunner, - interface_api: str, - debug_calculated_workspaces=False, - workspace_byte_alignment=8, - constant_byte_alignment=8, - data_linkage: AOTDataLinkage = None, - test_dir: str = None, - verbose: bool = False, - use_workspace_io: bool = False, - debug_last_error: bool = False, - checker: Optional[Callable[[str], bool]] = None, - print_output_on_mismatch: bool = False, -): - """ - This method uses the original test data and compiled runtime.Modules - to run in the test runner to verify the results. - """ - - def run_and_check_body(base_path): - cflags = ( - f"-DTVM_RUNTIME_ALLOC_ALIGNMENT_BYTES={workspace_byte_alignment} " - f" -DTVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES={constant_byte_alignment} " - ) - # The calculated workspaces will not account for stack allocator tags used for debugging - if debug_calculated_workspaces: - cflags += "-DTVM_CRT_STACK_ALLOCATOR_ENABLE_LIFO_CHECK " - - base_path = os.path.abspath(base_path) - build_path = os.path.join(base_path, "build") - os.makedirs(build_path, exist_ok=True) - - include_path = os.path.join(base_path, "include") - os.mkdir(include_path) - tvm.micro.copy_crt_config_header("crt", include_path) - - workspace_bytes = 0 - for compiled_model in models: - model = compiled_model.model - tar_file = os.path.join(base_path, f"{model.name}.tar") - export_model_library_format(compiled_model.executor_factory, tar_file) - t = tarfile.open(tar_file) - t.extractall(base_path) - - # Interface C APIs does not need compiler generated - # workspace to generate the test application, because - # workspace size is codegen'd as a macro to - # tvmgen_.h. 
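A minimal sketch of how the codegen'd workspace macro mentioned in the comment above can be consumed on the C side. The tvmgen_default.h header and TVMGEN_DEFAULT_WORKSPACE_SIZE macro names assume a model named "default" and are illustrative assumptions, not lines from this diff; g_aot_memory, app_workspace and StackMemoryManager_Init mirror the call emitted by _emit_main_init_memory_manager earlier in this file.

/* Sketch: sizing and initialising the AOT test workspace from the generated macro. */
#include <stdint.h>
#include <tvm/runtime/crt/stack_allocator.h>
#include <tvmgen_default.h>                              /* assumed generated header */

static uint8_t g_aot_memory[TVMGEN_DEFAULT_WORKSPACE_SIZE];  /* assumed macro name */
static tvm_workspace_t app_workspace;                        /* assumed CRT workspace type */

void init_workspace(void) {
  /* Same initialisation the generated test main performs. */
  StackMemoryManager_Init(&app_workspace, g_aot_memory, TVMGEN_DEFAULT_WORKSPACE_SIZE);
}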
- if interface_api != "c": - workspace_bytes += mlf_extract_workspace_size_bytes(tar_file) - - workspace_bytes += model.extra_memory_in_bytes - for key in model.inputs: - sanitized_tensor_name = re.sub(r"\W", "_", key) - _create_header_file( - f'{_mangle_name(model.name, "input_data")}_{sanitized_tensor_name}', - model.inputs[key], - include_path, - data_linkage, - ) - - for key in model.outputs: - sanitized_tensor_name = re.sub(r"\W", "_", key) - _create_header_file( - f'{_mangle_name(model.name, "output_data")}_{sanitized_tensor_name}', - np.zeros(model.outputs[key].shape, model.outputs[key].dtype), - include_path, - data_linkage, - ) - _create_header_file( - f'{_mangle_name(model.name, "expected_output_data")}_{sanitized_tensor_name}', - model.outputs[key], - include_path, - data_linkage, - ) - - use_usmp = runner.pass_config.get("tir.usmp.enable", False) - # We only need the stack allocator if USMP is not used - use_stack_allocator = not use_usmp - - _create_main( - "test.c", - models, - build_path, - runner.includes, - runner.prologue, - runner.epilogue, - data_linkage, - interface_api, - workspace_bytes, - use_stack_allocator, - use_workspace_io, - debug_last_error, - print_output_on_mismatch, - ) - - if checker and (not checker(base_path)): - return False - - # Verify that compiles fine - file_dir = os.path.dirname(os.path.abspath(__file__)) - makefile_dir = os.path.join(file_dir, "../../../tests/python/relay/aot") - codegen_path = os.path.join(base_path, "codegen") - makefile = os.path.join(makefile_dir, f"{runner.makefile}.mk") - - if runner.makefile == "aprofile_aem": - fvp_dir = "/opt/arm/fvp/Base_RevC_AEMvA_pkg/models/Linux64_GCC-9.3/" - else: - fvp_dir = "/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4/" - - custom_params = " ".join( - [f" {param}='{value}'" for param, value in runner.parameters.items()] - ) - make_command = ( - f"make -f {makefile} build_dir={build_path}" - + f" CFLAGS='{cflags}'" - + f" TVM_ROOT={file_dir}/../../.." 
- + f" AOT_TEST_ROOT={makefile_dir}" - + f" CODEGEN_ROOT={codegen_path}" - + f" STANDALONE_CRT_DIR={tvm.micro.get_standalone_crt_dir()}" - + f" FVP_DIR={fvp_dir}" - + custom_params - ) - - compile_log_path = os.path.join(build_path, "test_compile.log") - compile_command = f"{make_command} aot_test_runner" - if verbose: - print("Compile command:\n", compile_command) - _subprocess_check_log_output(compile_command, ".", compile_log_path) - - # Verify that runs fine - run_log_path = os.path.join(build_path, "test_run.log") - run_command = f"{make_command} run" - if verbose: - print("Run command:\n", run_command) - - _subprocess_check_log_output(run_command, build_path, run_log_path) - - with open(run_log_path) as run_log: - run_log_out = run_log.read() - if print_output_on_mismatch and AOT_FAILURE_TOKEN in run_log_out: - print(run_log_out) - assert AOT_SUCCESS_TOKEN in run_log_out - - return True - - if test_dir is None: - tmpdir = utils.tempdir() - return run_and_check_body(os.path.join(tmpdir.path, "test")) - else: - return run_and_check_body(test_dir) - - -def compile_and_run( - models: Union[List[AOTTestModel], AOTTestModel], - runner: AOTTestRunner, - interface_api: str, - use_unpacked_api: bool, - debug_calculated_workspaces: bool = False, - workspace_byte_alignment: int = 8, - constant_byte_alignment: int = 8, - enable_op_fusion: bool = True, - data_linkage: AOTDataLinkage = None, - use_runtime_executor: bool = True, - target: Union[str, tvm.target.Target, List[tvm.target.Target]] = "c", - target_opts: Dict = None, - test_dir: str = None, - verbose: bool = False, - schedule_name: str = None, - debug_last_error: bool = False, - checker: Optional[Callable[[str], bool]] = None, - print_output_on_mismatch: bool = False, - runtime: tvm.relay.backend.Runtime = Runtime("crt"), -) -> bool: - """This is a wrapper API to compile and run models as test for AoT - - Parameters - ---------- - interface_api : str - The external calling convention interface API. - - Examples: "c", "packed" - - use_unpacked_api : bool - Whether or not to use type-erased API internally for the - operator calling convention. - - Note: This feature can be useful for embedded targets - when space is at a premium. - - Permitted values when interface API is: - > "c": True - > "packed": True/False - - test_dir : str - This path will contain build, codegen, include directories. - - verbose : bool - Prints commands to build and run AOT test runner. - - print_output_on_mismatch : bool - Print both the output and reference values side-by-side - when there is a mismatch. 
- """ - - if target_opts: - for key, val in target_opts.items(): - target += f" {key}={val}" - - if isinstance(target, str): - target = tvm.target.Target(target) - - compiled_test_mods = compile_models( - models=models, - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - workspace_byte_alignment=workspace_byte_alignment, - constant_byte_alignment=constant_byte_alignment, - enable_op_fusion=enable_op_fusion, - pass_config=runner.pass_config, - use_runtime_executor=use_runtime_executor, - target=target, - schedule_name=schedule_name, - runtime=runtime, - ) - - return run_and_check( - models=compiled_test_mods, - runner=runner, - interface_api=interface_api, - debug_calculated_workspaces=debug_calculated_workspaces, - workspace_byte_alignment=workspace_byte_alignment, - constant_byte_alignment=constant_byte_alignment, - data_linkage=data_linkage, - test_dir=test_dir, - verbose=verbose, - debug_last_error=debug_last_error, - checker=checker, - print_output_on_mismatch=print_output_on_mismatch, - ) - - -def get_dtype_range(dtype: str) -> Tuple[int, int]: - """ - Produces the min,max for a give data type. - - Parameters - ---------- - dtype : str - a type string (e.g., int8, float64) - - Returns - ------- - type_info.min : int - the minimum of the range - type_info.max : int - the maximum of the range - """ - type_info = None - np_dtype = np.dtype(dtype) - kind = np_dtype.kind - - if kind == "f": - type_info = np.finfo(np_dtype) - elif kind in ["i", "u"]: - type_info = np.iinfo(np_dtype) - else: - raise TypeError(f"dtype ({dtype}) must indicate some floating-point or integral data type.") - return type_info.min, type_info.max - - -def generate_ref_data(mod, input_data, params=None, target="llvm"): - """Generate reference data through executing the relay module""" - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - lib = relay.build(mod, target=target, params=params) - - lib_name = "mod.so" - temp = utils.tempdir() - lib_path = temp.relpath(lib_name) - lib.export_library(lib_path) - lib = tvm.runtime.load_module(lib_path) - grt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu())) - grt_mod.set_input(**input_data) - grt_mod.run() - output_count = grt_mod.get_num_outputs() - out = [grt_mod.get_output(i).numpy() for i in range(output_count)] - if isinstance(mod, tvm.relay.Function): - main = mod - else: - main = mod["main"] - if "output_tensor_names" in main.attrs: - output_tensor_names = main.attrs["output_tensor_names"] - else: - output_tensor_names = ( - ["output"] if output_count == 1 else [f"output{i}" for i in range(output_count)] - ) - - return dict(zip(output_tensor_names, out)) - - -def create_relay_module_and_inputs_from_tflite_file(tflite_model_file, bind_params_by_name=True): - """A helper function to create a Relay IRModule with inputs - and params from a tflite file""" - with open(tflite_model_file, "rb") as f: - tflite_model_buf = f.read() - mod, params = convert_to_relay(tflite_model_buf, bind_params_by_name) - - inputs = dict() - for param in mod["main"].params: - name = str(param.name_hint) - data_shape = [int(i) for i in param.type_annotation.shape] - dtype = str(param.type_annotation.dtype) - if np.issubdtype(dtype, np.floating): - # Since np.random.uniform only allows the ranges of float32, - # at first float16 is used and scaled afterwards, if necessary. 
- in_min, in_max = (np.finfo("float16").min, np.finfo("float16").max) - data = np.random.uniform(low=in_min, high=in_max, size=data_shape).astype(dtype) - scale = np.finfo(dtype).min / np.finfo("float16").min - data *= scale - elif np.issubdtype(dtype, np.integer): - in_min, in_max = (np.iinfo(dtype).min, np.iinfo(dtype).max) - data = np.random.randint(in_min, high=in_max, size=data_shape, dtype=dtype) - else: - raise TypeError(f"Type {dtype} not supported") - inputs[name] = data - - return mod, inputs, params diff --git a/python/tvm/testing/plugin.py b/python/tvm/testing/plugin.py index 7a0cf5dd1324..05eee5955c30 100644 --- a/python/tvm/testing/plugin.py +++ b/python/tvm/testing/plugin.py @@ -55,9 +55,7 @@ "vulkan": "mark a test as requiring vulkan", "metal": "mark a test as requiring metal", "llvm": "mark a test as requiring llvm", - "ethosn": "mark a test as requiring ethosn", "hexagon": "mark a test as requiring hexagon", - "corstone300": "mark a test as requiring Corstone300 FVP", } diff --git a/python/tvm/testing/utils.py b/python/tvm/testing/utils.py index 8b919d2c9dca..8546d4aef233 100644 --- a/python/tvm/testing/utils.py +++ b/python/tvm/testing/utils.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -# pylint: disable=invalid-name,unnecessary-comprehension +# pylint: disable=invalid-name,unnecessary-comprehension,redefined-outer-name """TVM testing utilities Organization @@ -90,11 +90,13 @@ def test_something(): import tvm.te import tvm._ffi +from tvm import relay from tvm.target import codegen -from tvm.contrib import nvcc, cudnn, rocm +from tvm.contrib import nvcc, cudnn, rocm, graph_executor import tvm.contrib.hexagon._ci_env_check as hexagon from tvm.driver.tvmc.frontends import load_model from tvm.error import TVMError +import tvm.contrib.utils SKIP_SLOW_TESTS = os.getenv("SKIP_SLOW_TESTS", "").lower() in {"true", "1", "yes"} @@ -987,21 +989,12 @@ def _multi_gpu_exists(): cmake_flag="USE_NNAPI_CODEGEN", ) -# Mark a test as requiring microTVM to run -requires_micro = Feature("micro", "MicroTVM", cmake_flag="USE_MICRO") - # Mark a test as requiring CUTLASS to run requires_cutlass = Feature("cutlass", "CUTLASS", cmake_flag="USE_CUTLASS") # Mark a test as requiring rpc to run requires_rpc = Feature("rpc", "RPC", cmake_flag="USE_RPC") -# Mark a test as requiring Arm(R) Ethos(TM)-N to run -requires_ethosn = Feature("ethosn", "Arm(R) Ethos(TM)-N", cmake_flag="USE_ETHOSN") - -# Mark a test as requiring Arm(R) Ethos(TM)-U to run -requires_ethosu = Feature("ethosu", "Arm(R) Ethos(TM)-U", cmake_flag="USE_ETHOSU") - # Mark a test as requiring libtorch to run requires_libtorch = Feature("libtorch", "LibTorch", cmake_flag="USE_LIBTORCH") @@ -1019,24 +1012,6 @@ def _multi_gpu_exists(): parent_features="llvm", ) -# Mark a test as requiring the CMSIS NN library -requires_cmsisnn = Feature("cmsisnn", "CMSIS NN", cmake_flag="USE_CMSISNN") - - -def _corstone300_compile_time_check(): - if shutil.which("arm-none-eabi-gcc") is None: - return "ARM embedded toolchain unavailable" - return True - - -# Mark a test as requiring the corstone300 FVP -requires_corstone300 = Feature( - "corstone300", - "Corstone-300", - compile_time_check=_corstone300_compile_time_check, - parent_features="cmsisnn", -) - def _aprofile_aem_fvp_compile_time_check(): if shutil.which("FVP_Base_RevC-2xAEMvA") is None: @@ -1643,6 +1618,64 @@ def wraps(func): return wraps(func) +def get_dtype_range(dtype: str) -> Tuple[int, int]: + """ + Produces the min,max for a give data type. 
+ + Parameters + ---------- + dtype : str + a type string (e.g., int8, float64) + + Returns + ------- + type_info.min : int + the minimum of the range + type_info.max : int + the maximum of the range + """ + type_info = None + np_dtype = np.dtype(dtype) + kind = np_dtype.kind + + if kind == "f": + type_info = np.finfo(np_dtype) + elif kind in ["i", "u"]: + type_info = np.iinfo(np_dtype) + else: + raise TypeError(f"dtype ({dtype}) must indicate some floating-point or integral data type.") + return type_info.min, type_info.max + + +def generate_ref_data(mod, input_data, params=None, target="llvm"): + """Generate reference data through executing the relay module""" + with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): + lib = relay.build(mod, target=target, params=params) + + lib_name = "mod.so" + temp = tvm.contrib.utils.tempdir() + lib_path = temp.relpath(lib_name) + lib.export_library(lib_path) + lib = tvm.runtime.load_module(lib_path) + grt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu())) + grt_mod.set_input(**input_data) + grt_mod.run() + output_count = grt_mod.get_num_outputs() + out = [grt_mod.get_output(i).numpy() for i in range(output_count)] + if isinstance(mod, tvm.relay.Function): + main = mod + else: + main = mod["main"] + if "output_tensor_names" in main.attrs: + output_tensor_names = main.attrs["output_tensor_names"] + else: + output_tensor_names = ( + ["output"] if output_count == 1 else [f"output{i}" for i in range(output_count)] + ) + + return dict(zip(output_tensor_names, out)) + + class _DeepCopyAllowedClasses(dict): def __init__(self, allowed_class_list): self.allowed_class_list = allowed_class_list diff --git a/rust/tvm-rt/Cargo.toml b/rust/tvm-rt/Cargo.toml index 24d9061a213f..e813c6941921 100644 --- a/rust/tvm-rt/Cargo.toml +++ b/rust/tvm-rt/Cargo.toml @@ -52,11 +52,9 @@ use-openmp = ["tvm-sys/use-openmp"] use-relay-debug = ["tvm-sys/use-relay-debug"] use-rtti = ["tvm-sys/use-rtti"] use-mscv-mt = ["tvm-sys/use-mscv-mt"] -use-micro = ["tvm-sys/use-micro"] use-install-dev = ["tvm-sys/use-install-dev"] hide-private-symbols = ["tvm-sys/hide-private-symbols"] use-fallback-stl-map = ["tvm-sys/use-fallback-stl-map"] -use-ethosn = ["tvm-sys/use-ethosn"] use-index-default-i64 = ["tvm-sys/use-index-default-i64"] use-tf-tvmdsoop = ["tvm-sys/use-tf-tvmdsoop"] use-byodt-posit = ["tvm-sys/use-byodt-posit"] @@ -71,7 +69,6 @@ use-rocblas = ["tvm-sys/use-rocblas"] use-sort = ["tvm-sys/use-sort"] use-nnpack = ["tvm-sys/use-nnpack"] use-random = ["tvm-sys/use-random"] -use-micro-standalone-runtime = ["tvm-sys/use-micro-standalone-runtime"] use-cpp-rpc = ["tvm-sys/use-cpp-rpc"] use-tflite = ["tvm-sys/use-tflite"] use-coreml = ["tvm-sys/use-coreml"] diff --git a/rust/tvm-sys/Cargo.toml b/rust/tvm-sys/Cargo.toml index 4494e20afa31..e31ae66881dc 100644 --- a/rust/tvm-sys/Cargo.toml +++ b/rust/tvm-sys/Cargo.toml @@ -45,11 +45,9 @@ use-openmp = [] use-relay-debug = [] use-rtti = [] use-mscv-mt = [] -use-micro = [] use-install-dev = [] hide-private-symbols = [] use-fallback-stl-map = [] -use-ethosn = [] use-index-default-i64 = [] use-tf-tvmdsoop = [] use-byodt-posit = [] @@ -64,7 +62,6 @@ use-rocblas = [] use-sort = [] use-nnpack = [] use-random = [] -use-micro-standalone-runtime = [] use-cpp-rpc = [] use-tflite = [] use-coreml = [] diff --git a/rust/tvm-sys/build.rs b/rust/tvm-sys/build.rs index 80c7efbaf894..eb2c1ee3a21b 100644 --- a/rust/tvm-sys/build.rs +++ b/rust/tvm-sys/build.rs @@ -111,9 +111,6 @@ fn find_using_tvm_build() -> Result { if 
cfg!(feature = "use-mscv-mt") { build_config.settings.use_mscv_mt = Some(true); } - if cfg!(feature = "use-micro") { - build_config.settings.use_micro = Some(true); - } if cfg!(feature = "use-install-dev") { build_config.settings.use_install_dev = Some(true); } @@ -123,9 +120,6 @@ fn find_using_tvm_build() -> Result { if cfg!(feature = "use-fallback-stl-map") { build_config.settings.use_fallback_stl_map = Some(true); } - if cfg!(feature = "use-ethosn") { - build_config.settings.use_ethosn = Some(true); - } if cfg!(feature = "use-index_default-i64") { build_config.settings.use_index_default_i64 = Some(true); } @@ -168,9 +162,6 @@ fn find_using_tvm_build() -> Result { if cfg!(feature = "use-random") { build_config.settings.use_random = Some(true); } - if cfg!(feature = "use-micro-standalone-runtime") { - build_config.settings.use_micro_standalone_runtime = Some(true); - } if cfg!(feature = "use-cpp-rpc") { build_config.settings.use_cpp_rpc = Some(true); } diff --git a/rust/tvm/Cargo.toml b/rust/tvm/Cargo.toml index 8d9b23f7616b..22dc546c93f7 100644 --- a/rust/tvm/Cargo.toml +++ b/rust/tvm/Cargo.toml @@ -51,11 +51,9 @@ use-openmp = ["tvm-rt/use-openmp"] use-relay-debug = ["tvm-rt/use-relay-debug"] use-rtti = ["tvm-rt/use-rtti"] use-mscv-mt = ["tvm-rt/use-mscv-mt"] -use-micro = ["tvm-rt/use-micro"] use-install-dev = ["tvm-rt/use-install-dev"] hide-private-symbols = ["tvm-rt/hide-private-symbols"] use-fallback-stl-map = ["tvm-rt/use-fallback-stl-map"] -use-ethosn = ["tvm-rt/use-ethosn"] use-index-default-i64 = ["tvm-rt/use-index-default-i64"] use-tf-tvmdsoop = ["tvm-rt/use-tf-tvmdsoop"] use-byodt-posit = ["tvm-rt/use-byodt-posit"] @@ -70,7 +68,6 @@ use-rocblas = ["tvm-rt/use-rocblas"] use-sort = ["tvm-rt/use-sort"] use-nnpack = ["tvm-rt/use-nnpack"] use-random = ["tvm-rt/use-random"] -use-micro-standalone-runtime = ["tvm-rt/use-micro-standalone-runtime"] use-cpp-rpc = ["tvm-rt/use-cpp-rpc"] use-tflite = ["tvm-rt/use-tflite"] use-coreml = ["tvm-rt/use-coreml"] diff --git a/src/contrib/ethosu/cascader/block_config.cc b/src/contrib/ethosu/cascader/block_config.cc deleted file mode 100644 index 667d2e1ebefb..000000000000 --- a/src/contrib/ethosu/cascader/block_config.cc +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -#include "block_config.h" - -#include -#include -#include - -#include -#include - -#include "common.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -void BlockConfigNode::VisitAttrs(AttrVisitor* v) { - Array tmp_arr = make_array(input_shape_); - v->Visit("_input_shape", &tmp_arr); - tmp_arr = make_array(output_shape_); - v->Visit("_output_shape", &tmp_arr); - v->Visit("_compute_cycles", &compute_cycles_); - v->Visit("_output_cycles", &output_cycles_); -} - -BlockConfig::BlockConfig(const std::vector& input_shape, const std::vector& output_shape, - int compute_cycles, int output_cycles) { - auto n = make_object(); - n->input_shape_ = std::move(input_shape); - n->output_shape_ = std::move(output_shape); - n->compute_cycles_ = compute_cycles; - n->output_cycles_ = output_cycles; - data_ = std::move(n); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.BlockConfig") - .set_body_typed([](Array input_shape, Array output_shape, int compute_cycles, - int output_cycles) { - std::vector vinput_shape = make_vector(input_shape); - std::vector voutput_shape = make_vector(output_shape); - return BlockConfig(vinput_shape, voutput_shape, compute_cycles, output_cycles); - }); - -TVM_REGISTER_NODE_TYPE(BlockConfigNode); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm diff --git a/src/contrib/ethosu/cascader/block_config.h b/src/contrib/ethosu/cascader/block_config.h deleted file mode 100644 index 5e349cee4d06..000000000000 --- a/src/contrib/ethosu/cascader/block_config.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/contrib/ethosu/cascader/block_config.h - * \brief BlockConfig object for the NPU cascader - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_BLOCK_CONFIG_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_BLOCK_CONFIG_H_ - -#include -#include - -#include -#include - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -class BlockConfig; - -/*! \brief Node to represent a BlockConfig */ -class BlockConfigNode : public Object { - public: - void VisitAttrs(AttrVisitor* v); - - /*! - * \brief Get the shape of input block. - * \return The input shape of the block config. - */ - inline std::vector GetInputBlockShape() const { return input_shape_; } - - /*! - * \brief Get the shape of output block. - * \return The output shape of the block config. - */ - inline std::vector GetOutputBlockShape() const { return output_shape_; } - - /*! - * \brief Get the number of cycles required to output this block - * \return The output cycles - */ - inline int GetOutputCycles() const { return output_cycles_; } - - /*! 
- * \brief Get the number of cycles required to compute this block - * \return The compute cycles - */ - inline int GetComputeCycles() const { return compute_cycles_; } - - static constexpr const char* _type_key = "contrib.ethosu.cascader.BlockConfig"; - TVM_DECLARE_FINAL_OBJECT_INFO(BlockConfigNode, Object); - - protected: - friend class BlockConfig; - - /*! \brief The shape of the input block */ - std::vector input_shape_; - /*! \brief The shape of the output block */ - std::vector output_shape_; - /*! \brief Cycles required to compute this block */ - int compute_cycles_; - /*! \brief Cycles required to output this block */ - int output_cycles_; -}; - -/*! - * \brief An object that contains a an output block shape as well as the output and compute cycles - * required to compute this block - */ -class BlockConfig : public ObjectRef { - public: - BlockConfig(const std::vector& input_shape, const std::vector& output_shape, - int compute_cycles, int output_cycles); - - TVM_DEFINE_OBJECT_REF_METHODS(BlockConfig, ObjectRef, BlockConfigNode); -}; - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_BLOCK_CONFIG_H_ diff --git a/src/contrib/ethosu/cascader/cascader_options.cc b/src/contrib/ethosu/cascader/cascader_options.cc deleted file mode 100644 index 0daf3fed2481..000000000000 --- a/src/contrib/ethosu/cascader/cascader_options.cc +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -#include "cascader_options.h" - -#include - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -void CascaderOptionsNode::VisitAttrs(AttrVisitor* v) { - v->Visit("cascade_region", &cascade_region); - v->Visit("max_proposals", &max_proposals); - v->Visit("stripe_factors", &stripe_factors); - v->Visit("max_plan_size", &max_plan_size); - v->Visit("max_open_plans", &max_open_plans); - v->Visit("max_closed_plans", &max_closed_plans); - v->Visit("always_copy_size", &always_copy_size); - v->Visit("disable_pareto_plans", &disable_pareto_plans); - v->Visit("disable_pareto_proposals", &disable_pareto_proposals); - v->Visit("enable_multi_dimensional_striping", &enable_multi_dimensional_striping); - v->Visit("disable_block_culling", &disable_block_culling); - v->Visit("enable_striping", &enable_striping); -} - -CascaderOptions::CascaderOptions(const MemoryRegion& cascade_region, int max_proposals, - int stripe_factors, int max_plan_size, int max_open_plans, - int max_closed_plans, int always_copy_size, - bool disable_pareto_plans, bool disable_pareto_proposals, - bool enable_multi_dimensional_striping, bool disable_block_culling, - bool enable_striping) { - auto n = make_object(); - n->cascade_region = std::move(cascade_region); - n->max_proposals = max_proposals; - n->stripe_factors = stripe_factors; - n->max_plan_size = max_plan_size; - n->max_open_plans = max_open_plans; - n->max_closed_plans = max_closed_plans; - n->always_copy_size = always_copy_size; - n->disable_pareto_plans = disable_pareto_plans; - n->disable_pareto_proposals = disable_pareto_proposals; - n->enable_multi_dimensional_striping = enable_multi_dimensional_striping; - n->disable_block_culling = disable_block_culling; - n->enable_striping = enable_striping; - data_ = std::move(n); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.CascaderOptions") - .set_body_typed([](MemoryRegion cascade_region, int max_proposals, int stripe_factors, - int max_plan_size, int max_open_plans, int max_closed_plans, - int always_copy_size, bool disable_pareto_plans, - bool disable_pareto_proposals, bool enable_multi_dimensional_striping, - bool disable_block_culling, bool enable_striping) { - return CascaderOptions( - cascade_region, max_proposals, stripe_factors, max_plan_size, max_open_plans, - max_closed_plans, always_copy_size, disable_pareto_plans, disable_pareto_proposals, - enable_multi_dimensional_striping, disable_block_culling, enable_striping); - }); - -TVM_REGISTER_NODE_TYPE(CascaderOptionsNode); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm diff --git a/src/contrib/ethosu/cascader/cascader_options.h b/src/contrib/ethosu/cascader/cascader_options.h deleted file mode 100644 index 3545e5cc3ae0..000000000000 --- a/src/contrib/ethosu/cascader/cascader_options.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/contrib/ethosu/cascader/cascader_options.h - * \brief Class to store configuration options for the NPU cascader - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_CASCADER_OPTIONS_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_CASCADER_OPTIONS_H_ - -#include -#include - -#include "tensor_config.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -/*! \brief Node to represent CascaderOptions */ -class CascaderOptionsNode : public Object { - public: - void VisitAttrs(AttrVisitor* v); - - /*! \brief The MemoryRegion to place cascading buffer into. */ - MemoryRegion cascade_region; - /*! \brief The maximum number of Proposals to generate. */ - int max_proposals; - /*! \brief How many striping factors to try per axis. */ - int stripe_factors; - /*! \brief The maximum number of Parts in a Plan. */ - int max_plan_size; - /*! \brief The maximum number of open Plans saved for a Part Group */ - int max_open_plans; - /*! \brief The maximum number of closed Plans saved for a Part Group */ - int max_closed_plans; - /*! \brief The maximum size of Tensor that will always be copied into the cascade region. */ - int always_copy_size; - /*! \brief Flag to disable pareto culling for plans to allow non pareto-optimal plans */ - bool disable_pareto_plans; - /*! \brief Flag to disable pareto culling for proposals to allow non pareto-optimal proposals */ - bool disable_pareto_proposals; - /*! \brief Whether to consider multi-dimensional striping */ - bool enable_multi_dimensional_striping; - /*! \brief Flag to disable culling for block configs to allow non-dominant blocks */ - bool disable_block_culling; - /*! \brief A boolean option to enable striping. */ - bool enable_striping; - - static constexpr const char* _type_key = "contrib.ethosu.cascader.CascaderOptions"; - TVM_DECLARE_FINAL_OBJECT_INFO(CascaderOptionsNode, Object) -}; - -/*! \brief A class to hold configuration options for the cascader. */ -class CascaderOptions : public ObjectRef { - public: - CascaderOptions(const MemoryRegion& cascade_region, int max_proposals, int stripe_factors, - int max_plan_size, int max_open_plans, int max_closed_plans, int always_copy_size, - bool disable_pareto_plans, bool disable_pareto_proposals, - bool enable_multi_dimensional_striping, bool disable_block_culling, - bool multi_dimensional_striping); - - TVM_DEFINE_OBJECT_REF_METHODS(CascaderOptions, ObjectRef, CascaderOptionsNode); -}; - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_CASCADER_OPTIONS_H_ diff --git a/src/contrib/ethosu/cascader/common.h b/src/contrib/ethosu/cascader/common.h deleted file mode 100644 index b4b5664e04b9..000000000000 --- a/src/contrib/ethosu/cascader/common.h +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/contrib/ethosu/cascader/common.h - * \brief Common functions used in the NPU cascader - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_COMMON_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_COMMON_H_ - -#include -#include - -#include -#include -#include - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -/*! - * \brief Make a tvm::Array from an int vector. - * \param vec The int vector. - * \return The Integer Array. - * \note Array(std::vector) doesn't work as this implicit - * type conversion fails. This is why this helper is required. - */ -inline Array make_array(const std::vector& vec) { - Array arr; - arr.resize(vec.size()); - for (unsigned int i = 0; i < vec.size(); ++i) { - arr.Set(i, Integer(vec[i])); - } - return arr; -} - -/*! - * \brief Make a tvm::Array from a size_t vector. - * \param vec The size_t vector. - * \return The Integer Array. - * \note Array(std::vector) doesn't work as this implicit - * type conversion fails. This is why this helper is required. - */ -inline Array make_array(const std::vector& vec) { - Array arr; - arr.resize(vec.size()); - for (unsigned int i = 0; i < vec.size(); ++i) { - arr.Set(i, Integer(vec[i])); - } - return arr; -} - -/*! - * \brief Make a tvm::Array from an int64_t vector. - * \param vec The int64_t vector. - * \return The IntImm Array. - * \note Array(std::vector) doesn't work as this implicit - * type conversion fails. This is why this helper is required. - */ -inline Array make_array(const std::vector& vec) { - Array arr; - arr.resize(vec.size()); - for (unsigned int i = 0; i < vec.size(); ++i) { - arr.Set(i, IntImm(DataType::Int(64), vec[i])); - } - return arr; -} - -/*! - * \brief Make a tvm::Array from an float vector. - * \param vec The float vector. - * \return The FloatImm Array. - */ -inline Array make_array(const std::vector& vec) { - Array arr; - arr.resize(vec.size()); - for (unsigned int i = 0; i < vec.size(); ++i) { - arr.Set(i, FloatImm(DataType::Float(32), static_cast(vec[i]))); - } - return arr; -} - -/*! - * \brief Calculate the ceil of an Integer division - * \param dividend The dividend of the division - * \param divisor The divisor of the division - * \return The quotient - */ -inline int round_up_divide(int dividend, int divisor) { - return dividend / divisor + (dividend % divisor != 0); -} - -/*! - * \brief Make a vector from a tvm::Array. - * \param arr The Array. - * \return The vector. - */ -template -inline std::vector make_vector(const Array& arr) { - std::vector vec(arr.size()); - for (unsigned int i = 0; i < arr.size(); ++i) { - vec[i] = arr[i]->value; - } - return vec; -} - -/*! - * \brief Create a combined hash. - * \param seed The current hash value. - * \param v The value to combine into the hash. - * \return The combined hash. - */ -template -inline void hash_combine(std::size_t* seed, T const& v) { - *seed ^= std::hash()(v) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2); -} - -/*! 
- * \brief Hash a vector. - * \param vec The vector to hash. - * \return The hash. - */ -template -inline std::size_t hash_vector(const std::vector& vec) { - std::size_t seed = vec.size(); - for (const auto& elem : vec) { - hash_combine(&seed, elem); - } - return seed; -} - -template -inline T mul_reduce(const std::vector& vec) { - return std::accumulate(vec.begin(), vec.end(), 1, std::multiplies()); -} - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_COMMON_H_ diff --git a/src/contrib/ethosu/cascader/graph.cc b/src/contrib/ethosu/cascader/graph.cc deleted file mode 100644 index 96f9768d3172..000000000000 --- a/src/contrib/ethosu/cascader/graph.cc +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#include "graph.h" - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "common.h" -#include "stripe_config.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -void PerformanceInfoNode::VisitAttrs(AttrVisitor* v) { - v->Visit("_compute_cycles", &compute_cycles); - Array tmp_reads = make_array(read_bytes); - v->Visit("_read_bytes", &tmp_reads); - v->Visit("_write_bytes", &write_bytes); - v->Visit("_block_config", &block_config); -} - -TVM_REGISTER_NODE_TYPE(PerformanceInfoNode); - -TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable) - .set_dispatch([](const ObjectRef& ref, ReprPrinter* p) { - auto* node = static_cast(ref.get()); - p->stream << "PerformanceInfo(compute_cycles=" << node->compute_cycles << ", read_bytes=["; - for (auto rb : node->read_bytes) { - p->stream << rb << ", "; - } - p->stream << "], write_bytes=" << node->write_bytes << ")"; - }); - -void TensorNode::VisitAttrs(AttrVisitor* v) { - Array tmp_arr = make_array(shape_); - v->Visit("_shape", &tmp_arr); - v->Visit("_dtype", &dtype_); - v->Visit("_is_constant", &is_constant_); - double compression_ratio = static_cast(compression_ratio_); - v->Visit("_compression_ratio", &compression_ratio); - Array tmp_prods(producers_); - v->Visit("_producers", &tmp_prods); - Array tmp_cons(consumers_); - v->Visit("_consumers", &tmp_cons); - v->Visit("_size", &size_); -} - -Tensor::Tensor(const std::vector& shape, DataType dtype, bool is_constant = false, - float compression_ratio = 1.0) { - auto n = make_object(); - n->shape_ = std::move(shape); - n->dtype_ = dtype; - n->is_constant_ = is_constant; - n->compression_ratio_ = compression_ratio; - n->size_ = mul_reduce(n->shape_) * n->dtype_.bytes(); - data_ = std::move(n); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.Tensor") - .set_body_typed([](Array shape, DataType dtype, bool is_constant, - double compression_ratio) { - std::vector vshape 
= make_vector(shape); - return Tensor(vshape, dtype, is_constant, compression_ratio); - }); - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.TensorAddProducer") - .set_body_method(&TensorNode::AddProducer); -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.TensorAddConsumer") - .set_body_method(&TensorNode::AddConsumer); - -TVM_REGISTER_NODE_TYPE(TensorNode); - -void PartNode::VisitAttrs(AttrVisitor* v) { - Array tmp_prp(propagators_); - v->Visit("_propagators", &tmp_prp); - Array tmp_ins(input_tensors_); - v->Visit("_input_tensors", &tmp_ins); - v->Visit("_output_tensor", &output_tensor_); - v->Visit("_in_line", &in_line_); - Array tmp_te_ins(subgraph_.input_tensors); - v->Visit("_te_input_tensors", &tmp_te_ins); - v->Visit("_te_output_tensor", &subgraph_.output_tensor); -} - -void PartNode::SetInput(uint64_t input_index, const Tensor& input_tensor) { - ICHECK_LT(input_index, input_tensors_.size()); - input_tensors_[input_index] = std::move(input_tensor); -} - -std::vector PartNode::CalculateInputStripeConfigs( - const StripeConfig& output_stripe_config) { - std::vector input_stripe_configs; - for (const auto& propagator : propagators_) { - input_stripe_configs.push_back(propagator->propagate(output_stripe_config)); - } - return input_stripe_configs; -} - -const std::vector PartNode::GetStripeAlignHint() const { - ICHECK_GT(propagators_.size(), 0); - size_t dims = propagators_[0]->GetOutputDims(); - std::vector compute_quantum(dims); - for (size_t i = 0; i < dims; i++) { - compute_quantum[i] = 1; - } - return compute_quantum; -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.PartSetInput") - .set_body_method(&PartNode::SetInput); -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.PartSetOutput") - .set_body_method(&PartNode::SetOutput); -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.PartCalculateInputStripeConfigs") - .set_body_typed([](Part part, StripeConfig output_stripe_config) { - auto input_stripe_configs = part->CalculateInputStripeConfigs(output_stripe_config); - return Array(input_stripe_configs); - }); -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.PartGetStripeAlignHint").set_body_typed([](Part part) { - std::vector align_hint = part->GetStripeAlignHint(); - return make_array(align_hint); -}); -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.PartGetPerformanceInfo") - .set_body_typed([](Part part, StripeConfig stripe_config, int buffer_mode) { - BufferMode ebuffer_mode = static_cast(buffer_mode); - return part->GetPerformanceInfo(stripe_config, ebuffer_mode); - }); - -CascaderGraphNode::CascaderGraphNode(std::vector input_tensors, - std::vector output_tensors) - : input_tensors_(input_tensors), output_tensors_(output_tensors) { - Init_(); -} - -bool VisitedInputs( - const Part& part, - const std::unordered_set& visited_tensors) { - for (const auto& input_tensor : part->GetInputTensors()) { - if (visited_tensors.find(input_tensor) == visited_tensors.end()) { - return false; - } - } - return true; -} - -void CascaderGraphNode::Init_() { - std::stack stack; - std::unordered_set visited_tensors; - std::unordered_set visited_parts; - for (const auto& input : input_tensors_) { - stack.push(input); - } - // Visit the Parts/Tensors in depth-first order using a non-recursive algorithm - while (!stack.empty()) { - Tensor tensor = stack.top(); - stack.pop(); - if (visited_tensors.find(tensor) == visited_tensors.end()) { - visited_tensors.insert(tensor); - tensor_order_.push_back(tensor); - for (const auto& part : tensor->GetConsumers()) { - if (visited_parts.find(part) == visited_parts.end()) { - // 
Only visit a Part once we've visited all its input Tensors - if (!VisitedInputs(part, visited_tensors)) continue; - visited_parts.insert(part); - part_order_.push_back(part); - stack.push(part->GetOutputTensor()); - } - } - } - } - std::reverse(tensor_order_.begin(), tensor_order_.end()); - std::reverse(part_order_.begin(), part_order_.end()); - int id = 0; - for (const auto& part : part_order_) { - part_id_map_[part] = id; - id++; - } - id = 0; - for (const auto& tensor : tensor_order_) { - tensor_id_map_[tensor] = id; - id++; - } -} - -void CascaderGraphNode::VisitAttrs(AttrVisitor* v) { - Array tmp_ins(input_tensors_); - v->Visit("_input_tensors", &tmp_ins); - Array tmp_outs(output_tensors_); - v->Visit("_output_tensors", &tmp_outs); - Array tmp_parr(part_order_); - v->Visit("_part_order", &tmp_parr); - Array tmp_tarr(tensor_order_); - v->Visit("_tensor_order", &tmp_tarr); -} - -int CascaderGraphNode::GetPartID(const Part& part) const { - if (part_id_map_.find(part) == part_id_map_.end()) { - return -1; - } - return part_id_map_.at(part); -} - -int CascaderGraphNode::GetTensorID(const Tensor& tensor) const { - if (tensor_id_map_.find(tensor) == tensor_id_map_.end()) { - return -1; - } - return tensor_id_map_.at(tensor); -} - -CascaderGraph::CascaderGraph(std::vector input_tensors, - std::vector output_tensors) { - auto n = make_object(input_tensors, output_tensors); - data_ = std::move(n); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.CascaderGraph") - .set_body_typed([](Array input_tensors, Array output_tensors) { - std::vector vinput_tensors(input_tensors.begin(), input_tensors.end()); - std::vector voutput_tensors(output_tensors.begin(), output_tensors.end()); - return CascaderGraph(vinput_tensors, voutput_tensors); - }); -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.CascaderGraphGetPartID") - .set_body_method(&CascaderGraphNode::GetPartID); -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.CascaderGraphGetTensorID") - .set_body_method(&CascaderGraphNode::GetTensorID); - -TVM_REGISTER_NODE_TYPE(CascaderGraphNode); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm diff --git a/src/contrib/ethosu/cascader/graph.h b/src/contrib/ethosu/cascader/graph.h deleted file mode 100644 index 4233493ee805..000000000000 --- a/src/contrib/ethosu/cascader/graph.h +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \file src/contrib/ethosu/cascader/graph.h - * \brief Graph objects (Tensor and Part) for the Ethos-U cascader - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_GRAPH_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_GRAPH_H_ - -#include -#include -#include -#include - -#include -#include -#include - -#include "block_config.h" -#include "propagator.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -class Tensor; -class Part; -class StripeConfig; - -/*! - * \brief The buffering mode to use when realizing a tensor. - * RECOMPUTE - The 'default' behaviour of TVM. Overlapping stripes will be recomputed. - * ROLLING - Apply both the sliding window and storage folding optimizations to the tensor - * realization. - */ -enum BufferMode { RECOMPUTE, ROLLING }; - -/*! \brief A struct to hold a Tensor Expression subgraph */ -struct TESubgraph { - /*! \brief The input te::Tensors to the subgraph */ - std::vector input_tensors; - /*! \brief The output te::Tensor of the subgraph */ - te::Tensor output_tensor; -}; - -/*! \brief Node to hold performance information for a Part */ -class PerformanceInfoNode : public Object { - public: - void VisitAttrs(AttrVisitor* v); - - /*! \brief The cycles to compute a block */ - int64_t compute_cycles; - /*! \brief The number of bytes read per input tensor */ - std::vector read_bytes; - /*! \brief The number of bytes written to the output tensor */ - int64_t write_bytes; - /*! \brief The block config used for this performance point */ - BlockConfig block_config; - - static constexpr const char* _type_key = "contrib.ethosu.cascader.PerformanceInfo"; - TVM_DECLARE_FINAL_OBJECT_INFO(PerformanceInfoNode, Object); -}; - -/*! - * \brief A class to hold the performance information for a Part. - * \note The performance information for a Part is composed of 3 factors: the compute cycles, - * the number of bytes read from each input tensor and the number of bytes written to the output - * tensor. Bytes read/written is reported in favour of read/write bandwidth cycles so the - * calculation of the performance information can be re-used with different memory homing. - */ -class PerformanceInfo : public ObjectRef { - public: - PerformanceInfo(int64_t compute_cycles, std::vector read_bytes, int64_t write_bytes, - BlockConfig block_config) { - auto n = make_object(); - n->compute_cycles = compute_cycles; - n->read_bytes = std::move(read_bytes); - n->write_bytes = write_bytes; - n->block_config = block_config; - data_ = std::move(n); - } - - TVM_DEFINE_OBJECT_REF_METHODS(PerformanceInfo, ObjectRef, PerformanceInfoNode); -}; - -/*! \brief Node to represent a Tensor */ -class TensorNode : public Object { - public: - void VisitAttrs(AttrVisitor* v); - - /*! \return The shape of the tensor */ - std::vector GetShape() const { return shape_; } - /*! \return The data type of the tensor */ - DataType GetDataType() const { return dtype_; } - /*! \return Whether the tensor stores a constant value */ - bool IsConstant() const { return is_constant_; } - /*! \return The compression ratio of the tensor */ - float GetCompressionRatio() const { return compression_ratio_; } - /*! \return The producers of the tensor */ - const std::vector GetProducers() const { return producers_; } - /*! \return The consumers of the tensor */ - const std::vector GetConsumers() const { return consumers_; } - /*! \return The size of the tensor in bytes */ - int GetSize() const { return size_ * compression_ratio_; } - - /*! 
\brief Add a producer of the tensor */ - inline void AddProducer(const Part& part) { producers_.push_back(part); } - /*! \brief Add a consumer of the tensor */ - inline void AddConsumer(const Part& part) { consumers_.push_back(part); } - - static constexpr const char* _type_key = "contrib.ethosu.cascader.Tensor"; - TVM_DECLARE_FINAL_OBJECT_INFO(TensorNode, Object); - - protected: - friend class Tensor; - - /*! \brief The shape of the tensor */ - std::vector shape_; - /*! \brief The data type of the tensor */ - DataType dtype_; - /*! \brief Whether the tensor stores a constant value */ - bool is_constant_; - /*! \brief The compression ratio of the tensor */ - float compression_ratio_; - /*! \brief The producers of the tensor */ - std::vector producers_; - /*! \brief The consumers of the tensor */ - std::vector consumers_; - /*! \brief The size of the tensor in bytes */ - int size_; -}; - -/*! - * \brief A class to describe a Tensor in a Cascader graph. - * \note Cascader graphs consist of two object types: Tensors and Parts. This class - * defines the Tensors which represent the tensors that are consumed and produced - * as part of the graph. They are augmented with information about their 'kind' - * (input/output/constant/intermediate), their default memory home (which memory they - * are expected to be allocated in) and a compression ratio where applicable (weights - * for instance are compressed). - */ -class Tensor : public ObjectRef { - public: - Tensor(const std::vector& shape, DataType dtype, bool is_constant, float compression_ratio); - - TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Tensor, ObjectRef, TensorNode); -}; - -/*! \brief Node to represent a Part */ -class PartNode : public Object { - public: - virtual void VisitAttrs(AttrVisitor* v); - - /*! \return The TE subgraph represented by the Part */ - const TESubgraph GetSubgraph() const { return subgraph_; } - /*! \return The output->input propagators */ - const std::vector GetPropagators() const { return propagators_; } - /*! \return Whether the Part is inline */ - bool IsInline() const { return in_line_; } - /*! \return The input tensors */ - const std::vector GetInputTensors() const { return input_tensors_; } - /*! \return The output tensor */ - const Tensor GetOutputTensor() const { return output_tensor_; } - - /*! \brief Add a producer of the tensor */ - void SetInput(uint64_t input_index, const Tensor& input_tensor); - /*! \brief Add a consumer of the tensor */ - void SetOutput(const Tensor& output_tensor) { output_tensor_ = output_tensor; } - /*! - * \brief Calculate the input stripe configs for a given output stripe config using the - * Propagators. \param output_stripe_config The output stripe config to propagate. \return The - * calculated input stripe configs. - */ - std::vector CalculateInputStripeConfigs(const StripeConfig& output_stripe_config); - /*! - * \brief Get the preferred alignment in each axis for a stripe of the Part. - * \note This is used to bias the selection of StripeConfigs towards those that are integer - * multiples of a tensor intrinsic used to compute the Part. - */ - virtual const std::vector GetStripeAlignHint() const; - /*! - * \brief Get the performance information for a given output stripe config. - * \param output_stripe_config The output stripe config to compute the performance for. - * \param is_rolling Whether the output config should be computed as a rolling buffer. - * \return The performance information containing the compute cycles and read/write bytes. 
- */ - virtual const PerformanceInfo GetPerformanceInfo(const StripeConfig& output_stripe_config, - BufferMode buffer_mode) = 0; - - static constexpr const char* _type_key = "contrib.ethosu.cascader.Part"; - TVM_DECLARE_BASE_OBJECT_INFO(PartNode, Object); - - protected: - friend class Part; - - /*! \brief The Tensor Expression subgraph represented by the Part */ - TESubgraph subgraph_; - /*! \brief The output->input propagators */ - std::vector propagators_; - /*! \brief Whether the Part is computed in-line */ - bool in_line_; - /*! \brief The input tensors */ - std::vector input_tensors_; - /*! \brief The output tensor */ - Tensor output_tensor_; -}; - -/*! - * \brief A class to describe a Part in a Cascader graph. - * \note Cascader graphs consist of two object types: Tensors and Parts. This class - * defines the Parts which represent the operations which produce and consume Tensors. - * - * A Part can represent one or more Tensor Expression compute operations but the subgraph - * it represents must have only a single output. Multiple TE compute operations should be - * represented under a single Part if the intermediate tensors between them won't be - * realized. This is a common pattern in Ethos-U where a sequence of TE compute operations - * are used to represent a single hardware primitive operation. - * - * Parts contain a Propagator per input which describes how a given output stripe config - * should be transformed into an input stripe config for each input. This is essential - * to analyse both the performance of Parts (determining the data that will be read) and - * in cascading Parts together (determining compatible stripe config choices). - * - * A Part can be marked as 'in_line', in which case it is assumed that it doesn't need to - * allocate space for its output tensor. - * - * This is only a base class and concrete Parts must be derived from it, implementing a - * function to model the performance of the Part as well as to determine its compute - * quantum. - */ -class Part : public ObjectRef { - public: - TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Part, ObjectRef, PartNode); -}; - -/*! \brief Node to represent a CascaderGraph */ -class CascaderGraphNode : public Object { - public: - CascaderGraphNode() {} - CascaderGraphNode(std::vector input_tensors, std::vector output_tensors); - - void VisitAttrs(AttrVisitor* v); - - /*! \return The input Tensors of the CascaderGraph */ - std::vector GetInputTensors() const { return input_tensors_; } - /*! \return The output Tensors of the CascaderGraph */ - std::vector GetOutputTensors() const { return output_tensors_; } - /*! \return The order of the Parts in the CascaderGraph */ - std::vector GetPartOrder() const { return part_order_; } - /*! - * \brief Get the ID of a Part in the CascaderGraph. - * \param part The Part to get the ID of. - * \return The ID of the Part in the CascaderGraph. - * \note Each Part is given a unique ID within the CascaderGraph. - */ - int GetPartID(const Part& part) const; - /*! - * \brief Get the ID of a Tensor in the CascaderGraph. - * \param tensor The Tensor to get the ID of. - * \return The ID of the Tensor in the CascaderGraph. - * \note Each Tensor is given a unique ID within the CascaderGraph. - */ - int GetTensorID(const Tensor& tensor) const; - - static constexpr const char* _type_key = "contrib.ethosu.cascader.CascaderGraph"; - TVM_DECLARE_FINAL_OBJECT_INFO(CascaderGraphNode, Object); - - protected: - /*! - * \brief Initialize the CascaderGraph by defining a topological ordering. 
- * \note This will traverse the Parts and Tensors using a depth-first - * visiting pattern and use the traversal order to initialize both the - * 'order' vectors and the ID maps. The order vectors define the ordering - * that the cascader expects the CascaderGraph to be executed in, but reversed. - * The ID maps assign a unique integer ID to each Part and Tensor corresponding - * to their position in their respective order vector. - */ - void Init_(); - - /*! \brief The input Tensors of the CascaderGraph */ - std::vector input_tensors_; - /*! \brief The output Tensors of the CascaderGraph */ - std::vector output_tensors_; - /*! \brief The order of the Tensors in the CascaderGraph */ - std::vector tensor_order_; - /*! \brief The order of the Parts in the CascaderGraph */ - std::vector part_order_; - /*! \brief A map between Parts in the CascaderGraph and their IDs */ - std::unordered_map part_id_map_; - /*! \brief A map between Tensors in the CascaderGraph and their IDs */ - std::unordered_map tensor_id_map_; -}; - -/*! - * \brief A class to describe a graph of Parts and Tensors used by the cascader. - * \note This class describes a graph consisting of two object types: Tensors and Parts. - * It defines a topological ordering on the graph such that each Part and Tensor has a - * position in the ordering. This ordering is used by the Plan and Proposal generation - * algorithms. It is also the ordering the Parts are expected to be executed in. - * - * In addition to defining an ordering, the Parts and Tensors are also all given unique - * IDs which they can be referred to by. - */ -class CascaderGraph : public ObjectRef { - public: - CascaderGraph(std::vector input_tensors, std::vector output_tensors); - - TVM_DEFINE_OBJECT_REF_METHODS(CascaderGraph, ObjectRef, CascaderGraphNode); -}; - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_GRAPH_H_ diff --git a/src/contrib/ethosu/cascader/pareto.cc b/src/contrib/ethosu/cascader/pareto.cc deleted file mode 100644 index 5d025b57bbe4..000000000000 --- a/src/contrib/ethosu/cascader/pareto.cc +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
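For readers following the removed graph.h above, the following is a deliberately simplified, standalone sketch (plain structs, not the TVM Object/ObjectRef machinery) of the producer/consumer bookkeeping that TensorNode::AddProducer/AddConsumer and PartNode::SetInput/SetOutput maintain. The names SimpleTensor and SimplePart are illustrative only and are not part of the removed code.

#include <cstddef>
#include <vector>

struct SimplePart;

// Tensors record which Parts produce and consume them, mirroring
// TensorNode::AddProducer / AddConsumer in the header above.
struct SimpleTensor {
  std::vector<int> shape;
  bool is_constant = false;
  std::vector<SimplePart*> producers;
  std::vector<SimplePart*> consumers;
};

// Parts record their input and output Tensors, mirroring
// PartNode::SetInput / SetOutput in the header above.
struct SimplePart {
  std::vector<SimpleTensor*> inputs;
  SimpleTensor* output = nullptr;

  void SetInput(std::size_t index, SimpleTensor* tensor) {
    if (inputs.size() <= index) inputs.resize(index + 1, nullptr);
    inputs[index] = tensor;
    tensor->consumers.push_back(this);
  }
  void SetOutput(SimpleTensor* tensor) {
    output = tensor;
    tensor->producers.push_back(this);
  }
};

int main() {
  // input -> conv -> mid -> pool -> out, with a constant weights tensor.
  SimpleTensor input{{1, 16, 16, 8}}, weights{{3, 3, 8, 8}}, mid{{1, 16, 16, 8}}, out{{1, 8, 8, 8}};
  weights.is_constant = true;
  SimplePart conv, pool;
  conv.SetInput(0, &input);
  conv.SetInput(1, &weights);
  conv.SetOutput(&mid);
  pool.SetInput(0, &mid);
  pool.SetOutput(&out);
  return (mid.producers.size() == 1 && mid.consumers.size() == 1) ? 0 : 1;
}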
- */ -#include "pareto.h" - -#include -#include -#include -#include - -#include -#include -#include - -#include "common.h" -#include "plan.h" -#include "proposal.h" -#include "tensor_config.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -template -std::vector GetParetoFrontier(const std::vector>& costs) { - std::vector is_optimal(costs.size(), true); - for (size_t i = 0; i < costs.size(); i++) { - if (is_optimal[i]) { - for (size_t j = 0; j < costs.size(); j++) { - if (is_optimal[j]) { - bool optimal = false; - for (size_t k = 0; k < N; k++) { - if (costs[i][k] > costs[j][k]) { - optimal = true; - break; - } - } - is_optimal[j] = optimal; - } - } - is_optimal[i] = true; - } - } - return is_optimal; -} - -template -std::vector ThinVector(const std::vector& vec, size_t max_size) { - if (max_size < 1) { - return std::vector(); - } - if (vec.size() <= max_size || vec.size() == 0) { - return vec; - } - if (max_size == 1) { - return std::vector{vec[0]}; - } - std::vector thin_vec; - float step = static_cast(vec.size()) / static_cast(max_size - 1); - for (float i = 0; i < vec.size() - 1; i += step) { - thin_vec.push_back(vec[static_cast(i)]); - } - thin_vec.push_back(vec.back()); - return thin_vec; -} - -std::vector ParetoCullPlans(std::vector plans, size_t max_plans, - bool disable_pareto_metric) { - if (plans.size() <= max_plans) { - return plans; - } - if (disable_pareto_metric) { - // Sample from all plans - return ThinVector(plans, max_plans); - } - - std::sort(plans.begin(), plans.end(), [](const Plan& a, const Plan& b) -> bool { - if (a->GetMemoryUsage() == b->GetMemoryUsage()) { - return a->GetCycles() < b->GetCycles(); - } - return a->GetMemoryUsage() < b->GetMemoryUsage(); - }); - std::vector> costs; - for (const auto& plan : plans) { - std::array cost = {static_cast(plan->GetMemoryUsage()), - static_cast(plan->GetCycles())}; - costs.emplace_back(cost); - } - std::vector is_optimal = GetParetoFrontier<2>(costs); - std::vector optimal_plans; - size_t i = 0; - for (bool optimal : is_optimal) { - if (optimal) { - optimal_plans.push_back(plans[i]); - } - i++; - } - if (optimal_plans.size() <= max_plans) { - return optimal_plans; - } - return ThinVector(optimal_plans, max_plans); -} - -std::vector ParetoCullProposals(std::vector proposals, size_t max_proposals, - bool disable_pareto_metric) { - if (disable_pareto_metric) { - // Sample from all Proposals - return ThinVector(proposals, max_proposals); - } - - std::sort(proposals.begin(), proposals.end(), [](const Proposal& a, const Proposal& b) -> bool { - if (a->GetMemoryUsage() == b->GetMemoryUsage()) { - return a->GetCycles() < b->GetCycles(); - } - return a->GetMemoryUsage() < b->GetMemoryUsage(); - }); - std::vector> costs; - for (const auto& proposal : proposals) { - std::array cost = {static_cast(proposal->GetMemoryUsage()), - static_cast(proposal->GetCycles())}; - costs.emplace_back(cost); - } - std::vector is_optimal = GetParetoFrontier<2>(costs); - std::vector optimal_proposals; - size_t i = 0; - for (bool optimal : is_optimal) { - if (optimal) { - optimal_proposals.push_back(proposals[i]); - } - i++; - } - if (optimal_proposals.size() <= max_proposals) { - return optimal_proposals; - } - return ThinVector(optimal_proposals, max_proposals); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GetParetoFrontier") - .set_body_typed([](Array> tcosts) { - std::vector> costs; - for (const auto& tcost : tcosts) { - ICHECK_EQ(tcost.size(), 2); - std::array point = {static_cast(tcost[0]->value), - 
static_cast(tcost[1]->value)}; - costs.push_back(point); - } - Array is_optimal; - for (bool opt : GetParetoFrontier<2>(costs)) { - is_optimal.push_back(Bool(opt)); - } - return is_optimal; - }); - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.ThinVector") - .set_body_typed([](Array vec, int max_size) { - std::vector vvec(vec.begin(), vec.end()); - return Array(ThinVector(vvec, max_size)); - }); - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.ParetoCullPlans") - .set_body_typed([](Array plans, int max_size, bool disable_pareto_metric) { - std::vector vplans(plans.begin(), plans.end()); - return Array(ParetoCullPlans(vplans, max_size, disable_pareto_metric)); - }); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm diff --git a/src/contrib/ethosu/cascader/pareto.h b/src/contrib/ethosu/cascader/pareto.h deleted file mode 100644 index abb6ca516c23..000000000000 --- a/src/contrib/ethosu/cascader/pareto.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/contrib/ethosu/cascader/pareto.h - * \brief Pareto optimisation functions for the NPU cascader - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PARETO_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_PARETO_H_ - -#include -#include - -#include -#include -#include - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -class Plan; -class MemoryRegion; -class Proposal; - -/*! - * \brief Determine the Pareto optimal points. - * \param costs The points as a vector of N-dimensional costs. - * \return A vector that is true where a point is Pareto optimal and false otherwise. - */ -template -std::vector GetParetoFrontier(const std::vector>& costs); - -/*! - * \brief Evenly sample items from a vector to reduce its size. - * \param vec The vector to thin. - * \param max_size The maximum size of the thinned vector. - * \return The thinned vector. - */ -template -std::vector ThinVector(const std::vector& vec, size_t max_size); - -/*! - * \brief Cull plans which are not Pareto optimal then thin them down. - * \param plans The plans to apply the Pareto culling to. - * \param max_plans The maximum number of plans after the culling. - * \param disable_pareto_metric Whether to only select from Pareto frontier or not. - * \return The culled plans. - * \note Plan Pareto-optimality is determined based upon a Plan's memory_usage - * and cycles. 
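As a standalone illustration of the two-objective dominance test that GetParetoFrontier applies to (memory_usage, cycles) pairs in the removed pareto.cc above, here is a minimal sketch using only the standard library; the function names and sample cost values are made up for the example and are not part of the removed code.

#include <array>
#include <cstddef>
#include <iostream>
#include <vector>

// True if 'a' dominates 'b': no worse in every objective and strictly
// better in at least one (both objectives are minimised).
bool Dominates(const std::array<double, 2>& a, const std::array<double, 2>& b) {
  bool strictly_better = false;
  for (std::size_t k = 0; k < 2; ++k) {
    if (a[k] > b[k]) return false;
    if (a[k] < b[k]) strictly_better = true;
  }
  return strictly_better;
}

// O(n^2) Pareto frontier over (memory_usage, cycles) cost pairs.
std::vector<bool> ParetoFrontier(const std::vector<std::array<double, 2>>& costs) {
  std::vector<bool> is_optimal(costs.size(), true);
  for (std::size_t i = 0; i < costs.size(); ++i) {
    for (std::size_t j = 0; j < costs.size(); ++j) {
      if (i != j && Dominates(costs[j], costs[i])) {
        is_optimal[i] = false;
        break;
      }
    }
  }
  return is_optimal;
}

int main() {
  // (memory_usage, cycles): the second point is dominated by the first.
  std::vector<std::array<double, 2>> costs = {{100, 50}, {120, 60}, {80, 90}, {150, 40}};
  std::vector<bool> keep = ParetoFrontier(costs);
  for (std::size_t i = 0; i < keep.size(); ++i) {
    std::cout << "plan " << i << (keep[i] ? " kept" : " culled") << "\n";
  }
  return 0;
}

ParetoCullPlans above applies this kind of test after sorting by memory usage (breaking ties on cycles), and then thins the surviving Plans with ThinVector if there are still more than max_plans of them.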
- */ -std::vector ParetoCullPlans(std::vector plans, size_t max_plans, - bool disable_pareto_metric); - -std::vector ParetoCullProposals(std::vector proposals, size_t max_proposals, - bool disable_pareto_metric); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_PARETO_H_ diff --git a/src/contrib/ethosu/cascader/parts/ethosu.cc b/src/contrib/ethosu/cascader/parts/ethosu.cc deleted file mode 100644 index 4fb6dbd05203..000000000000 --- a/src/contrib/ethosu/cascader/parts/ethosu.cc +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#include "ethosu.h" - -#include - -#include -#include -#include -#include -#include -#include - -#include "../common.h" -#include "../stripe_config.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -const std::vector EthosuPartNode::GetBytesRead(const std::vector& block_shape, - const std::vector& full_shape) { - std::vector bytes_per_input(propagators_.size(), 0); - - std::vector order; - std::vector stripes; - std::vector offset; - std::vector strides; - for (size_t i = 0; i < block_shape.size(); i++) { - order.push_back(1); - stripes.push_back(round_up_divide(full_shape[i], block_shape[i])); - offset.push_back(0); - strides.push_back(static_cast(block_shape[i])); - } - - StripeConfig output_block_config(block_shape, full_shape, strides, order, stripes, offset); - auto input_block_configs = CalculateInputStripeConfigs(output_block_config); - - int i = 0; - for (const auto& input_block_config : input_block_configs) { - std::map, int> input_blocks = CountStripes(input_block_config, false); - for (const auto& block : input_blocks) { - bytes_per_input[i] += - mul_reduce(block.first) * block.second * input_tensors_[i]->GetDataType().bytes(); - } - i++; - } - - if (weight_tensor_idx_ != -1) { - bytes_per_input[weight_tensor_idx_] *= (stripes[height_idx_] * stripes[width_idx_]); - } - - return bytes_per_input; -} - -float EthosuPartNode::CalculateCost(const BlockConfig& block_config, - const StripeConfig& output_stripe_config) { - std::vector output_block = block_config->GetOutputBlockShape(); - std::vector output_stripe_shape = output_stripe_config->GetShape(); - auto input_stripe_configs = CalculateInputStripeConfigs(output_stripe_config); - std::vector input_stripe_shape = input_stripe_configs[0]->GetShape(); - - std::vector bytes_per_input = GetBytesRead(output_block, output_stripe_shape); - bytes_per_input[0] *= subkernels_; - - // Calculate bytes read per output element - float cost = - static_cast(bytes_per_input[0] + bytes_per_input[1]) / mul_reduce(output_stripe_shape); - - // Single buffering hardware optimization - if (mul_reduce(input_stripe_shape) <= 
2 * mul_reduce(block_config->GetInputBlockShape())) { - cost /= 2; - } - return cost; -} - -const BlockConfig EthosuPartNode::GetBlockConfig(const StripeConfig& output_stripe_config) { - BlockConfig best_block_config = valid_block_configs_[0]; - float best_cost = CalculateCost(best_block_config, output_stripe_config); - std::vector output_stripe_shape = output_stripe_config->GetShape(); - auto input_stripe_configs = CalculateInputStripeConfigs(output_stripe_config); - std::vector input_stripe_shape = input_stripe_configs[0]->GetShape(); - - for (const auto& block_config : valid_block_configs_) { - float relative_cost = CalculateCost(block_config, output_stripe_config); - if (relative_cost < best_cost) { - best_block_config = block_config; - best_cost = relative_cost; - } - } - return best_block_config; -} - -const PerformanceInfo EthosuPartNode::GetPerformanceInfo(const StripeConfig& output_stripe_config, - BufferMode buffer_mode) { - BlockConfig block_config = GetBlockConfig(output_stripe_config); - std::vector block_shape = block_config->GetOutputBlockShape(); - - std::vector bytes_per_input = - GetBytesRead(block_shape, output_stripe_config->GetShape()); - - float num_blocks = 1.0f; - for (size_t i = 0; i < block_shape.size(); i++) { - if (buffer_mode == BufferMode::RECOMPUTE) { - num_blocks *= std::max(static_cast(output_stripe_config->GetShape()[i]) / - block_shape[i] * output_stripe_config->GetStripes()[i], - 1.0f); - } else { - num_blocks *= - std::max(static_cast(output_tensor_->GetShape()[i]) / block_shape[i], 1.0f); - } - } - - float num_stripes = mul_reduce(output_stripe_config->GetStripes()); - std::vector read_bytes; - for (int64_t stripe_bytes : bytes_per_input) { - read_bytes.push_back(num_stripes * stripe_bytes); - } - int64_t write_bytes = - num_blocks * mul_reduce(block_shape) * output_tensor_->GetDataType().bytes(); - - int block_output_cycles = block_config->GetOutputCycles(); - int block_compute_cycles = block_config->GetComputeCycles(); - - int64_t total_cycles = 0; - if (block_output_cycles > block_compute_cycles) { - total_cycles = (block_output_cycles * num_blocks) + block_compute_cycles; - } else { - total_cycles = (block_compute_cycles * num_blocks) + block_output_cycles; - } - - PerformanceInfo info(total_cycles, read_bytes, write_bytes, block_config); - return info; -} - -EthosuPart::EthosuPart(const TESubgraph& subgraph, const std::vector propagators, - const std::vector& output_quantum, int subkernels, - const std::vector& valid_block_configs, int weight_tensor_idx) { - auto n = make_object(); - ICHECK_GT(propagators.size(), 0) << "The Part must include at least one Propagator."; - n->subgraph_ = subgraph; - n->propagators_ = std::move(propagators); - n->in_line_ = false; - n->input_tensors_.resize(propagators.size()); - n->output_quantum_ = output_quantum; - n->valid_block_configs_ = valid_block_configs; - n->subkernels_ = subkernels; - n->weight_tensor_idx_ = weight_tensor_idx; - if (output_quantum.size() == 5) { - // NHCWB16 Format - n->height_idx_ = 1; - n->width_idx_ = 3; - } else { - // NHWC Format - n->height_idx_ = 1; - n->width_idx_ = 2; - } - data_ = std::move(n); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.EthosuPart") - .set_body_typed([](Array subgraph_inputs, te::Tensor subgraph_output, - Array propagators, Array output_quantum, int subkernels, - Array valid_block_configs, int weight_tensor_idx) { - std::vector vsubgraph_inputs(subgraph_inputs.begin(), subgraph_inputs.end()); - std::vector vpropagators(propagators.begin(), 
propagators.end()); - std::vector voutput_quantum; - std::transform(output_quantum.begin(), output_quantum.end(), - std::back_inserter(voutput_quantum), - [](auto&& val) { return val.IntValue(); }); - TESubgraph subgraph; - subgraph.input_tensors = vsubgraph_inputs; - subgraph.output_tensor = subgraph_output; - std::vector vvalid_block_configs(valid_block_configs.begin(), - valid_block_configs.end()); - return EthosuPart(subgraph, vpropagators, voutput_quantum, subkernels, vvalid_block_configs, - weight_tensor_idx); - }); - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.EthosuPartGetBlockConfig") - .set_body_typed([](EthosuPart part, StripeConfig stripe_config) { - return part->GetBlockConfig(stripe_config); - }); - -TVM_REGISTER_NODE_TYPE(EthosuPartNode); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm diff --git a/src/contrib/ethosu/cascader/parts/ethosu.h b/src/contrib/ethosu/cascader/parts/ethosu.h deleted file mode 100644 index 4738f673e79b..000000000000 --- a/src/contrib/ethosu/cascader/parts/ethosu.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/contrib/ethosu/cascader/parts/ethosu.h - * \brief Arm(R) Ethos(TM)-U NPU Part object - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PARTS_ETHOSU_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_PARTS_ETHOSU_H_ - -#include - -#include - -#include "../block_config.h" -#include "../graph.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -/*! \brief Node to represent an EthosuPart */ -class EthosuPartNode : public PartNode { - public: - /*! - * \brief Get the optimal BlockConfig to use given a StripeConfig - * \param output_stripe_config The output StripeConfig. - */ - const BlockConfig GetBlockConfig(const StripeConfig& output_stripe_config); - /*! - * \brief Get the preferred alignment in each axis for a stripe of the Part. - * \note This is used to bias the selection of StripeConfigs towards those that are integer - * multiples of a tensor intrinsic used to compute the Part. - */ - const std::vector GetStripeAlignHint() const final { return output_quantum_; } - /*! - * \brief Get the performance information for a given output stripe config. - * \param output_stripe_config The output stripe config to compute the performance for. - * \param buffer_mode The mode of buffering, rolling or recompute. - * \return The performance information containing the compute cycles and read/write bytes. 
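The cycle estimate produced by EthosuPartNode::GetPerformanceInfo in the removed ethosu.cc combines per-block compute and output cycles with the number of blocks implied by the stripe and block shapes. The following toy calculation sketches that arithmetic for the RECOMPUTE buffering case; all shapes and cycle counts here are made up purely for illustration.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> stripe_shape = {1, 16, 16, 32};  // output stripe shape (NHWC)
  std::vector<int> block_shape  = {1, 8, 8, 16};    // chosen output block shape
  std::vector<int> stripes      = {1, 4, 4, 1};     // number of stripes per axis
  int block_compute_cycles = 2000;                  // per-block compute cost (assumed)
  int block_output_cycles  = 1500;                  // per-block output-write cost (assumed)

  // RECOMPUTE buffering: blocks per stripe multiplied by the stripes per axis.
  double num_blocks = 1.0;
  for (std::size_t i = 0; i < stripe_shape.size(); ++i) {
    num_blocks *= std::max(
        static_cast<double>(stripe_shape[i]) / block_shape[i] * stripes[i], 1.0);
  }

  // The larger per-block cost is paid for every block; the smaller one is
  // added only once, matching the combination used in GetPerformanceInfo.
  std::int64_t total_cycles;
  if (block_output_cycles > block_compute_cycles) {
    total_cycles = static_cast<std::int64_t>(block_output_cycles * num_blocks) + block_compute_cycles;
  } else {
    total_cycles = static_cast<std::int64_t>(block_compute_cycles * num_blocks) + block_output_cycles;
  }
  std::cout << "blocks: " << num_blocks << ", cycles: " << total_cycles << "\n";
  return 0;
}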
- */ - const PerformanceInfo GetPerformanceInfo(const StripeConfig& output_stripe_config, - BufferMode buffer_mode) final; - - static constexpr const char* _type_key = "contrib.ethosu.cascader.EthosuPart"; - TVM_DECLARE_FINAL_OBJECT_INFO(EthosuPartNode, PartNode); - - protected: - friend class EthosuPart; - - /*! - * \brief Get the size of input required (per input tensor) to compute a stripe given a - * block_shape - * \param block_shape The shape of the block(s) the stripe is split into - * \param stripe_shape The shape of the full stripe to compute. - * \return The bytes required per input tensor. - */ - const std::vector GetBytesRead(const std::vector& block_shape, - const std::vector& full_shape); - - /*! - * \brief Get cost heuristic of using a given block config with the associated stripe config - * \param block_config The block config that is being checked for the cost - * \param output_stripe_config The striping configuration associated with the operator - * \return A cost heuristic representative of the choice - */ - float CalculateCost(const BlockConfig& block_config, const StripeConfig& output_stripe_config); - - /*! \brief List of block configs that are valid for this part */ - std::vector valid_block_configs_; - /*! \brief The output volume that is atomically computed */ - std::vector output_quantum_; - /*! \brief Index for output height dimension */ - int height_idx_; - /*! \brief Index for output width dimension */ - int width_idx_; - /*! \brief Index of weight tensor, -1 if the Part has no weights */ - int weight_tensor_idx_; - /*! \brief Number of sub-kernels the kernel has been split into */ - int subkernels_; -}; - -/*! - * \brief A class to describe a Part to be executed on an Arm(R) Ethos(TM)-U NPU. - * \note EthosuParts must be provided with an output quantum and the cycles taken to - * compute an output quantum which depend on the operator the NPU is computing. - */ -class EthosuPart : public Part { - public: - EthosuPart(const TESubgraph& subgraph, const std::vector propagators, - const std::vector& output_quantum, int subkernels, - const std::vector& valid_block_configs, int weight_tensor_idx); - - TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(EthosuPart, Part, EthosuPartNode); -}; - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_PARTS_ETHOSU_H_ diff --git a/src/contrib/ethosu/cascader/parts/inline.cc b/src/contrib/ethosu/cascader/parts/inline.cc deleted file mode 100644 index 8854bbd90e81..000000000000 --- a/src/contrib/ethosu/cascader/parts/inline.cc +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -#include "inline.h" - -#include - -#include -#include - -#include "../block_config.h" -#include "../common.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -const PerformanceInfo InlinePartNode::GetPerformanceInfo(const StripeConfig& output_stripe_config, - BufferMode buffer_mode) { - std::vector read_bytes(input_tensors_.size()); - BlockConfig block_config = BlockConfig(std::vector(1, 1), std::vector(1, 1), 0, 0); - PerformanceInfo info(0, read_bytes, 0, block_config); - return info; -} - -InlinePart::InlinePart(const TESubgraph& subgraph, const std::vector propagators) { - auto n = make_object(); - ICHECK_GT(propagators.size(), 0) << "The Part must include at least one Propagator."; - n->subgraph_ = subgraph; - n->propagators_ = std::move(propagators); - n->in_line_ = true; - n->input_tensors_.resize(propagators.size()); - data_ = std::move(n); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.InlinePart") - .set_body_typed([](Array subgraph_inputs, te::Tensor subgraph_output, - Array propagators) { - std::vector vsubgraph_inputs(subgraph_inputs.begin(), subgraph_inputs.end()); - std::vector vpropagators(propagators.begin(), propagators.end()); - TESubgraph subgraph; - subgraph.input_tensors = vsubgraph_inputs; - subgraph.output_tensor = subgraph_output; - return InlinePart(subgraph, vpropagators); - }); - -TVM_REGISTER_NODE_TYPE(InlinePartNode); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm diff --git a/src/contrib/ethosu/cascader/parts/inline.h b/src/contrib/ethosu/cascader/parts/inline.h deleted file mode 100644 index 11d94f17397d..000000000000 --- a/src/contrib/ethosu/cascader/parts/inline.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/contrib/ethosu/cascader/parts/inline.h - * \brief Inline Part object - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PARTS_INLINE_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_PARTS_INLINE_H_ - -#include - -#include - -#include "../graph.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -/*! \brief Node to represent an inlined Part */ -class InlinePartNode : public PartNode { - public: - /*! - * \brief Get the performance information for a given output stripe config. - * \param output_stripe_config The output stripe config to compute the performance for. - * \param is_rolling Whether the output config should be computed as a rolling buffer. - * \return The performance information containing the compute cycles and read/write bytes. 
- */ - const PerformanceInfo GetPerformanceInfo(const StripeConfig& output_stripe_config, - BufferMode buffer_mode) final; - - static constexpr const char* _type_key = "contrib.ethosu.cascader.InlinePart"; - TVM_DECLARE_FINAL_OBJECT_INFO(InlinePartNode, PartNode); - - protected: - friend class InlinePart; -}; - -/*! - * \brief A class to describe a inlined Part in a Cascader graph. - * \note Inlined Parts have a few special properties. First by IsInline being true, - * the Cascader will not allocate any space for the outputs of the Part. This is because - * they will be directly consumed as they are produced by the following Part. Second, they - * are assumed to be 'free' and require no cycles to execute. Lastly, as they are 'free' - * the compute quantum is arbitrary, but by convention it is a single tensor element. - * - * Examples of inline Parts include strided_slice, reshape and concatenate - all of which - * get absorbed into the DMA functionality of Ethos-U compute primitives. - */ -class InlinePart : public Part { - public: - InlinePart(const TESubgraph& subgraph, const std::vector propagators); - - TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(InlinePart, Part, InlinePartNode); -}; - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_PARTS_INLINE_H_ diff --git a/src/contrib/ethosu/cascader/plan.cc b/src/contrib/ethosu/cascader/plan.cc deleted file mode 100644 index 173b3f9e8d20..000000000000 --- a/src/contrib/ethosu/cascader/plan.cc +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -#include "plan.h" - -#include -#include -#include -#include - -#include -#include -#include - -#include "graph.h" -#include "tensor_config.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -void PlanNode::VisitAttrs(AttrVisitor* v) { - Array tmp_arr(tensor_configs_); - v->Visit("_tensor_configs", &tmp_arr); - Array tmp_cfgs(open_configs_.begin(), open_configs_.end()); - v->Visit("_open_configs", &tmp_cfgs); - v->Visit("_output_config", &output_config_); - Array tmp_parts(part_group_.begin(), part_group_.end()); - v->Visit("_part_group", &tmp_parts); - v->Visit("_interior_region", &interior_region_); - v->Visit("_memory_usage", &memory_usage_); - v->Visit("_cycles", &cycles_); -} - -Plan::Plan(const std::vector& tensor_configs, - const std::vector& open_configs, const TensorConfig& output_config, - const std::vector& part_group, const MemoryRegion& interior_region, - int memory_usage, int cycles) { - auto n = make_object(); - n->tensor_configs_ = std::move(tensor_configs); - n->open_configs_ = std::move(open_configs); - n->output_config_ = std::move(output_config); - n->part_group_ = std::move(part_group); - n->interior_region_ = interior_region; - n->memory_usage_ = memory_usage; - n->cycles_ = cycles; - data_ = std::move(n); -} - -Plan Plan::Merge(const Plan& other) const { - auto n = make_object(*this->operator->()); - n->tensor_configs_.insert(n->tensor_configs_.end(), other->tensor_configs_.begin(), - other->tensor_configs_.end()); - n->open_configs_.erase( - std::remove(n->open_configs_.begin(), n->open_configs_.end(), (*this)->output_config_), - n->open_configs_.end()); - for (const auto& config : other->open_configs_) { - if (config->GetTensor() != (*this)->output_config_->GetTensor()) { - n->open_configs_.push_back(config); - } - } - n->output_config_ = other->output_config_; - n->part_group_.insert(n->part_group_.end(), other->part_group_.begin(), other->part_group_.end()); - std::sort(n->part_group_.begin(), n->part_group_.end()); - n->memory_usage_ += other->memory_usage_; - n->cycles_ += other->cycles_; - return Plan(n); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.Plan") - .set_body_typed([](Array tensor_configs, Array open_configs, - TensorConfig output_config, Array part_group, - MemoryRegion interior_region, int memory_usage, int cycles) { - std::vector vtensor_configs(tensor_configs.begin(), tensor_configs.end()); - std::vector sopen_configs(open_configs.begin(), open_configs.end()); - std::vector spart_group(part_group.begin(), part_group.end()); - return Plan(vtensor_configs, sopen_configs, output_config, spart_group, interior_region, - memory_usage, cycles); - }); - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.PlanMerge").set_body_method(&Plan::Merge); - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.PlanMergeBenchmark") - .set_body_typed([](Plan plan, Plan other, int repeats) { - for (int i = 0; i < repeats; i++) { - plan.Merge(other); - } - return plan.Merge(other); - }); - -TVM_REGISTER_NODE_TYPE(PlanNode); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm diff --git a/src/contrib/ethosu/cascader/plan.h b/src/contrib/ethosu/cascader/plan.h deleted file mode 100644 index 65efe98e4ff5..000000000000 --- a/src/contrib/ethosu/cascader/plan.h +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/contrib/ethosu/cascader/plan.h - * \brief Plan object for the NPU cascader - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PLAN_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_PLAN_H_ - -#include -#include - -#include -#include -#include -#include - -#include "graph.h" -#include "tensor_config.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -/*! \brief Node to represent a Plan */ -class PlanNode : public Object { - public: - void VisitAttrs(AttrVisitor* v); - - /*! \return The TensorConfigs specified by the Plan */ - const std::vector& GetTensorConfigs() const { return tensor_configs_; } - /*! \return The TensorConfigs which are 'open' meaning they are a Plan input/output but have - * INTERIOR state */ - const std::vector& GetOpenConfigs() const { return open_configs_; } - /*! \return The TensorConfig of the Plan's output tensor */ - const TensorConfig GetOutputConfig() const { return output_config_; } - /*! \return The Parts which are covered by the Plan */ - const std::vector& GetPartGroup() const { return part_group_; } - /*! \return The memory region in which to store interior Plan buffers */ - MemoryRegion const GetInteriorRegion() const { return interior_region_; } - /*! - * \return The interior memory used by the Plan in bytes. - * \note The interior memory usage is defined as being the memory required in the interior region - * to execute the Plan excluding input and output buffers. - */ - int GetMemoryUsage() const { return memory_usage_; } - /*! \return The cycles taken to execute the Plan */ - int GetCycles() const { return cycles_; } - /*! \return Whether the Plan is 'closed' meaning it has no 'open' TensorConfigs */ - bool IsClosed() const { return open_configs_.size() == 0; } - - static constexpr const char* _type_key = "contrib.ethosu.cascader.Plan"; - TVM_DECLARE_FINAL_OBJECT_INFO(PlanNode, Object); - - protected: - friend class Plan; - - /*! \brief The TensorConfigs specified by the Plan */ - std::vector tensor_configs_; - /*! \brief The TensorConfigs which are 'open' meaning they are a Plan input/output but have - * INTERIOR state */ - std::vector open_configs_; - /*! \brief The TensorConfig of the Plan's output tensor */ - TensorConfig output_config_; - /*! \brief The Parts which are covered by the Plan */ - std::vector part_group_; - /*! \brief The memory region in which to store interior Plan buffers */ - MemoryRegion interior_region_; - /*! \brief The interior memory used by the Plan in bytes */ - int memory_usage_; - /*! \brief The cycles taken to execute the Plan */ - int cycles_; -}; - -/*! - * \brief A class which describes how to schedule a subgraph of Parts together. 
- * \note A Plan takes the form of a subgraph of connected Parts (recorded in part_group) with - * TensorConfigs for all of the required Tensors (recorded in tensor_configs). This information can - * be used to produce a Tensor Expression schedule with inter-operator scheduling. A Plan is - * necessarily single-output such that all non-output Parts are 'computed_at'ed the scope of the - * output Part. This is what achieves the technique referred to as 'cascading'. A Plan also has an - * interior memory region which specifies the region of memory into which all the Plans intermediate - * buffers should be allocated. - * - * Additionally, a Plan contains some other information used during the Plan generation and - * selection algorithms. Both the memory and cycles required to run the Plan are accounted for so - * that Plans can be ranked and Pareto-culled on these metrics. Furthermore, the TensorConfigs which - * are 'open' is recorded indicating that these are valid points to merge with another Plan. A Plan - * can only be turned into a schedule if it has no 'open' TensorConfigs - at which point the Plan is - * said to be 'closed'. - */ -class Plan : public ObjectRef { - public: - Plan(const std::vector& tensor_configs, - const std::vector& open_configs, const TensorConfig& output_config, - const std::vector& part_group, const MemoryRegion& interior_region, int memory_usage, - int cycles); - /*! - * \brief Merge two Plans which share an 'open' TensorConfig. - * \param other The Plan to merge with. - * \return The merged Plan. - * \note The current Plan is referred to as the 'upper Plan' and the other Plan as the 'lower - * Plan'. The 'open' output config of the upper Plan must be an 'open' input config of the lower - * Plan. The Tensor referenced by these configs is the Tensor on which the two Plans will be - * merged. The merge process does the following: - * - * The tensor config maps will be merged with TensorConfigs from the upper Plan taking priority. - * The open configs will be merged with the TensorConfigs that are being merged having been - * removed. The output config will be that of the lower Plan. The part groups will be merged. The - * interior region is necessarily the same for both the upper and lower Plan. The cycles and - * memory usage will be summed. 
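As a rough illustration of the merge bookkeeping described in the note above, here is a simplified stand-in that uses integer IDs in place of TensorConfig and Part objects. It is not the TVM Plan class, only a sketch of the same steps: concatenate tensor configs, close the shared open config, adopt the lower Plan's output config, union the part groups and sum memory and cycles.

#include <algorithm>
#include <vector>

struct ToyPlan {
  std::vector<int> tensor_configs;  // all tensor config IDs covered by the plan
  std::vector<int> open_configs;    // 'open' config IDs
  int output_config;                // config ID of the plan's output tensor
  std::vector<int> part_group;      // part IDs covered by the plan
  int memory_usage;
  int cycles;
};

// Merge an 'upper' plan with a 'lower' plan that consumes its open output.
ToyPlan Merge(const ToyPlan& upper, const ToyPlan& lower) {
  ToyPlan merged = upper;
  // Concatenate the tensor configs (duplicates are tolerated in this sketch).
  merged.tensor_configs.insert(merged.tensor_configs.end(),
                               lower.tensor_configs.begin(), lower.tensor_configs.end());
  // The shared config is no longer open once the two plans are joined.
  merged.open_configs.erase(
      std::remove(merged.open_configs.begin(), merged.open_configs.end(), upper.output_config),
      merged.open_configs.end());
  for (int cfg : lower.open_configs) {
    if (cfg != upper.output_config) merged.open_configs.push_back(cfg);
  }
  merged.output_config = lower.output_config;
  merged.part_group.insert(merged.part_group.end(),
                           lower.part_group.begin(), lower.part_group.end());
  std::sort(merged.part_group.begin(), merged.part_group.end());
  merged.memory_usage += lower.memory_usage;
  merged.cycles += lower.cycles;
  return merged;
}

int main() {
  ToyPlan upper{{1, 2}, {2}, 2, {10}, 4096, 1000};  // output config 2 is open
  ToyPlan lower{{2, 3}, {2}, 3, {11}, 2048, 500};   // consumes config 2
  ToyPlan merged = Merge(upper, lower);
  return (merged.open_configs.empty() && merged.part_group.size() == 2) ? 0 : 1;
}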
- */ - Plan Merge(const Plan& other) const; - - TVM_DEFINE_OBJECT_REF_METHODS(Plan, ObjectRef, PlanNode); -}; - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -// Hash functions TensorConfig and Part sets -namespace std { - -using TensorConfigSet = std::vector<::tvm::contrib::ethosu::cascader::TensorConfig>; -using PartSet = std::vector<::tvm::contrib::ethosu::cascader::Part>; - -template <> -struct hash { - std::size_t operator()(const TensorConfigSet& tensor_config_set) const { - size_t seed = 0; - for (const auto& tensor_config : tensor_config_set) { - seed ^= hash<::tvm::contrib::ethosu::cascader::TensorConfig>()(tensor_config); - } - return seed; - } -}; - -template <> -struct equal_to { - bool operator()(const TensorConfigSet& lhs, const TensorConfigSet& rhs) const { - std::unordered_set<::tvm::contrib::ethosu::cascader::TensorConfig> lh_set(lhs.begin(), - lhs.end()); - std::unordered_set<::tvm::contrib::ethosu::cascader::TensorConfig> rh_set(rhs.begin(), - rhs.end()); - return lh_set == rh_set; - } -}; - -template <> -struct hash { - std::size_t operator()(const PartSet& part_set) const { - size_t seed = 0; - for (const auto& part : part_set) { - seed ^= tvm::runtime::ObjectHash()(part); - } - return seed; - } -}; - -template <> -struct equal_to { - bool operator()(const PartSet& lhs, const PartSet& rhs) const { return lhs == rhs; } -}; - -} // namespace std - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_PLAN_H_ diff --git a/src/contrib/ethosu/cascader/plan_generator.cc b/src/contrib/ethosu/cascader/plan_generator.cc deleted file mode 100644 index 9545a511e71d..000000000000 --- a/src/contrib/ethosu/cascader/plan_generator.cc +++ /dev/null @@ -1,617 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -#include "plan_generator.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "block_config.h" -#include "cascader_options.h" -#include "common.h" -#include "graph.h" -#include "pareto.h" -#include "plan.h" -#include "stripe_config.h" -#include "tensor_config.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -template -std::vector> EnumerateCombinations(std::vector> values) { - if (values.size() == 0) { - return values; - } - if (values.size() == 1) { - std::vector> combs; - for (const auto& value : values[0]) { - combs.push_back(std::vector(1, value)); - } - return combs; - } - auto combs = EnumerateCombinations(std::vector>(values.begin(), values.end() - 1)); - std::vector> new_combs; - for (const auto& value : values.back()) { - for (const auto& comb : combs) { - auto new_comb = std::vector(comb); - new_comb.push_back(value); - new_combs.push_back(new_comb); - } - } - return new_combs; -} - -float GetTransferEfficiency(const Tensor& tensor, const std::vector& block_shape, - const MemoryRegion& memory) { - // The block_shape represents the shape of the data transfer required for each job. This is used - // to calculate how much of the block_shape is contiguous in memory (source memory for a read or - // destination memory for a write) and subsequently calculate how efficient each memory burst is. - const auto& shape = tensor->GetShape(); - int burst_length = block_shape[block_shape.size() - 1]; - if (block_shape[block_shape.size() - 1] == shape[shape.size() - 1]) { - burst_length *= block_shape[block_shape.size() - 2]; - } - - burst_length *= tensor->GetDataType().bytes(); - return static_cast(memory->burst_length) / std::min(burst_length, memory->burst_length); -} - -std::vector GetCascadableAxes(const Part& part) { - std::vector cascadable_axes(part->GetOutputTensor()->GetShape().size()); - // Check all the propagators to see if an output axis is projected into any - // of the inputs. If it is, then that axis is cascadable. - for (const auto& propagator : part->GetPropagators()) { - auto transform = propagator->GetTransform(); - for (size_t i = 0; i < transform.size(); i++) { - for (size_t j = 0; j < transform[0].size() - 1; j++) { - // An axis is projected if there's a non-zero element - // in the transform matrix - if (transform[i][j] != 0) { - cascadable_axes[j] = true; - } - } - } - } - return cascadable_axes; -} - -std::vector GenerateOutputStripeConfigs(const Part& part, int stripe_factors, - bool enable_striping, - bool multi_dimensional) { - // If stripe_factors is <= 0, then we won't produce any StripeConfigs - if (stripe_factors <= 0) { - return std::vector(); - } - // Work out the factors to divide by as inverse powers of 2. - // The last factor is always reserved to be '0' which will correspond to - // choosing a stripe size of 1 in the dimension. We always include this - // as it represents the most extreme striping choice that uses the least - // memory, so it is our choice of last resort. - // For example, if stripe_factors = 4 then the factors are 1, 1/2, 1/4, 0. - std::vector factors; - for (size_t i = 0; i < static_cast(stripe_factors) - 1; i++) { - factors.push_back(1.0f / (std::pow(2.0f, i))); - } - factors.push_back(0); - // Then use the factors to derive the possible ways to split each dimension - // into stripes. 
As an example, if an had extent 128 then by applying - // the factors derived above we get the following possible splits for that axis: - // 128, 64, 32, 1 - std::vector> splits; - std::vector output_shape = part->GetOutputTensor()->GetShape(); - size_t output_dims = output_shape.size(); - // Only bother striping along the axes which are cascadable - auto cascadable_axes = GetCascadableAxes(part); - for (size_t i = 0; i < output_dims; i++) { - auto axis = output_shape[i]; - auto axis_align = part->GetStripeAlignHint()[i]; - std::set axis_splits; // Note this is a set to remove duplicate splits - if (!cascadable_axes[i] || (!enable_striping)) { - axis_splits.insert(axis); - } else { - for (float factor : factors) { - int split = - std::max(static_cast(std::ceil(axis * factor / axis_align)), 1) * axis_align; - split = std::min(axis, split); - axis_splits.insert(split); - } - } - splits.push_back(std::vector(axis_splits.begin(), axis_splits.end())); - } - - std::vector> stripe_shapes; - if (multi_dimensional) { - // Now calculate all the possible combinations of splits for each dimension - // to give us all the possible stripe shapes. For example, if we had two axes - // both with possible splits in {128, 64, 32, 1}, the stripe shapes would be: - // (128, 128), (128, 64), (128, 32) ... (1, 64), (1, 32), (1, 1) - stripe_shapes = EnumerateCombinations(splits); - } else { - // Only consider splitting a single axis - int axis = 0; - for (const auto& split : splits) { - for (const auto& axis_split : split) { - std::vector stripe_shape = output_shape; - if (stripe_shape[axis] != axis_split) { - stripe_shape[axis] = axis_split; - stripe_shapes.push_back(stripe_shape); - } - } - axis++; - } - stripe_shapes.push_back(output_shape); - } - auto offset = std::vector(output_dims); - std::vector stripe_configs; - // Calculate the possible axis orderings such that each axis has the opportunity - // to be the 'outermost' axis (which is axis that is chosen for rolling). - std::vector> orders; - for (size_t i = 0; i < output_dims; i++) { - std::vector order(output_dims); - for (size_t j = 0; j < output_dims; j++) { - order[j] = 1 + (j + i) % output_dims; - } - orders.push_back(order); - } - // Finally, create the StripeConfigs from the possible stripe shapes and orders - for (const auto& stripe_shape : stripe_shapes) { - std::vector stripes; - std::vector strides; - for (size_t i = 0; i < output_dims; i++) { - stripes.push_back(std::ceil(static_cast(output_shape[i]) / stripe_shape[i])); - strides.push_back(static_cast(stripe_shape[i])); // strides = stripe_shape - } - // If the stripe shape equals the output shape (i.e. there's no striping), then - // the order doesn't matter, so just pick the first order and continue. - if (stripe_shape == output_shape) { - stripe_configs.push_back( - StripeConfig(stripe_shape, output_shape, strides, orders[0], stripes, offset)); - continue; - } - for (const auto& order : orders) { - // Some logic to avoid having an axis be the 'outermost' if the stripe is full - // size in that axis. This would otherwise be a waste because we can't roll - // over an axis that hasn't been split. 
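A minimal standalone reproduction of the per-axis split derivation described in the comments above, assuming a single axis of extent 128, an alignment hint of 1 and stripe_factors = 4; it yields the splits 128, 64, 32 and 1 quoted in the comment.

#include <algorithm>
#include <cmath>
#include <iostream>
#include <set>
#include <vector>

int main() {
  // stripe_factors = 4 gives the factors 1, 1/2, 1/4 plus the reserved 0.
  int stripe_factors = 4;
  int axis = 128;      // axis extent, as in the example above
  int axis_align = 1;  // stripe alignment hint for this axis (assumed 1 here)

  std::vector<float> factors;
  for (int i = 0; i < stripe_factors - 1; ++i) {
    factors.push_back(1.0f / std::pow(2.0f, i));
  }
  factors.push_back(0.0f);

  std::set<int> axis_splits;  // a set removes duplicate splits
  for (float factor : factors) {
    int split = std::max(static_cast<int>(std::ceil(axis * factor / axis_align)), 1) * axis_align;
    axis_splits.insert(std::min(axis, split));
  }

  for (int split : axis_splits) std::cout << split << " ";  // prints: 1 32 64 128
  std::cout << "\n";
  return 0;
}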
- bool skip = false; - for (size_t i = 0; i < output_dims; i++) { - if (order[i] == 1 && stripe_shape[i] == output_shape[i]) { - skip = true; - break; - } - } - if (skip) continue; - stripe_configs.push_back( - StripeConfig(stripe_shape, output_shape, strides, order, stripes, offset)); - } - } - return stripe_configs; -} - -std::vector GetPossibleInputConfigs(const StripeConfig& stripe_config, - const Tensor& tensor, - const std::vector& home_regions, - const CascaderOptions& options) { - std::vector configs; - for (const auto& home_region : home_regions) { - // Boundary configs - if (home_region == options->cascade_region || tensor->GetSize() > options->always_copy_size) { - configs.push_back(TensorConfig(tensor, home_region, TensorConfigState::BOUNDARY, - BufferMode::RECOMPUTE, {stripe_config}, false, home_region)); - } - if (home_region != options->cascade_region) { - configs.push_back(TensorConfig(tensor, home_region, TensorConfigState::BOUNDARY, - BufferMode::ROLLING, {stripe_config}, true, - options->cascade_region)); - } - } - if (!tensor->IsConstant()) { - // Interior configs - configs.push_back(TensorConfig(tensor, options->cascade_region, TensorConfigState::INTERIOR, - BufferMode::RECOMPUTE, {stripe_config}, false, - options->cascade_region)); - configs.push_back(TensorConfig(tensor, options->cascade_region, TensorConfigState::INTERIOR, - BufferMode::ROLLING, {stripe_config}, false, - options->cascade_region)); - } - return configs; -} - -// Check whether a StripeConfig can be an output boundary config -bool CanBound(const StripeConfig& stripe_config) { - // Determine whether the StripeConfig results in non-overlapping stripes - // which is the case when the stripe shape equals the strides - for (size_t i = 0; i < stripe_config->GetShape().size(); i++) { - // Check that the stripe shape and strides are equal - if (stripe_config->GetShape()[i] - stripe_config->GetStrides()[i] != 0) { - return false; - } - } - return true; -} - -std::vector GetPossibleOutputConfigs(const StripeConfig& stripe_config, - const Tensor& tensor, - const std::vector& home_regions, - const CascaderOptions& options) { - std::vector configs; - // Only StripeConfigs with non-overlapping stripes can be output boundary configs - if (CanBound(stripe_config)) { - for (const auto& home_region : home_regions) { - // Boundary configs - configs.push_back(TensorConfig(tensor, home_region, TensorConfigState::BOUNDARY, - BufferMode::RECOMPUTE, {stripe_config}, false, home_region)); - } - } - // Interior configs - configs.push_back(TensorConfig(tensor, options->cascade_region, TensorConfigState::INTERIOR, - BufferMode::RECOMPUTE, {stripe_config}, false, - options->cascade_region)); - configs.push_back(TensorConfig(tensor, options->cascade_region, TensorConfigState::INTERIOR, - BufferMode::ROLLING, {stripe_config}, false, - options->cascade_region)); - return configs; -} - -int GetInteriorMemoryUsage(const std::vector& input_configs, - const TensorConfig& output_config, const MemoryRegion& interior_region) { - int memory_usage = 0; - if (output_config->GetHomeRegion() == interior_region && - output_config->GetState() == TensorConfigState::BOUNDARY) { - memory_usage += output_config->GetTensor()->GetSize(); - } - for (const auto& input_config : input_configs) { - if (input_config->GetHomeRegion() == interior_region && - input_config->GetState() == TensorConfigState::BOUNDARY) { - memory_usage += input_config->GetTensor()->GetSize(); - } else if (input_config->GetHomeRegion() == interior_region || - 
input_config->GetCopyRegion() == interior_region) { - memory_usage += input_config->GetBufferSize(); - } - } - return memory_usage; -} - -/** - * \brief Returns a hint estimating the number of cycles required for - * the copy specified by tensor_config. - * - * \param tensor_config The tensor configuration to estimate. - * \return mem2mem_cycles Total estimated cycles. - * \return initial_mem2mem_cycles Estimated cycles for the first block. - */ -std::pair GetCopyCyclesHint(const TensorConfig& tensor_config) { - Tensor tensor = tensor_config->GetTensor(); - MemoryRegion home_region = tensor_config->GetHomeRegion(); - MemoryRegion copy_region = tensor_config->GetCopyRegion(); - int initial_mem2mem_cycles = 0; - int mem2mem_cycles = 0; - - // This Tensor needs to be copied - Count stripes for this config - for (const auto& stripe_config : tensor_config->GetStripeConfigs()) { - std::map, int> input_blocks = CountStripes(stripe_config, true); - bool first_block = true; - for (const auto& block : input_blocks) { - int bytes_transferred = mul_reduce(block.first) * tensor->GetDataType().bytes() * - tensor->GetCompressionRatio() * block.second; - int read_cycles = bytes_transferred * home_region->read_bandwidth + home_region->read_latency; - int write_cycles = bytes_transferred * copy_region->write_bandwidth; - - if (first_block) { - first_block = false; - initial_mem2mem_cycles += std::max(read_cycles, write_cycles); - } - mem2mem_cycles += std::max(read_cycles, write_cycles); - } - } - - return {mem2mem_cycles, initial_mem2mem_cycles}; -} - -std::vector GenerateSinglePlans( - const Part& part, const std::vector& output_stripe_configs, - const std::unordered_map, ObjectPtrHash, ObjectPtrEqual>& - home_map, - const CascaderOptions& options) { - std::vector plans; - std::vector part_group{part}; - // Create a selection of Plans per output_stripe_config - for (const auto& output_stripe_config : output_stripe_configs) { - // Calculate the input_stripe_configs - auto input_stripe_configs = part->CalculateInputStripeConfigs(output_stripe_config); - // From the input_stripe_configs, now derive all the possible input TensorConfigs - std::vector> all_possible_input_configs; - size_t i = 0; - for (const auto& stripe_config : input_stripe_configs) { - Tensor tensor = part->GetInputTensors()[i]; - all_possible_input_configs.push_back( - GetPossibleInputConfigs(stripe_config, tensor, home_map.at(tensor), options)); - i++; - } - // Now work out all the possible combinations of input TensorConfigs - auto input_config_combinations = - EnumerateCombinations(all_possible_input_configs); - Tensor output_tensor = part->GetOutputTensor(); - // Then determine the possible output TensorConfigs (no combinations here because there's only - // one output) - auto output_configs = GetPossibleOutputConfigs(output_stripe_config, output_tensor, - home_map.at(output_tensor), options); - // Calculate the performance information for the output_stripe_config for both the recompute and - // rolling cases - PerformanceInfo rolling_perf = - part->GetPerformanceInfo(output_stripe_config, BufferMode::ROLLING); - PerformanceInfo recompute_perf = - part->GetPerformanceInfo(output_stripe_config, BufferMode::RECOMPUTE); - // For all the possible input TensorConfig combinations - for (const auto& input_configs : input_config_combinations) { - std::vector tensor_configs; - std::vector open_input_configs; - // Add the input TensorConfigs to the 'tensor_configs' and - // record which input TensorConfigs are 'open' (i.e. 
'INTERIOR') - for (const auto& input_config : input_configs) { - tensor_configs.push_back(input_config); - if (input_config->GetState() == TensorConfigState::INTERIOR) { - open_input_configs.push_back(input_config); - } - } - for (const auto& output_config : output_configs) { - // Add the output TensorConfig to the tensor_configs and to - // the open configs (if it's 'INTERIOR') - tensor_configs.push_back(output_config); - std::vector open_configs = open_input_configs; - if (output_config->GetState() == TensorConfigState::INTERIOR) { - open_configs.push_back(output_config); - } - int bandwidth_cycles = 0; - int compute_cycles = 0; - int mem2mem_cycles = 0; - int initial_mem2mem_cycles = 0; - - // Pick the correct performance info based on the BufferMode - PerformanceInfo perf_info; - if (output_config->GetBufferMode() == BufferMode::RECOMPUTE) { - perf_info = recompute_perf; - } else { - perf_info = rolling_perf; - } - // Calculate the bandwidth cycles by multiplying the bytes read/written by the - // bandwidth of the memories - BlockConfig block_config = perf_info->block_config; - for (size_t i = 0; i < input_configs.size(); i++) { - Tensor tensor = input_configs[i]->GetTensor(); - MemoryRegion copy_region = input_configs[i]->GetCopyRegion(); - - if (input_configs[i]->DoCopy()) { - std::pair ret = GetCopyCyclesHint(input_configs[i]); - mem2mem_cycles += ret.first; - initial_mem2mem_cycles += ret.second; - } - float read_efficiency = - GetTransferEfficiency(tensor, block_config->GetInputBlockShape(), copy_region); - bandwidth_cycles += - (perf_info->read_bytes[i] / copy_region->read_bandwidth) * read_efficiency; - } - MemoryRegion write_region = output_config->GetCopyRegion(); - float write_efficiency = GetTransferEfficiency( - output_config->GetTensor(), block_config->GetOutputBlockShape(), write_region); - - bandwidth_cycles += - perf_info->write_bytes / write_region->write_bandwidth * write_efficiency; - compute_cycles = perf_info->compute_cycles; - // Take the max of compute and bandwidth cycles as we assume compute cycles - // can hide memory latency - int cycles = std::max(std::max(compute_cycles, bandwidth_cycles), mem2mem_cycles); - if (cycles > mem2mem_cycles) { - // NPU cycles are the bottleneck - add initial mem2mem transfer cycles - cycles += initial_mem2mem_cycles; - } - - int memory_usage = - GetInteriorMemoryUsage(input_configs, output_config, options->cascade_region); - plans.push_back(Plan(tensor_configs, open_configs, output_config, part_group, - options->cascade_region, memory_usage, cycles)); - } - } - } - return plans; -} - -std::unordered_map, std::vector> GenerateGraphPlans( - const CascaderGraph& graph, - const std::unordered_map, ObjectPtrHash, ObjectPtrEqual>& - home_map, - const CascaderOptions& options) { - ICHECK_GT(options->stripe_factors, 0) - << "stripe_factors = " << options->stripe_factors << ", but must be > 0"; - ICHECK_GT(options->max_plan_size, 0) - << "max_plan_size = " << options->max_plan_size << ", but must be > 0"; - // Define a map between the graph Tensors and possible StripeConfigs that the Tensor may be - // executed with - std::unordered_map, ObjectPtrHash, ObjectPtrEqual> - stripe_configs_by_tensor; - // Define a map between a given open TensorConfig and all the Plans which provide it - std::unordered_map> plans_by_config; - // Define a map between a group of connected Parts and all the closed plans covering them - std::unordered_map, std::vector> closed_plans; - // Define a nested map which indexes open plans by both Part group and the open 
TensorConfigs they - // provide. Note that we index in this way because Part group + open TensorConfigs combined - // defines a group of Plans which can be mutually Pareto culled. If we culled of Part group alone, - // we'd lose potentially valuable open Plans which could have gone on to be grown into Pareto - // optimal closed plans. - std::unordered_map, - std::unordered_map, std::vector>> - open_plans; - // Traverse the graph in a reverse topological order (should be enforced by GetPartOrder) - for (const auto& part : graph->GetPartOrder()) { - // First generate all the possible StripeConfigs for the Part assuming that it will become the - // output of a Plan. The number generated is a function of stripe_factors and the number of - // cascadable dimensions in the Part. - std::vector stripe_configs = - GenerateOutputStripeConfigs(part, options->stripe_factors, options->enable_striping, - options->enable_multi_dimensional_striping); - // Check to see if the output Tensor is part of any existing open Plans - if (stripe_configs_by_tensor.find(part->GetOutputTensor()) != stripe_configs_by_tensor.end()) { - // If there are other open Plans which have this Part's output Tensor as an input, then - // additionally consider the StripeConfigs of those open TensorConfigs so that we have the - // option to merge into those open Plans. - const std::set& connecting_configs = - stripe_configs_by_tensor.at(part->GetOutputTensor()); - std::copy(connecting_configs.begin(), connecting_configs.end(), - std::back_inserter(stripe_configs)); - } - // Generate all the single Part Plans for the previously determined StripeConfigs - auto single_part_plans = GenerateSinglePlans(part, stripe_configs, home_map, options); - std::vector plans; - for (const auto& partial_plan : single_part_plans) { - // If the output TensorConfig of the Plan is 'INTERIOR', then it must be merged with - // another open Plan - if (partial_plan->GetOutputConfig()->GetState() == TensorConfigState::INTERIOR) { - if (plans_by_config.find(partial_plan->GetOutputConfig()) != plans_by_config.end() && - partial_plan->GetOutputConfig()->GetTensor()->GetConsumers().size() == 1) { - // Search for all the open Plans which require the same TensorConfig - const auto& join_plans = plans_by_config.at(partial_plan->GetOutputConfig()); - for (const auto& join_plan : join_plans) { - // Only merge to form a new Plan if the resulting Plan size won't exceed the - // max_plan_size - if (join_plan->GetPartGroup().size() < static_cast(options->max_plan_size)) { - if (partial_plan->GetMemoryUsage() + join_plan->GetMemoryUsage() < - options->cascade_region->size) { - plans.push_back(partial_plan.Merge(join_plan)); - } - } - } - } - } else { - // If the single Part Plan had a 'BOUNDARY' output TensorConfig, then it doesn't need - // merging and can stand on its own. - plans.push_back(partial_plan); - } - } - // For all the newly created Plans, update the various maps - std::unordered_set> new_part_groups; - for (const auto& plan : plans) { - new_part_groups.insert(plan->GetPartGroup()); - if (plan->IsClosed()) { - closed_plans[plan->GetPartGroup()].push_back(plan); - } else { - open_plans[plan->GetPartGroup()][plan->GetOpenConfigs()].push_back(plan); - } - } - // Now Pareto cull both the open and closed Plans to remove non-optimal Plans - // Additionally, once culled we update another two maps, the stripe_configs_by_tensor - // and plans_by_config maps. 
- for (const auto& part_group : new_part_groups) { - if (closed_plans.find(part_group) != closed_plans.end()) { - closed_plans[part_group] = ParetoCullPlans( - closed_plans.at(part_group), options->max_closed_plans, options->disable_pareto_plans); - } - for (const auto& it : open_plans[part_group]) { - auto pareto_plans = - ParetoCullPlans(it.second, options->max_open_plans, options->disable_pareto_plans); - for (const auto& plan : pareto_plans) { - for (const auto& open_config : plan->GetOpenConfigs()) { - if (open_config != plan->GetOutputConfig()) { - for (const auto& stripe_config : open_config->GetStripeConfigs()) { - // Only add a StripeConfig if it contains for than one stripe - if (mul_reduce(stripe_config->GetStripes()) > 1) { - stripe_configs_by_tensor[open_config->GetTensor()].insert(stripe_config); - } - } - plans_by_config[open_config].push_back(plan); - } - } - } - } - } - } - return closed_plans; -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateOutputStripeConfigs") - .set_body_typed([](Part part, int stripe_factors, bool enable_striping, - bool multi_dimensional) { - if (stripe_factors < 0) { - return Array(); - } - return Array( - GenerateOutputStripeConfigs(part, stripe_factors, enable_striping, multi_dimensional)); - }); - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateSinglePlans") - .set_body_typed([](Part part, Array output_stripe_configs, - Map> home_map, CascaderOptions options) { - std::vector voutput_stripe_configs(output_stripe_configs.begin(), - output_stripe_configs.end()); - std::unordered_map, ObjectPtrHash, ObjectPtrEqual> - mhome_map; - for (const auto& it : home_map) { - std::vector home_regions; - for (const auto& i : it.second) { - home_regions.push_back(i); - } - mhome_map[it.first] = home_regions; - } - return Array(GenerateSinglePlans(part, voutput_stripe_configs, mhome_map, options)); - }); - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateGraphPlans") - .set_body_typed([](CascaderGraph graph, Map> home_map, - CascaderOptions options) { - std::unordered_map, ObjectPtrHash, ObjectPtrEqual> - mhome_map; - for (const auto& it : home_map) { - std::vector home_regions; - for (const auto& i : it.second) { - home_regions.push_back(i); - } - mhome_map[it.first] = home_regions; - } - auto closed_plans = GenerateGraphPlans(graph, mhome_map, options); - Map, Array> tclosed_plans; - for (auto& it : closed_plans) { - Array part_arr(it.first.begin(), it.first.end()); - Array plan_arr(it.second); - tclosed_plans.Set(part_arr, plan_arr); - } - return tclosed_plans; - }); - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GetCopyCyclesHint") - .set_body_typed([](TensorConfig tensor_config) { - std::pair ret = GetCopyCyclesHint(tensor_config); - return Array({ret.first, ret.second}); - }); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm diff --git a/src/contrib/ethosu/cascader/plan_generator.h b/src/contrib/ethosu/cascader/plan_generator.h deleted file mode 100644 index 71bdef82d2cb..000000000000 --- a/src/contrib/ethosu/cascader/plan_generator.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
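The plan generator above repeatedly calls ParetoCullPlans (with a cap such as max_closed_plans and a disable_pareto_plans flag) to discard Plans that are dominated on the costs recorded in each Plan, its memory usage and its cycle count. ParetoCullPlans itself is defined elsewhere in the cascader sources; the following is only a minimal standalone sketch of the dominance filter it implies, with SimplePlan an illustrative stand-in for a Plan's (memory_usage, cycles) pair.

#include <utility>
#include <vector>

// Illustrative stand-in for a Plan's two cost axes: (memory_usage, cycles).
using SimplePlan = std::pair<int, int>;

// A plan 'a' dominates 'b' if it is no worse on both axes and strictly
// better on at least one.
static bool Dominates(const SimplePlan& a, const SimplePlan& b) {
  return a.first <= b.first && a.second <= b.second &&
         (a.first < b.first || a.second < b.second);
}

// Keep only the Pareto-optimal plans: those not dominated by any other plan.
std::vector<SimplePlan> ParetoCull(const std::vector<SimplePlan>& plans) {
  std::vector<SimplePlan> survivors;
  for (const auto& candidate : plans) {
    bool dominated = false;
    for (const auto& other : plans) {
      if (Dominates(other, candidate)) {
        dominated = true;
        break;
      }
    }
    if (!dominated) survivors.push_back(candidate);
  }
  return survivors;
}

For example, culling {(100, 50), (80, 60), (120, 40), (90, 70)} removes only (90, 70), because (80, 60) dominates it; the other three are incomparable and survive.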
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/contrib/ethosu/cascader/plan_generator.h - * \brief Algorithm to generate possible Plans in the NPU cascader - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PLAN_GENERATOR_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_PLAN_GENERATOR_H_ - -#include -#include - -#include -#include -#include - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -class CascaderGraph; -class MemoryRegion; -class Part; -class Tensor; -class StripeConfig; -class Plan; -class CascaderOptions; - -using HomeMap = - std::unordered_map, ObjectPtrHash, ObjectPtrEqual>; - -/*! - * \brief Generate possible output StripeConfigs that could be applied to a Part's output. - * \param part The Part to generate StripeConfigs for. - * \param stripe_factors How many striping factors to try per axis. - * \param enable_striping Whether striping is enabled - * \param multi_dimensional Whether to stripe in more than one dimension. - * \return The generated StripeConfigs for the Part's output. - */ -std::vector GenerateOutputStripeConfigs(const Part& part, int stripe_factors, - bool enable_striping, bool multi_dimensional); - -/*! - * \brief Generate single-Part Plans for a Part for a given list of output StripeConfigs. - * \param part The Part to generate Plans for. - * \param output_stripe_configs The output StripeConfigs to generate Plans with. - * \param home_map The Tensor homing map defining valid memory homes for Tensors. - * \param options The configuration options with which to run the generator. - * \return The generated Plans covering the Part. - * \note For each of the output StripeConfigs provided, this algorithm will produce a number - * of Plans corresponding to different choices of Tensor homing/copying, buffer modes - * and INTERIOR/BOUNDARY states. For each of these variants, the Part's performance will - * be queried and the memory usage will be calculated. - */ -std::vector GenerateSinglePlans(const Part& part, - const std::vector& output_stripe_configs, - const HomeMap& home_map, const CascaderOptions& options); - -/*! - * \brief Generate pareto optimal Plans for a Graph. - * \param graph The Graph to generate Plans for. - * \param home_map The Tensor homing map defining valid memory homes for Tensors. - * \param options The configuration options with which to run the generator. - * \return A map between Part groups and a list of pareto optimal Plans which cover that group. - * \note This algorithm does the following: - * - * Iterate Part-by-Part in a reversed topological ordering (starting at the output Parts and - * working towards the input Parts). - * - * For each Part: - * 1. Determine the possible StripeConfigs we might want to use to stripe the Part using - * GenerateOutputStripeConfigs. - * 2. Additionally, collect all the StripeConfigs of open Plans that could connect to this - * Part (i.e. the Plan has an open TensorConfig for the Part's output Tensor). - * 3. Use these two lists of StripeConfigs to produce single Part Plans with GenerateSinglePlans. - * 4. 
For the generated Plans that have an open output TensorConfig, try and merge these into - * existing Plans which share an open input TensorConfig. - * 5. All Plans are then indexed by both the Part group they cover and their open TensorConfigs. - * 6. Plans which cover the same Part group and share the same open TensorConfigs are culled - * using ParetoCullPlans. - * - * Once every Part has been visited, return the Plans with no open TensorConfigs indexed by Part - * group. - */ -std::unordered_map, std::vector> GenerateGraphPlans( - const CascaderGraph& graph, const HomeMap& home_map, const CascaderOptions& options); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_PLAN_GENERATOR_H_ diff --git a/src/contrib/ethosu/cascader/propagator.cc b/src/contrib/ethosu/cascader/propagator.cc deleted file mode 100644 index ca8aaf6e27d5..000000000000 --- a/src/contrib/ethosu/cascader/propagator.cc +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -#include "propagator.h" - -#include -#include -#include - -#include -#include - -#include "common.h" -#include "stripe_config.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -void PropagatorNode::VisitAttrs(AttrVisitor* v) { - Array> tmp_transform; - for (const auto& vec : transform_) { - tmp_transform.push_back(make_array(vec)); - } - v->Visit("_transform", &tmp_transform); - Array tmp_arr = make_array(offset_); - v->Visit("_offset", &tmp_arr); -} - -Propagator::Propagator(const std::vector>& transform, - const std::vector& offset) { - auto n = make_object(); - size_t rows = transform.size(); - ICHECK_GT(rows, 0) << "The transform matrix must have at least 1 row."; - size_t columns = transform[0].size(); - for (const auto& row : transform) { - ICHECK_EQ(row.size(), columns) - << "All rows of the transform matrix must be of the same length."; - } - ICHECK_EQ(offset.size(), rows - 1) - << "The offset vector length must be equal to the transform matrix rows - 1."; - n->transform_ = std::move(transform); - n->offset_ = std::move(offset); - data_ = std::move(n); -} - -StripeConfig PropagatorNode::propagate(const StripeConfig& stripe_config) const { - size_t input_dimensions = transform_[0].size() - 1; - size_t output_dimensions = transform_.size() - 1; - auto n = make_object(); - n->shape_.resize(output_dimensions); - n->extent_.resize(output_dimensions); - n->strides_.resize(output_dimensions); - n->order_.resize(output_dimensions); - n->stripes_.resize(output_dimensions); - n->offset_.resize(output_dimensions); - for (size_t i = 0; i < output_dimensions; i++) { - float new_shape_acc{}; - float new_extent_acc{}; - const float* row = &transform_[i][0]; - for (size_t j = 0; j < input_dimensions; j++) { - new_shape_acc += row[j] * stripe_config->shape_[j]; - new_extent_acc += row[j] * stripe_config->extent_[j]; - n->strides_[i] += row[j] * stripe_config->strides_[j]; - // Order, stripes and offset should only get re-ordered, so we only - // care about whether or not transform elements are non-zero. - int non_zero = row[j] != 0; - n->order_[i] += non_zero * stripe_config->order_[j]; - n->stripes_[i] += non_zero * stripe_config->stripes_[j]; - n->offset_[i] += non_zero * stripe_config->offset_[j]; - } - // Shape and extent gain an additional constant term - new_shape_acc += row[input_dimensions]; - new_extent_acc += row[input_dimensions]; - // Shape and extent are ceil-rounded back to integers - n->shape_[i] = std::ceil(new_shape_acc); - n->extent_[i] += std::ceil(new_extent_acc); - // Apply the offset - n->offset_[i] += offset_[i]; - // No axis can have '0 stripes', so change all 0 elements to 1 - n->stripes_[i] = n->stripes_[i] == 0 ? 
1 : n->stripes_[i]; - } - // Remember to compute the hash - n->ComputeHash_(); - return StripeConfig(n); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.Propagator") - .set_body_typed([](Array> transform, Array offset) { - std::vector> vtransform; - for (const auto& vec : transform) { - vtransform.push_back(make_vector(vec)); - } - std::vector voffset = make_vector(offset); - return Propagator(vtransform, voffset); - }); - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.PropagatorPropagate") - .set_body_typed([](Propagator propagator, StripeConfig stripe_config) { - return propagator->propagate(stripe_config); - }); - -TVM_REGISTER_NODE_TYPE(PropagatorNode); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm diff --git a/src/contrib/ethosu/cascader/propagator.h b/src/contrib/ethosu/cascader/propagator.h deleted file mode 100644 index 3946d0806a0c..000000000000 --- a/src/contrib/ethosu/cascader/propagator.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/contrib/ethosu/cascader/propagator.h - * \brief Propagator class for the NPU cascader - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PROPAGATOR_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_PROPAGATOR_H_ - -#include -#include - -#include - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -class Propagator; -class StripeConfig; - -/*! \brief Node to represent a Propagator */ -class PropagatorNode : public Object { - public: - void VisitAttrs(AttrVisitor* v); - - /*! \return The transform matrix to apply to the StripeConfigs */ - const std::vector> GetTransform() const { return transform_; } - /*! \return The offset vector to apply to the StripeConfigs */ - const std::vector GetOffset() const { return offset_; } - /*! \return The number of input dimensions */ - size_t GetInputDims() const { return offset_.size(); } - /*! \return The number of output dimensions */ - size_t GetOutputDims() const { return transform_[0].size() - 1; } - /*! - * \brief Propagate a StripeConfig through the transform and offset matrices. - * \param stripe_config The StripeConfig to propagate. - * \return The transformed StripeConfig. - * \note The propagation proceeds as follows: - * - * Both the stripe shape and extent have 1 appended to them (so they pick up - * constant factors from the affine transform) and are then multiplied by the - * transform matrix. The result is then ceil-rounded and has the trailing 1 - * stripped to give the new shape and extent. - * - * The strides has 0 appended to it (so it doesn't pick up constant factors) - * and is then multiplied by the transform matrix. The trailing 0 is stripped. 
- * - * For the remaining three values we introduce the concept of the 'binarized' - * transform matrix. This is the transform matrix but with every non-zero element - * set to 1. It represents how axes get re-ordered as part of the propagation. - * - * [2, 0, 0, 1] [1, 0, 0, 1] - * [0, 0, 0.4, 2] binarize [0, 0, 1, 1] - * [0, 1.5, 0, 0] ----> [0, 1, 0, 0] - * [0, 0, 0, 1] [0, 0, 0, 1] - * - * The order has 0 appended to it and is multiplied by the 'binarized' transform - * matrix. The trailing 0 is then stripped. - * - * The stripes has 0 appended to it and multiplied by the 'binarized' transform - * matrix. The trailing 0 is then stripped and any remaining 0 elements that - * were introduced by the transform are set instead to 1. - * - * The stripe offset is multiplied by the 'binarized' transform matrix and is - * then summed with the propagator offset. - */ - StripeConfig propagate(const StripeConfig& stripe_config) const; - - static constexpr const char* _type_key = "contrib.ethosu.cascader.Propagator"; - TVM_DECLARE_FINAL_OBJECT_INFO(PropagatorNode, Object); - - protected: - friend class Propagator; - - /*! \brief The transform matrix to apply to the StripeConfigs */ - std::vector> transform_; - /*! \brief The offset vector to apply to the StripeConfigs */ - std::vector offset_; -}; - -/*! - * \brief A class to transform StripeConfigs according to the data dependencies - between Part outputs and inputs. The dependency is represented as an affine - transformation matrix + an offset vector. Using this, an output StripeConfig - can be propagated through a Part to arrive at the input StripeConfigs. - * \note The transform matrix should be a 2D affine transform matrix. - * As an example, consider a (1, 1, 2, 32) output stripe for an NHWC pooling - * operation with a 3x3 pool size: - * - * [1, 0, 0, 0, 0] [ 1] [ 1] - * [0, 1, 0, 0, 2] [ 1] [ 3] - * [0, 0, 1, 0, 2] x [ 2] = [ 4] - * [0, 0, 0, 1, 0] [32] [32] - * [0, 0, 0, 0, 1] [ 1] [ 1] - * - * Using the appropriate affine matrix we see that the required input data to - * produce that output stripe is a (1, 3, 4, 32) stripe. These matrices should - * be derived for the Parts to relate input and output data dependencies. - * - * The offset is a 1D vector representing the first tensor element to read. - * Often this is just the 0 element, but for an operator such as pad it may be - * negative. For instance, a symmetric padding by 1 of a 2D tensor would require - * the offset vector [-1, -1]. Additionally, positive offsets may be required - * for operators like strided_slice where only part of a tensor is read from. - */ -class Propagator : public ObjectRef { - public: - Propagator(const std::vector>& transform, const std::vector& offset); - - TVM_DEFINE_OBJECT_REF_METHODS(Propagator, ObjectRef, PropagatorNode); -}; - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_PROPAGATOR_H_ diff --git a/src/contrib/ethosu/cascader/proposal.cc b/src/contrib/ethosu/cascader/proposal.cc deleted file mode 100644 index e96be3466e10..000000000000 --- a/src/contrib/ethosu/cascader/proposal.cc +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
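To make the affine propagation described above concrete, here is a minimal standalone sketch of just the shape step of Propagator::propagate: append 1 to the output stripe shape, multiply by the transform matrix, ceil-round, and drop the trailing constant row. The strides, order, stripes and offset handling is omitted, and PropagateShape is an illustrative helper, not part of the cascader API.

#include <cmath>
#include <cstddef>
#include <vector>

// Append 1 to the output stripe shape (so it picks up the constant column),
// multiply by the affine transform matrix row-by-row, ceil-round, and skip
// the final row, which only encodes the trailing constant.
std::vector<int> PropagateShape(const std::vector<std::vector<float>>& transform,
                                const std::vector<int>& out_shape) {
  std::vector<float> augmented(out_shape.begin(), out_shape.end());
  augmented.push_back(1.0f);
  std::vector<int> in_shape;
  for (std::size_t i = 0; i + 1 < transform.size(); i++) {
    float acc = 0.0f;
    for (std::size_t j = 0; j < augmented.size(); j++) {
      acc += transform[i][j] * augmented[j];
    }
    in_shape.push_back(static_cast<int>(std::ceil(acc)));
  }
  return in_shape;
}

// With the 3x3 pooling matrix from the example above, the (1, 1, 2, 32)
// output stripe propagates to a (1, 3, 4, 32) input stripe:
//   PropagateShape({{1, 0, 0, 0, 0},
//                   {0, 1, 0, 0, 2},
//                   {0, 0, 1, 0, 2},
//                   {0, 0, 0, 1, 0},
//                   {0, 0, 0, 0, 1}}, {1, 1, 2, 32});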
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#include "proposal.h" - -#include -#include -#include -#include - -#include -#include -#include - -#include "plan.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -void ProposalNode::VisitAttrs(AttrVisitor* v) { - v->Visit("_graph", &graph_); - Array tmp_parts(part_group_.begin(), part_group_.end()); - v->Visit("_part_group", &tmp_parts); - Array tmp_plans(plans_.begin(), plans_.end()); - v->Visit("_plans", &tmp_plans); - Map tmp_tmap(input_tensor_configs_.begin(), input_tensor_configs_.end()); - v->Visit("_input_tensor_configs", &tmp_tmap); - v->Visit("_cascade_region", &cascade_region_); - v->Visit("_memory_usage", &memory_usage_); - v->Visit("_cycles", &cycles_); -} - -Proposal::Proposal(const CascaderGraph& graph, const std::vector& part_group, - const std::vector& plans, const TensorConfigMap& input_tensor_configs, - const MemoryRegion& cascade_region, int memory_usage, int cycles) { - auto n = make_object(); - n->graph_ = std::move(graph); - n->part_group_ = std::move(part_group); - std::sort(n->part_group_.begin(), n->part_group_.end()); - n->plans_ = std::move(plans); - n->input_tensor_configs_ = std::move(input_tensor_configs); - n->cascade_region_ = std::move(cascade_region); - n->memory_usage_ = std::move(memory_usage); - n->cycles_ = cycles; - data_ = std::move(n); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.Proposal") - .set_body_typed([](CascaderGraph graph, Array part_group, Array plans, - Map input_tensor_configs, MemoryRegion cascade_region, - int memory_usage, int cycles) { - std::vector spart_group(part_group.begin(), part_group.end()); - std::vector vplans(plans.begin(), plans.end()); - TensorConfigMap minput_tensor_configs(input_tensor_configs.begin(), - input_tensor_configs.end()); - return Proposal(graph, spart_group, vplans, minput_tensor_configs, cascade_region, - memory_usage, cycles); - }); - -TVM_REGISTER_NODE_TYPE(ProposalNode); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm diff --git a/src/contrib/ethosu/cascader/proposal.h b/src/contrib/ethosu/cascader/proposal.h deleted file mode 100644 index e5db0328b731..000000000000 --- a/src/contrib/ethosu/cascader/proposal.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/contrib/ethosu/cascader/proposal.h - * \brief Proposal object for the NPU cascader - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_H_ - -#include -#include - -#include -#include -#include - -#include "graph.h" -#include "plan.h" -#include "tensor_config.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -using MemoryUsageMap = std::unordered_map; -using TensorConfigMap = std::unordered_map; - -/*! \brief Node to represent a Proposal */ -class ProposalNode : public Object { - public: - void VisitAttrs(AttrVisitor* v); - - /*! \return The CascaderGraph to which the Proposal applies */ - const CascaderGraph GetGraph() const { return graph_; } - /*! \return The Parts which are covered by the Proposal */ - const std::vector GetPartGroup() const { return part_group_; } - /*! \return The Plans used in the Proposal */ - const std::vector GetPlans() const { return plans_; } - /*! \return The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan */ - const TensorConfigMap GetInputTensorConfigs() const { return input_tensor_configs_; } - /*! \return The MemoryRegion where cascading buffers should be homed */ - const MemoryRegion GetCascadeRegion() const { return cascade_region_; } - /*! \return The memory required to execute the Proposal in the cascading MemoryRegion */ - const int GetMemoryUsage() const { return memory_usage_; } - /*! \return The estimated cycles taken to execute the Proposal */ - int GetCycles() const { return cycles_; } - - static constexpr const char* _type_key = "contrib.ethosu.cascader.Proposal"; - TVM_DECLARE_FINAL_OBJECT_INFO(ProposalNode, Object); - - protected: - friend class Proposal; - - /*! \brief The CascaderGraph to which the Proposal applies */ - CascaderGraph graph_; - /*! \brief The Parts which are covered by the Proposal */ - std::vector part_group_; - /*! \brief The Plans used in the Proposal */ - std::vector plans_; - /*! \brief The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan */ - TensorConfigMap input_tensor_configs_; - /*! \brief The MemoryRegion where cascading buffers should be homed */ - MemoryRegion cascade_region_; - /*! \brief The memory required to execute the Proposal in the cascading MemoryRegion */ - int memory_usage_; - /*! \brief The estimated cycles taken to execute the Proposal */ - int cycles_; -}; - -/*! - * \brief A class which describes how to schedule a CascaderGraph as a series of disjoint Plans. 
- */ -class Proposal : public ObjectRef { - public: - Proposal(const CascaderGraph& graph, const std::vector& part_group, - const std::vector& plans, const TensorConfigMap& input_tensor_configs, - const MemoryRegion& cascade_region, int memory_usage, int cycles); - - TVM_DEFINE_OBJECT_REF_METHODS(Proposal, ObjectRef, ProposalNode); -}; - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_H_ diff --git a/src/contrib/ethosu/cascader/proposal_generator.cc b/src/contrib/ethosu/cascader/proposal_generator.cc deleted file mode 100644 index f886aad42408..000000000000 --- a/src/contrib/ethosu/cascader/proposal_generator.cc +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "cascader_options.h" -#include "graph.h" -#include "pareto.h" -#include "plan.h" -#include "plan_generator.h" -#include "proposal.h" -#include "stripe_config.h" -#include "tensor_config.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -std::unordered_set GetPlanBoundaryConfigs(const Plan& plan) { - std::unordered_set boundary_configs; - for (const auto& config : plan->GetTensorConfigs()) { - if (config->GetState() == TensorConfigState::BOUNDARY) { - boundary_configs.insert(config); - } - } - return boundary_configs; -} - -bool IsPlanCompatible(const Proposal& proposal, const std::vector& plan_part_group, - const std::unordered_set& plan_boundary_configs) { - // Check the Plan Part group is disjoint with the Proposal Part group - for (const auto& plan_part : plan_part_group) { - for (const auto& proposal_part : proposal->GetPartGroup()) { - if (plan_part == proposal_part) { - return false; - } - } - } - // If the Plan and Proposal disagree on the memory home of a Tensor, they - // are incompatible and can't be used to create a new Proposal - auto tensor_configs = proposal->GetInputTensorConfigs(); - for (const auto& plan_config : plan_boundary_configs) { - if (tensor_configs.find(plan_config->GetTensor()) != tensor_configs.end()) { - auto proposal_config = tensor_configs.at(plan_config->GetTensor()); - if (proposal_config->GetHomeRegion() != plan_config->GetHomeRegion()) { - return false; - } - } - } - return true; -} - -std::unordered_map, ObjectPtrHash, ObjectPtrEqual> CreatePlansByPart( - const std::unordered_map, std::vector>& plans_by_group, - const CascaderGraph& graph) { - std::unordered_map, ObjectPtrHash, ObjectPtrEqual> plans_by_part; - for (const auto& it : plans_by_group) { - auto part_group = it.first; - auto plans = it.second; - int highest_index = 0; - Part& index_part = part_group.front(); - // 
Determine the Part in the Part group with the highest ID - this will be used to index - // the Plans - for (const auto& part : part_group) { - int pid = graph->GetPartID(part); - if (pid >= highest_index) { - index_part = part; - highest_index = pid; - } - } - plans_by_part[index_part].insert(plans_by_part[index_part].begin(), plans.begin(), plans.end()); - } - return plans_by_part; -} - -Proposal AddPlanToProposal(const Proposal& proposal, const Plan& plan, - const std::unordered_set& plan_boundary_configs) { - std::vector new_plans = proposal->GetPlans(); - new_plans.push_back(plan); - TensorConfigMap new_configs = proposal->GetInputTensorConfigs(); - // Add input configs from the Plan if they're homed in the cascade region - for (const auto& config : plan_boundary_configs) { - if (config->GetHomeRegion() == proposal->GetCascadeRegion()) { - new_configs[config->GetTensor()] = config; - } - } - // Remove the Plan's output config from the new_configs if it's present because - // it won't be an input to the Proposal any more - if (new_configs.find(plan->GetOutputConfig()->GetTensor()) != new_configs.end()) { - new_configs.erase(plan->GetOutputConfig()->GetTensor()); - } - // The updated memory usage is the memory required to run the Plan plus the - // non-local memory that's required in the Proposal at that point in time - int new_memory_usage = plan->GetMemoryUsage(); - for (const auto& it : new_configs) { - if (plan_boundary_configs.find(it.second) == plan_boundary_configs.end()) { - new_memory_usage += it.first->GetSize(); - } - } - new_memory_usage = std::max(new_memory_usage, proposal->GetMemoryUsage()); - int new_cycles = proposal->GetCycles() + plan->GetCycles(); - std::vector new_part_group = proposal->GetPartGroup(); - new_part_group.insert(new_part_group.end(), plan->GetPartGroup().begin(), - plan->GetPartGroup().end()); - std::sort(new_part_group.begin(), new_part_group.end()); - return Proposal(proposal->GetGraph(), new_part_group, new_plans, new_configs, - proposal->GetCascadeRegion(), new_memory_usage, new_cycles); -} - -std::vector GeneratePartialProposals( - const CascaderGraph& graph, const HomeMap& home_map, const CascaderOptions options, - const std::unordered_map, ObjectPtrHash, ObjectPtrEqual>& plans_by_part, - const std::vector& partial_proposal_group, - std::unordered_map, std::vector>* proposals_by_group) { - if (proposals_by_group->find(partial_proposal_group) != proposals_by_group->end()) { - return proposals_by_group->at(partial_proposal_group); - } - if (partial_proposal_group.size() == 0) { - (*proposals_by_group)[partial_proposal_group] = - std::vector{Proposal(graph, std::vector(), std::vector(), - TensorConfigMap(), options->cascade_region, 0, 0)}; - } else { - Part part = partial_proposal_group.back(); - const auto& plans = plans_by_part.at(part); - for (const auto& plan : plans) { - if (plan->GetInteriorRegion() == options->cascade_region) { - // Doing this isn't very efficient, but it improves the performance of the Plan - // generator - std::unordered_set plan_boundary_configs = GetPlanBoundaryConfigs(plan); - // The residual_proposal_group is a Part group indicating the Parts which aren't - // covered by the current Plan. It's the group for which we must find 'residual - // Proposals', meaning Proposals which cover the rest of the CascaderGraph assuming we - // pick the current Plan. 
- std::vector residual_proposal_group; - std::copy_if(partial_proposal_group.begin(), partial_proposal_group.end(), - std::back_inserter(residual_proposal_group), [&plan](Part value) { - return std::find(plan->GetPartGroup().begin(), plan->GetPartGroup().end(), - value) == plan->GetPartGroup().end(); - }); - // std::sort(residual_proposal_group.begin(), residual_proposal_group.end()); - const auto& residual_proposals = GeneratePartialProposals( - graph, home_map, options, plans_by_part, residual_proposal_group, proposals_by_group); - auto plan_output_tensor = plan->GetOutputConfig()->GetTensor(); - ICHECK_LE(plan_output_tensor->GetProducers().size(), 1) - << "All tensors must have at most one producer."; - for (const auto& residual_proposal : residual_proposals) { - if (IsPlanCompatible(residual_proposal, plan->GetPartGroup(), plan_boundary_configs)) { - (*proposals_by_group)[partial_proposal_group].push_back( - AddPlanToProposal(residual_proposal, plan, plan_boundary_configs)); - } - } - } - } - (*proposals_by_group)[partial_proposal_group] = - ParetoCullProposals(proposals_by_group->at(partial_proposal_group), options->max_proposals, - options->disable_pareto_proposals); - } - return proposals_by_group->at(partial_proposal_group); -} - -std::vector GenerateProposals(const CascaderGraph& graph, const HomeMap& home_map, - const CascaderOptions options) { - // First generate all the Pareto optimal Plans for the CascaderGraph - auto plans_by_group = GenerateGraphPlans(graph, home_map, options); - // First create a map between every Part in the CascaderGraph and all the Plans for which that - // Part is the lowest ID Part within the Plan's Part group - std::unordered_map, ObjectPtrHash, ObjectPtrEqual> plans_by_part = - CreatePlansByPart(plans_by_group, graph); - // The Part group that partial Proposals are current being generated for - std::vector partial_proposal_group = graph->GetPartOrder(); - // A map of Proposals indexed by the Part group they cover - std::unordered_map, std::vector> proposals_by_group; - return GeneratePartialProposals(graph, home_map, options, plans_by_part, partial_proposal_group, - &proposals_by_group); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateProposals") - .set_body_typed([](CascaderGraph graph, Map> home_map, - CascaderOptions options) { - std::unordered_map, ObjectPtrHash, ObjectPtrEqual> - mhome_map; - for (const auto& it : home_map) { - std::vector home_regions; - for (const auto& i : it.second) { - home_regions.push_back(i); - } - mhome_map[it.first] = home_regions; - } - return Array(GenerateProposals(graph, mhome_map, options)); - }); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm diff --git a/src/contrib/ethosu/cascader/proposal_generator.h b/src/contrib/ethosu/cascader/proposal_generator.h deleted file mode 100644 index 624dc5b98a85..000000000000 --- a/src/contrib/ethosu/cascader/proposal_generator.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/contrib/ethosu/cascader/proposal_generator.h - * \brief Algorithm to generate possible Proposals in the NPU cascader - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_GENERATOR_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_GENERATOR_H_ - -#include -#include - -#include -#include -#include - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -class CascaderGraph; -class MemoryRegion; -class Tensor; -class Proposal; -class CascaderOptions; - -using HomeMap = - std::unordered_map, ObjectPtrHash, ObjectPtrEqual>; - -/*! - * \brief Generate Pareto optimal Proposals for a CascaderGraph. - * \param graph The CascaderGraph to generate Proposals for. - * \param home_map The Tensor homing map defining valid memory homes for Tensors. - * \param options The configuration options with which to run the generator. - * \return A vector of Pareto optimal Proposals. - * \note This algorithm takes a top-down dynamic programming approach to determining how - * to optimally combine Plans into Proposals. It does the following: - * - * First, run GenerateGraphPlans to generate the Pareto optimal Plans that cover all the - * Part groups in the CascaderGraph. - * - * Solve the problem recursively, generating optimal Proposals for increasingly small - * portions of the overall graph. - * - * Take the first Part in the graph: - * 1. Find all the Plans for which the Part is both in the Plan's Part group and has the - * highest Part ID of any Part in the Part group (i.e. it's the 'first' Part in the - * group). - * For each Plan: - * 2. Get the Part group covered by the Plan and subtract it from the 'total Part group' - * covering all the Parts. This forms a 'residual Part group'. - * 3. Recursively, determine the optimal Proposals for the 'residual Part group' (the graph - * minus the Parts included in the Plan). Memoize the results. - * For each residual Proposal: - * 4. Create a new Proposal by adding the current Plan to the residual Proposal. - * 5. Pareto cull all the newly created Proposals (which all share the same Part group). - * 6. Return the Proposals which cover all the Parts in the CascaderGraph. - * - */ -std::vector GenerateProposals(const CascaderGraph& graph, const HomeMap& home_map, - const CascaderOptions& options); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_PROPOSAL_GENERATOR_H_ diff --git a/src/contrib/ethosu/cascader/stripe_config.cc b/src/contrib/ethosu/cascader/stripe_config.cc deleted file mode 100644 index 4a75730e5e39..000000000000 --- a/src/contrib/ethosu/cascader/stripe_config.cc +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
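The recursion described for GenerateProposals can be pictured with a much-simplified, single-objective sketch: parts are plain indices, each candidate plan covers a set of parts at a scalar cost, and the best cost of covering a residual part group is memoised by group. The real GeneratePartialProposals keeps whole Pareto fronts of Proposals per group rather than one best cost and also checks Plan/Proposal compatibility; ToyPlan and BestCost are illustrative names, and every part is assumed to have at least one candidate plan covering it.

#include <algorithm>
#include <iterator>
#include <limits>
#include <map>
#include <vector>

// Illustrative stand-ins: a part is an index and each plan keyed under a part
// is assumed to cover that part, so the recursion always makes progress.
struct ToyPlan {
  std::vector<int> parts;
  int cost;
};

// Memoised top-down search: choose a plan covering the last uncovered part,
// recurse on the residual group, and cache results by residual group.
int BestCost(const std::vector<int>& group,
             const std::map<int, std::vector<ToyPlan>>& plans_by_part,
             std::map<std::vector<int>, int>* memo) {
  if (group.empty()) return 0;
  auto it = memo->find(group);
  if (it != memo->end()) return it->second;
  int best = std::numeric_limits<int>::max();
  for (const auto& plan : plans_by_part.at(group.back())) {
    // Residual group: the parts not covered by this plan.
    std::vector<int> residual;
    std::copy_if(group.begin(), group.end(), std::back_inserter(residual),
                 [&plan](int p) {
                   return std::find(plan.parts.begin(), plan.parts.end(), p) ==
                          plan.parts.end();
                 });
    int residual_cost = BestCost(residual, plans_by_part, memo);
    if (residual_cost != std::numeric_limits<int>::max()) {
      best = std::min(best, plan.cost + residual_cost);
    }
  }
  (*memo)[group] = best;
  return best;
}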
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#include "stripe_config.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "common.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -template -std::map, int> MultiplyCombinations(std::vector> values) { - if (values.size() == 1) { - std::map, int> combs; - for (const auto& it : values[0]) { - combs[std::vector(1, it.first)] = it.second; - } - return combs; - } - auto combs = - MultiplyCombinations(std::vector>(values.begin(), values.end() - 1)); - std::map, int> new_combs; - for (const auto& val_it : values.back()) { - for (const auto& comb_it : combs) { - auto new_comb = std::vector(comb_it.first); - new_comb.push_back(val_it.first); - new_combs[new_comb] = val_it.second * comb_it.second; - } - } - return new_combs; -} - -std::map, int> CountStripes(const StripeConfig& stripe_config, - bool enable_sliding_window = false) { - std::vector> per_axis_sizes(stripe_config->GetOrder().size()); - for (size_t axis = 0; axis < stripe_config->GetOrder().size(); axis++) { - int start = stripe_config->GetOffset()[axis]; - size_t stripe_count = static_cast(stripe_config->GetStripes()[axis]); - int stride = stripe_config->GetStrides()[axis]; - int shape = stripe_config->GetShape()[axis]; - int extent = stripe_config->GetExtent()[axis]; - int low; - int high = std::numeric_limits::min(); - for (size_t i = 0; i < stripe_count; i++) { - // Calculate the 'non-edge case' sizes in one go to save effort - if (!enable_sliding_window || i > 0) { - if (start >= 0 && extent - shape - start >= 0 && stride > 0) { - int whole_stripes = - std::min(static_cast(stripe_count - i), (extent - shape - start) / stride + 1); - if (enable_sliding_window) { - per_axis_sizes[axis][stride] += whole_stripes; - } else { - per_axis_sizes[axis][shape] += whole_stripes; - } - i += whole_stripes - 1; - start += whole_stripes * stride; - high = std::min(start - stride + shape, extent); - continue; - } - } - low = std::max(start, 0); - if (enable_sliding_window) { - low = std::max(low, high); - } - high = std::min(start + shape, extent); - int size = high - low; - if (size > 0) { - per_axis_sizes[axis][size]++; - } - start += stride; - } - } - return MultiplyCombinations(per_axis_sizes); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.CountStripes") - .set_body_typed([](StripeConfig stripe_config, bool enable_sliding_window) { - Map, Integer> ret; - auto stripe_counts = CountStripes(stripe_config, enable_sliding_window); - for (const auto& it : stripe_counts) { - ret.Set(make_array(it.first), it.second); - } - return ret; - }); - -void StripeConfigNode::VisitAttrs(AttrVisitor* v) { - Array tmp_arr = make_array(shape_); - v->Visit("_shape", &tmp_arr); - tmp_arr = make_array(extent_); - v->Visit("_extent", &tmp_arr); - tmp_arr = make_array(order_); - v->Visit("_order", &tmp_arr); - tmp_arr = make_array(stripes_); - v->Visit("_stripes", &tmp_arr); - tmp_arr = make_array(offset_); - v->Visit("_offset", &tmp_arr); - Array tmp_float_arr = make_array(strides_); - v->Visit("_strides", &tmp_float_arr); - int64_t tmp_hash = 
static_cast(hash_); - v->Visit("_hash", &tmp_hash); -} - -void StripeConfigNode::ComputeHash_() { - hash_ = hash_vector(shape_); - hash_combine(&hash_, hash_vector(extent_)); - hash_combine(&hash_, hash_vector(strides_)); - hash_combine(&hash_, hash_vector(order_)); - hash_combine(&hash_, hash_vector(stripes_)); - hash_combine(&hash_, hash_vector(offset_)); -} - -StripeConfig::StripeConfig(const std::vector& shape, const std::vector& extent, - const std::vector& strides, const std::vector& order, - const std::vector& stripes, const std::vector& offset) { - auto n = make_object(); - n->shape_ = std::move(shape); - n->extent_ = std::move(extent); - n->strides_ = std::move(strides); - n->order_ = std::move(order); - n->stripes_ = std::move(stripes); - n->offset_ = std::move(offset); - n->ComputeHash_(); - data_ = std::move(n); -} - -inline bool StripeConfig::operator==(const StripeConfig& other) const { - if (get() == other.get()) return true; - if (get() == nullptr || other.get() == nullptr) return false; - return ((*this)->shape_ == other->shape_ && (*this)->extent_ == other->extent_ && - (*this)->strides_ == other->strides_ && (*this)->order_ == other->order_ && - (*this)->stripes_ == other->stripes_ && (*this)->offset_ == other->offset_); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.StripeConfig") - .set_body_typed([](Array shape, Array extent, Array strides, - Array order, Array stripes, Array offset) { - std::vector vshape = make_vector(shape); - std::vector vextent = make_vector(extent); - std::vector vstrides = make_vector(strides); - std::vector vorder = make_vector(order); - std::vector vstripes = make_vector(stripes); - std::vector voffset = make_vector(offset); - return StripeConfig(vshape, vextent, vstrides, vorder, vstripes, voffset); - }); - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.StripeConfigEqual") - .set_body_method(&StripeConfig::operator==); - -TVM_REGISTER_NODE_TYPE(StripeConfigNode); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm diff --git a/src/contrib/ethosu/cascader/stripe_config.h b/src/contrib/ethosu/cascader/stripe_config.h deleted file mode 100644 index 95759c7e4f03..000000000000 --- a/src/contrib/ethosu/cascader/stripe_config.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/contrib/ethosu/cascader/stripe_config.h - * \brief StripeConfig object for the NPU cascader - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_STRIPE_CONFIG_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_STRIPE_CONFIG_H_ - -#include -#include - -#include -#include -#include - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -class StripeConfig; -class PropagatorNode; - -/*! 
\brief Node to represent a StripeConfig */ -class StripeConfigNode : public Object { - public: - void VisitAttrs(AttrVisitor* v); - - /*! - * \brief Get the shape of the stripe config. - * \return The shape of the stripe config. - * \note The shape refers to the size of the stripes in each dimension. - */ - inline std::vector GetShape() const { return shape_; } - /*! - * \brief Get the extent of the stripe config. - * \return The extent of the stripe config. - * \note The extent refers to the extent over which a StripeConfig operates. - * Specifically, it is the extent in each axis between the lowest value read - * by a stripe and the highest value read by a stripe. - */ - inline std::vector GetExtent() const { return extent_; } - /*! - * \brief Get the strides of the stripe config. - * \return The strides of the stripe config. - * \note The strides refer to the stride between stripes in each axis. - * The strides are represented as a float rather than an int to account for - * cases of 'fractional striding'. The stride should therefore be interpreted - * as the average striding in each axis. - * - * The starting offset of the i-th stripe in axis 'ax' is given by: - * - * stripe_offset_i[ax] = offset[ax] + floor(strides[ax]*i) - * - * As a concrete example, consider a 2x2 upscaling operation. If an output - * stripe config with a stride of (3, 3) is chosen, then when this is - * propagated to the input it will be reduced by a factor of two to become - * (1.5, 1.5). - * - * This means the first stripe in axis 0 should begin at (floor(1.5*0), 0) = (0, 0), - * the second at (floor(1.5*1), 0) = (1, 0), and the third at (floor(1.5*2), 0) = - * (3, 0). This results in irregular striding where 'strides' is the average - * striding value. - */ - inline std::vector GetStrides() const { return strides_; } - /*! - * \brief Get the order of the stripe config. - * \return The order of the stripe config. - * \note The order refers to order in which the axes are iterated over. - * The first (outermost) axis is labelled as 1 with the rest increasing - * according to the axis' position. Any axis labelled with 0 isn't iterated over. - * For example, [1, 3, 2] would mean axis 0 is the outermost iteration axis, - * then axis 2, then finally axis 1. - */ - inline std::vector GetOrder() const { return order_; } - /*! - * \brief Get the stripes of the stripe config. - * \return The stripes of the stripe config. - * \note The stripes refer to the number of stripes in each axis. - * There must be at least one stripe in any given axis. - */ - inline std::vector GetStripes() const { return stripes_; } - /*! - * \brief Get the offset of the stripe config. - * \return The offset of the stripe config. - * \note The offset refers to the offset of the first stripe - * from the first element of the tensor. For example, in a slice operation - * which only returns the second (4, 8) half of a (8, 8) tensor, the offset - * would need to be [4, 0]. - */ - inline std::vector GetOffset() const { return offset_; } - /*! \return The hash of the StripeConfigNode */ - size_t GetHash() const { return hash_; } - - static constexpr const char* _type_key = "contrib.ethosu.cascader.StripeConfig"; - TVM_DECLARE_FINAL_OBJECT_INFO(StripeConfigNode, Object); - - protected: - friend class StripeConfig; - friend class PropagatorNode; - - /*! \brief Compute the hash of the StripeConfigNode */ - void ComputeHash_(); - - /*! \brief The shape of the stripes */ - std::vector shape_; - /*! 
\brief The extent of region to stripe over */ - std::vector extent_; - /*! \brief The strides of the stripes */ - std::vector strides_; - /*! \brief The order of the striping axes */ - std::vector order_; - /*! \brief The number of stripes in each axis */ - std::vector stripes_; - /*! \brief The offset of the first stripe */ - std::vector offset_; - /*! \brief The hash of the StripeConfigNode */ - std::size_t hash_{0}; -}; - -/*! - * \brief An object to describe how a tensor should be computed as a series - of n-dimensional tiles, or 'stripes'. - * \note The StripeConfig is a verbose way of specifying how to tile a tensor. - * We can imagine taking a 2D tensor of size (12, 12) and wanting to compute - * it in tiles of (4, 4). The tile is referred to as a stripe here to generalize - * this to n-dimensional tiles. - * - * The size of that stripe in each axis is the 'shape'. The strides is how far - * you should move between stripes, so also (4, 4) for a simple non-overlappping - * tiling. However, we explore some overlapping scheduling options so shape != strides - * in general. Note that the striding may be fractional, for instance (1.5, 1.5). - * This means the first stripe should begin at (floor(1.5*0), 0) = (0, 0), the second - * at (floor(1.5*1), 0) = (1, 0), and the third at (floor(1.5*2), 0) = (3, 0). This results - * in slightly irregular striding where 'strides' should be interpreted as the average - * striding value. - * - * The 'extent' is simply (12, 12), the region over which we're conducting our tiling. - * - * The 'order' tells us which axis to iterate over first and which second and the - * 'stripes' tells us how many stripes we need to compute in each of those axes. - * - * Finally, the 'offset' tells us where to start the first stripe. In this simple - * case the offset is just (0, 0), but in something like a slice operation we - * may want to start part way through a tensor. - */ -class StripeConfig : public ObjectRef { - public: - StripeConfig(const std::vector& shape, const std::vector& extent, - const std::vector& strides, const std::vector& order, - const std::vector& stripes, const std::vector& offset); - /*! - * \brief Check if two StripeConfigs are equals to each other. - * \param other StripeConfig to be checked. - * \return Whether the two StripeConfigs equal each other. - */ - bool operator==(const StripeConfig& other) const; - - TVM_DEFINE_OBJECT_REF_METHODS(StripeConfig, ObjectRef, StripeConfigNode); -}; - -/*! - * \brief Count the number of stripes of each shape that are executed for a given - StripeConfig. - * \param stripe_config The StripeConfig to count the stripes for. - * \param enable_sliding_window Whether to assume the sliding window optimization. - * \return A map between stripe shapes and the number of stripes of that shape that need - * executing. - * \note If the StripeConfig were to split an (8, 8) tensor into (4, 4) stripes with - * (4, 4) striding, then this function will return {(4, 4): 4} indicating that 4 (4, 4) - * stripes will be executed. If instead an (8, 8) were striped using (5, 5) stripes - * with (5, 5) striding, this function would return: - * - * { - * (5, 5): 1, - * (3, 5): 1, - * (5, 3): 1, - * (3, 3): 1, - * } - * - * This is because some of the stripes will exceed the extent of the tensor and so only part - * of them will need executing. Therefore, CountStripes will return the exact number of each - * shape of stripe that is executed, accounting for edge and overlap behaviour which is not - * explicit in the StripeConfig alone. 
- */ -std::map, int> CountStripes(const StripeConfig& stripe_config, - bool enable_sliding_window); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -// Hash and equal function for StripeConfig -namespace std { - -/*! \brief The equal_to function for tvm::contrib::ethosu::cascader::StripeConfig */ -template <> -struct equal_to<::tvm::contrib::ethosu::cascader::StripeConfig> { - bool operator()(const ::tvm::contrib::ethosu::cascader::StripeConfig& lhs, - const ::tvm::contrib::ethosu::cascader::StripeConfig& rhs) const { - return lhs == rhs; - } -}; - -/*! \brief The hash function for tvm::contrib::ethosu::cascader::StripeConfig */ -template <> -struct hash<::tvm::contrib::ethosu::cascader::StripeConfig> { - std::size_t operator()( - const ::tvm::contrib::ethosu::cascader::StripeConfig& stripe_config) const { - return stripe_config->GetHash(); - } -}; - -} // namespace std - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_STRIPE_CONFIG_H_ diff --git a/src/contrib/ethosu/cascader/tensor_config.cc b/src/contrib/ethosu/cascader/tensor_config.cc deleted file mode 100644 index fc9abd7346e1..000000000000 --- a/src/contrib/ethosu/cascader/tensor_config.cc +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
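// NOTE (editor's illustration): the CountStripes behaviour described above can be reproduced
// with a short standalone sketch. This is not the removed cascader code; the function name,
// the 2-D-only loops and the example shapes are all illustrative.
#include <algorithm>
#include <cstdio>
#include <map>
#include <vector>

// Count how many stripes of each clipped shape are needed to cover `extent`
// when stripes of `shape` are advanced by `strides` along both axes.
std::map<std::vector<int>, int> CountClippedStripes(const std::vector<int>& shape,
                                                    const std::vector<int>& extent,
                                                    const std::vector<int>& strides) {
  std::map<std::vector<int>, int> counts;
  for (int y = 0; y < extent[0]; y += strides[0]) {
    for (int x = 0; x < extent[1]; x += strides[1]) {
      counts[{std::min(shape[0], extent[0] - y), std::min(shape[1], extent[1] - x)}] += 1;
    }
  }
  return counts;
}

int main() {
  // An (8, 8) tensor striped with (5, 5) stripes and (5, 5) striding yields
  // {(5, 5): 1, (3, 5): 1, (5, 3): 1, (3, 3): 1}, matching the comment above.
  for (const auto& kv : CountClippedStripes({5, 5}, {8, 8}, {5, 5})) {
    std::printf("(%d, %d): %d\n", kv.first[0], kv.first[1], kv.second);
  }
  return 0;
}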
- */ -#include "tensor_config.h" - -#include -#include -#include - -#include -#include -#include - -#include "common.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -void MemoryRegionNode::VisitAttrs(AttrVisitor* v) { - v->Visit("name", &name); - v->Visit("size", &size); - v->Visit("read_bandwidth", &read_bandwidth); - v->Visit("write_bandwidth", &write_bandwidth); - v->Visit("read_latency", &read_latency); - v->Visit("write_latency", &write_latency); - v->Visit("burst_length", &burst_length); -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.MemoryRegion") - .set_body_typed([](String name, int size, int read_bandwidth, int write_bandwidth, - int read_latency, int write_latency, int burst_length) { - return MemoryRegion(name, size, read_bandwidth, write_bandwidth, read_latency, write_latency, - burst_length); - }); - -TVM_REGISTER_NODE_TYPE(MemoryRegionNode); - -void TensorConfigNode::VisitAttrs(AttrVisitor* v) { - v->Visit("_tensor", &tensor_); - v->Visit("_home_region", &home_region_); - int state = static_cast(state_); - v->Visit("_state", &state); - int buffer_mode = static_cast(buffer_mode_); - v->Visit("_buffer_mode", &buffer_mode); - Array tmp_arr(stripe_configs_); - v->Visit("_stripe_configs", &tmp_arr); - v->Visit("_copy_tensor", ©_tensor_); - v->Visit("_copy_region", ©_region_); - int64_t tmp_hash = static_cast(hash_); - v->Visit("_hash", &tmp_hash); -} - -int TensorConfigNode::GetBufferSize() const { - if (buffer_mode_ == BufferMode::RECOMPUTE) { - return GetRecomputeBufferSize_(); - } else { - return GetRollingBufferSize_(); - } -} - -void TensorConfigNode::ComputeHash_() { - hash_ = ObjectHash()(tensor_); - hash_combine(&hash_, std::hash()(home_region_->name)); - hash_combine(&hash_, std::hash()(static_cast(state_))); - hash_combine(&hash_, std::hash()(static_cast(buffer_mode_))); - hash_combine(&hash_, hash_vector(stripe_configs_)); - hash_combine(&hash_, std::hash()(copy_tensor_)); - hash_combine(&hash_, std::hash()(copy_region_->name)); -} - -int TensorConfigNode::GetRecomputeBufferSize_() const { - size_t buffer_size = 0; - for (const auto& stripe_config : stripe_configs_) { - buffer_size += mul_reduce(stripe_config->GetShape()); - } - return buffer_size * tensor_->GetDataType().bytes() * tensor_->GetCompressionRatio(); -} - -int TensorConfigNode::GetRollingBufferSize_() const { - int buffer_size = 0; - for (const auto& stripe_config : stripe_configs_) { - int rolling_axis = -1; - for (size_t i = 0; i < stripe_config->GetOrder().size(); i++) { - // The axis must be striped (> 1 stripes) and ordered (order != 0) - if (stripe_config->GetStripes()[i] > 1 && stripe_config->GetOrder()[i] != 0) { - // If we've yet to find a possible rolling axis, use this one - if (rolling_axis == -1) { - rolling_axis = i; - continue; - } - // Otherwise, replace the rolling axis if the current axis has an earlier order - if (stripe_config->GetOrder()[i] < stripe_config->GetOrder()[rolling_axis]) { - rolling_axis = i; - } - } - } - // If we didn't find a rolling axis, just use axis 0 - if (rolling_axis == -1) { - rolling_axis = 0; - } - int rolling_size = 1; - for (size_t i = 0; i < tensor_->GetShape().size(); i++) { - if (static_cast(i) == rolling_axis) { - rolling_size *= stripe_config->GetShape()[i]; - } else { - rolling_size *= tensor_->GetShape()[i]; - } - } - buffer_size += rolling_size; - } - return buffer_size * tensor_->GetDataType().bytes() * tensor_->GetCompressionRatio(); -} - -TensorConfig::TensorConfig(const Tensor& tensor, const MemoryRegion& 
home_region, - TensorConfigState state, BufferMode buffer_mode, - const std::vector& stripe_configs, bool copy_tensor, - const MemoryRegion& copy_region) { - auto n = make_object(); - n->tensor_ = std::move(tensor); - n->home_region_ = std::move(home_region); - n->state_ = state; - n->buffer_mode_ = buffer_mode; - n->stripe_configs_ = std::move(stripe_configs); - n->copy_tensor_ = copy_tensor; - n->copy_region_ = std::move(copy_region); - n->ComputeHash_(); - data_ = std::move(n); -} - -inline bool TensorConfig::operator==(const TensorConfig& other) const { - if (get() == other.get()) return true; - if (get() == nullptr || other.get() == nullptr) return false; - if ((*this)->tensor_ == other->tensor_ && (*this)->home_region_ == other->home_region_ && - (*this)->state_ == other->state_ && (*this)->buffer_mode_ == other->buffer_mode_ && - (*this)->stripe_configs_ == other->stripe_configs_ && - (*this)->copy_tensor_ == other->copy_tensor_ && - (*this)->copy_region_ == other->copy_region_) { - return true; - } - return false; -} - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.TensorConfig") - .set_body_typed([](Tensor tensor, MemoryRegion home_region, int state, int buffer_mode, - Array stripe_configs, bool copy_tensor, - MemoryRegion copy_region) { - TensorConfigState estate = static_cast(state); - BufferMode ebuffer_mode = static_cast(buffer_mode); - std::vector vstripe_configs(stripe_configs.begin(), stripe_configs.end()); - return TensorConfig(tensor, home_region, estate, ebuffer_mode, vstripe_configs, copy_tensor, - copy_region); - }); - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.TensorConfigEqual") - .set_body_method(&TensorConfig::operator==); - -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.TensorConfigGetBufferSize") - .set_body_method(&TensorConfigNode::GetBufferSize); - -TVM_REGISTER_NODE_TYPE(TensorConfigNode); - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm diff --git a/src/contrib/ethosu/cascader/tensor_config.h b/src/contrib/ethosu/cascader/tensor_config.h deleted file mode 100644 index 134e02c3e4cf..000000000000 --- a/src/contrib/ethosu/cascader/tensor_config.h +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/contrib/ethosu/cascader/tensor_config.h - * \brief TensorConfig object for the NPU cascader - */ -#ifndef TVM_CONTRIB_ETHOSU_CASCADER_TENSOR_CONFIG_H_ -#define TVM_CONTRIB_ETHOSU_CASCADER_TENSOR_CONFIG_H_ - -#include -#include - -#include -#include -#include -#include - -#include "graph.h" -#include "stripe_config.h" - -namespace tvm { -namespace contrib { -namespace ethosu { -namespace cascader { - -class MemoryRegionNode : public Object { - public: - void VisitAttrs(AttrVisitor* v); - - /*! 
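// NOTE (editor's illustration): a minimal standalone sketch of the rolling-buffer sizing rule
// implemented in GetRollingBufferSize_ above: only one stripe's worth of data is kept along the
// chosen rolling axis, while every other axis is buffered at full tensor size. The function name
// and the example shapes are illustrative, not part of the removed file.
#include <cstdio>
#include <vector>

// Elements needed for a rolling buffer: stripe extent on the rolling axis,
// full tensor extent on every other axis.
int RollingBufferElements(const std::vector<int>& tensor_shape,
                          const std::vector<int>& stripe_shape, int rolling_axis) {
  int elements = 1;
  for (int i = 0; i < static_cast<int>(tensor_shape.size()); ++i) {
    elements *= (i == rolling_axis) ? stripe_shape[i] : tensor_shape[i];
  }
  return elements;
}

int main() {
  // A (1, 12, 12, 16) NHWC tensor computed in (1, 4, 12, 16) stripes rolling over H
  // only needs 1 * 4 * 12 * 16 = 768 buffered elements instead of the full 2304.
  std::printf("%d\n", RollingBufferElements({1, 12, 12, 16}, {1, 4, 12, 16}, 1));
  return 0;
}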
\brief The name of the region */ - std::string name; - /*! \brief The size of the region */ - int size; - /*! \brief The read bandwidth of the region in bytes per cycle */ - int read_bandwidth; - /*! \brief The write bandwidth of the region in bytes per cycle */ - int write_bandwidth; - /*! \brief The read bandwidth of the region in bytes per cycle */ - int read_latency; - /*! \brief The write bandwidth of the region in bytes per cycle */ - int write_latency; - /*! \brief Length of memory burst */ - int burst_length; - - static constexpr const char* _type_key = "contrib.ethosu.cascader.MemoryRegion"; - TVM_DECLARE_FINAL_OBJECT_INFO(MemoryRegionNode, Object) -}; - -class MemoryRegion : public ObjectRef { - public: - MemoryRegion(std::string name, int size, int read_bandwidth, int write_bandwidth, - int read_latency, int write_latency, int burst_length) { - auto n = make_object(); - n->name = name; - n->size = size; - n->read_bandwidth = read_bandwidth; - n->write_bandwidth = write_bandwidth; - n->read_latency = read_latency; - n->write_latency = write_latency; - n->burst_length = burst_length; - data_ = std::move(n); - } - - TVM_DEFINE_OBJECT_REF_METHODS(MemoryRegion, ObjectRef, MemoryRegionNode); -}; - -/*! \brief The 'state' of a TensorConfig as used in the Plan generation algorithm. - * BOUNDARY - Should describe a Plan input/output Tensor. - * INTERIOR - Should describe an intermediate Tensor in a 'closed' Plan. - */ -enum TensorConfigState { BOUNDARY, INTERIOR }; - -/*! \brief Node to represent a TensorConfig */ -class TensorConfigNode : public Object { - public: - void VisitAttrs(AttrVisitor* v); - - /*! \return The Tensor the config applies to */ - const Tensor GetTensor() const { return tensor_; } - /*! \return The region where the tensor is allocated */ - MemoryRegion GetHomeRegion() const { return home_region_; } - /*! - * \return The state of the TensorConfig. - * \note The TensorConfigState is only used as part of the Plan generation algorithm. For a Plan - * to be 'closed' (and therefore not subject to any further merging), all the TensorConfigs that - * describe Plan input or output Tensors must be in the 'BOUNDARY' state with the rest being - * 'INTERIOR'. If any of the input or output tensors are described by an 'INTERIOR' TensorConfig, - * then the Plan is 'open' and should be merged with other 'open' Plans until the result becomes - * 'closed'. - */ - TensorConfigState GetState() const { return state_; } - /*! - * \return The mode in which the buffer should be realized - * \note There are multiple buffering strategies by which a tensor may be realized (computed). - * These affect the amount of recomputation necessary as well as the size of buffer required to - * store the tensor. See 'BufferMode' for a description of the allowable buffering modes. - */ - BufferMode GetBufferMode() const { return buffer_mode_; } - /*! - * \return Whether to copy the tensor. - * \note While a tensor will originally reside in its home region, the TensorConfig may optionally - * specify that the tensor should be copied (according to the StripeConfigs) into another - * MemoryRegion. As an example for where this may be used, if a weights tensor initially resides - * in slow Flash memory then necessarily the home region will be Flash. However, if the weights - * values are used multiple times by a Part, it may be more performant to choose to copy the - * weights into a faster memory like SRAM. - */ - bool DoCopy() const { return copy_tensor_; } - /*! 
\return The region to copy the tensor to */ - MemoryRegion GetCopyRegion() const { - if (!copy_tensor_) { - return home_region_; - } - return copy_region_; - } - /*! - * \return The StripeConfigs with which to compute the tensor. - * \note The StripeConfigs determine the order in which the elements of the tensor should be - * computed, including potentially computing them multiple times (recompute). Multiple - * StripeConfigs are used over just a single StripeConfig for the case where the tensor is - * consumed by two different Parts executing themselves with different StripeConfigs. In this - * case, there is a StripeConfig per consumer of the tensor. - */ - const std::vector GetStripeConfigs() const { return stripe_configs_; } - /*! - * \return The size of the buffer needed for the TensorConfig. - * \note The size of buffer necessary to store a tensor being produced using the TensorConfig is - * not necessarily just the size of the tensor. In Plans, a tensor may be being produced and - * consumed in 'stripes' which are smaller than the full tensor. Therefore, the buffer necessary - * to store the tensor may only need to be as large as the stripe. The precise size of the buffer - * will depend both on the BufferMode and StripeConfigs (as well as, of course, the Tensor). - */ - int GetBufferSize() const; - /*! \return The hash of the TensorConfigNode */ - size_t GetHash() const { return hash_; } - - static constexpr const char* _type_key = "contrib.ethosu.cascader.TensorConfig"; - TVM_DECLARE_FINAL_OBJECT_INFO(TensorConfigNode, Object); - - protected: - friend class TensorConfig; - - /*! \brief Compute the hash of the TensorConfigNode */ - void ComputeHash_(); - - /*! \return The size of the recompute buffer needed*/ - int GetRecomputeBufferSize_() const; - /*! \return The size of the rolling buffer needed*/ - int GetRollingBufferSize_() const; - - /*! \brief The Tensor the config applies to */ - Tensor tensor_; - /*! \brief The region where the tensor is allocated */ - MemoryRegion home_region_; - /*! \return The state of the TensorConfig */ - TensorConfigState state_; - /*! \brief The mode in which the buffer should be realized */ - BufferMode buffer_mode_; - /*! \return The StripeConfigs with which to compute the tensor */ - std::vector stripe_configs_; - /*! \brief Whether to copy the tensor */ - bool copy_tensor_; - /*! \brief The region to copy the tensor to */ - MemoryRegion copy_region_; - /*! \brief The hash of the TensorConfigNode */ - size_t hash_{0}; -}; - -/*! - * \brief A class which describes how to realize a Tensor. - * \note The TensorConfig describes both how a Tensor is scheduled (the order in which it's - * produced/consumed) and how its allocated in memory (which region it should reside in and whether - * it should be copied). For further detail on how TensorConfig stores this information, consult the - * documentation of TensorConfigNode. - */ -class TensorConfig : public ObjectRef { - public: - TensorConfig(const Tensor& tensor, const MemoryRegion& home_region, TensorConfigState state, - BufferMode buffer_mode, const std::vector& stripe_configs, - bool copy_tensor, const MemoryRegion& copy_region); - /*! - * \brief Check if two TensorConfigs are equal to each other. - * \param other TensorConfig to be checked. - * \return Whether the two TensorConfigs equal each other. 
- */ - bool operator==(const TensorConfig& other) const; - - TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(TensorConfig, ObjectRef, TensorConfigNode); -}; - -} // namespace cascader -} // namespace ethosu -} // namespace contrib -} // namespace tvm - -// Hash and equal function for TensorConfig -namespace std { - -/*! \brief The equal_to function for tvm::contrib::ethosu::cascader::TensorConfig */ -template <> -struct equal_to<::tvm::contrib::ethosu::cascader::TensorConfig> { - bool operator()(const ::tvm::contrib::ethosu::cascader::TensorConfig& lhs, - const ::tvm::contrib::ethosu::cascader::TensorConfig& rhs) const { - return lhs == rhs; - } -}; - -/*! \brief The hash function for tvm::contrib::ethosu::cascader::TensorConfig */ -template <> -struct hash<::tvm::contrib::ethosu::cascader::TensorConfig> { - std::size_t operator()( - const ::tvm::contrib::ethosu::cascader::TensorConfig& tensor_config) const { - return tensor_config->GetHash(); - } -}; - -} // namespace std - -#endif // TVM_CONTRIB_ETHOSU_CASCADER_TENSOR_CONFIG_H_ diff --git a/src/meta_schedule/mutator/mutator.cc b/src/meta_schedule/mutator/mutator.cc index ddc2d73590f9..8e9bfc8bde4b 100644 --- a/src/meta_schedule/mutator/mutator.cc +++ b/src/meta_schedule/mutator/mutator.cc @@ -76,13 +76,6 @@ Map Mutator::DefaultHexagon() { {Mutator::MutateParallel(/*max_jobs_per_core=*/16), FloatImm(DataType::Float(64), 0.02)}}; } -Map Mutator::DefaultMicro() { - return Map{ - {Mutator::MutateTileSize(), FloatImm(DataType::Float(64), 0.9)}, - {Mutator::MutateComputeLocation(), FloatImm(DataType::Float(64), 0.05)}, - {Mutator::MutateUnroll(), FloatImm(DataType::Float(64), 0.03)}}; -} - TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable) .set_dispatch([](const ObjectRef& n, ReprPrinter* p) { const auto* self = n.as(); @@ -109,7 +102,6 @@ TVM_REGISTER_GLOBAL("meta_schedule.MutatorDefaultCUDA").set_body_typed(Mutator:: TVM_REGISTER_GLOBAL("meta_schedule.MutatorDefaultCUDATensorCore") .set_body_typed(Mutator::DefaultCUDATensorCore); TVM_REGISTER_GLOBAL("meta_schedule.MutatorDefaultHexagon").set_body_typed(Mutator::DefaultHexagon); -TVM_REGISTER_GLOBAL("meta_schedule.MutatorDefaultMicro").set_body_typed(Mutator::DefaultMicro); } // namespace meta_schedule } // namespace tvm diff --git a/src/meta_schedule/postproc/postproc.cc b/src/meta_schedule/postproc/postproc.cc index bcd0cef4dd69..a5f833044cbe 100644 --- a/src/meta_schedule/postproc/postproc.cc +++ b/src/meta_schedule/postproc/postproc.cc @@ -100,14 +100,6 @@ Array Postproc::DefaultHexagon() { }; } -Array Postproc::DefaultMicro() { - return Array{ - Postproc::DisallowDynamicLoop(), - Postproc::RewriteParallelVectorizeUnroll(), - Postproc::RewriteReductionBlock(), - }; -} - TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable) .set_dispatch([](const ObjectRef& n, ReprPrinter* p) { const auto* self = n.as(); diff --git a/src/meta_schedule/schedule_rule/schedule_rule.cc b/src/meta_schedule/schedule_rule/schedule_rule.cc index 83f5d073cb32..c5a7ad40730e 100644 --- a/src/meta_schedule/schedule_rule/schedule_rule.cc +++ b/src/meta_schedule/schedule_rule/schedule_rule.cc @@ -302,31 +302,6 @@ Array ScheduleRule::DefaultHexagon() { }; } -Array ScheduleRule::DefaultMicro() { - return { - ScheduleRule::ApplyCustomRule(), - ScheduleRule::InlineConstantScalars(), - ScheduleRule::AutoInline( - /*into_producer=*/false, - /*into_consumer=*/true, - /*inline_const_tensor=*/true, - /*disallow_if_then_else=*/true, - /*require_injective=*/true, - /*require_ordered=*/true, - /*disallow_op=*/Array{"tir.exp"}), - 
ScheduleRule::MultiLevelTiling( - /*structure=*/"SSRSRS", - /*tile_binds=*/NullOpt, - /*max_innermost_factor=*/Integer(64), - /*vector_load_lens=*/NullOpt, - /*reuse_read=*/NullOpt, - /*reuse_write=*/ - Map{{"req", String("may")}, - {"levels", Array{1, 2}}, - {"scope", String("global")}}), - }; -} - Array GetARMNeonSpecificRules() { return { ScheduleRule::MultiLevelTilingWithIntrin( @@ -443,8 +418,6 @@ TVM_REGISTER_GLOBAL("meta_schedule.ScheduleRuleDefaultCUDATensorCore") .set_body_typed(ScheduleRule::DefaultCUDATensorCore); TVM_REGISTER_GLOBAL("meta_schedule.ScheduleRuleDefaultHexagon") .set_body_typed(ScheduleRule::DefaultHexagon); -TVM_REGISTER_GLOBAL("meta_schedule.ScheduleRuleDefaultMicro") - .set_body_typed(ScheduleRule::DefaultMicro); TVM_REGISTER_GLOBAL("meta_schedule.ScheduleRuleDefaultARM") .set_body_typed(ScheduleRule::DefaultARM); diff --git a/src/meta_schedule/space_generator/space_generator.cc b/src/meta_schedule/space_generator/space_generator.cc index 73df303f725c..400532750cc7 100644 --- a/src/meta_schedule/space_generator/space_generator.cc +++ b/src/meta_schedule/space_generator/space_generator.cc @@ -117,10 +117,6 @@ void SpaceGeneratorNode::InitializeWithTuneContext(const TuneContext& context) { default_sch_rules = ScheduleRule::DefaultX86("avx512"); default_postprocs = Postproc::DefaultCPUTensorization(); default_mutator_probs = Mutator::DefaultLLVM(); - } else if (kind == "c") { - default_sch_rules = ScheduleRule::DefaultMicro(); - default_postprocs = Postproc::DefaultMicro(); - default_mutator_probs = Mutator::DefaultMicro(); } else if (kind == "asimd") { default_sch_rules = ScheduleRule::DefaultARM("neon"); default_postprocs = Postproc::DefaultCPUTensorization(); diff --git a/src/meta_schedule/utils.h b/src/meta_schedule/utils.h index 28c45ea7455d..61ce62347af7 100644 --- a/src/meta_schedule/utils.h +++ b/src/meta_schedule/utils.h @@ -543,8 +543,6 @@ inline ScheduleRule GetDefaultAutoInline(const std::string& target_name) { rules = ScheduleRule::DefaultLLVM(); } else if (target_name == "hexagon") { rules = ScheduleRule::DefaultHexagon(); - } else if (target_name == "c") { - rules = ScheduleRule::DefaultMicro(); } else if (IsGPUTarget(target_name)) { rules = ScheduleRule::DefaultCUDA(); } else { diff --git a/src/relay/backend/contrib/cmsisnn/buffer_size.cc b/src/relay/backend/contrib/cmsisnn/buffer_size.cc deleted file mode 100644 index d5ac80cdfc26..000000000000 --- a/src/relay/backend/contrib/cmsisnn/buffer_size.cc +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -#include "buffer_size.h" - -#include -#include - -#include "compiler_attrs.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -int Conv2dBufferSize(bool is_int16, Target target, int32_t padding_w, int32_t padding_h, - int32_t input_n, int32_t input_h, int32_t input_c, int32_t output_h, - int32_t output_w, int32_t stride_w, int32_t stride_h, int32_t dilation_w, - int32_t dilation_h, int32_t filter_w, int32_t filter_h) { - int size = -1; - if (is_int16) { - size = Conv2dBufferSizeInt16(target, padding_w, padding_h, input_n, input_h, input_c, output_h, - output_w, stride_w, stride_h, dilation_w, dilation_h, filter_w, - filter_h); - } else { - size = Conv2dBufferSizeInt8(target, padding_w, padding_h, input_n, input_h, input_c, output_h, - output_w, stride_w, stride_h, dilation_w, dilation_h, filter_w, - filter_h); - } - return size; -} - -int Conv2dBufferSizeInt8(Target target, int32_t padding_w, int32_t padding_h, int32_t input_n, - int32_t input_h, int32_t input_c, int32_t output_h, int32_t output_w, - int32_t stride_w, int32_t stride_h, int32_t dilation_w, int32_t dilation_h, - int32_t filter_w, int32_t filter_h) { - bool is1x1 = (padding_w == 0) && (padding_h == 0) && (input_c % 4 == 0) && (stride_w == 1) && - (stride_h == 1) && (filter_w == 1) && (filter_h == 1) && (dilation_w == 1) && - (dilation_h == 1); - bool is1xN = (output_h == 1) && (input_h == 1) && (filter_h == 1) && (output_w % 4 == 0) && - (input_n == 1) && (dilation_w == 1) && (dilation_h == 1); - - bool has_mve = target->GetFeature("has_mve").value_or(Bool(false)); - - if (is1x1) { - return 0; - } - - if (is1xN) { - if (has_mve) { - return 0; - } - return (2 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); - } - - if (has_mve || is1xN) { - int32_t col_length = input_c * filter_w * filter_h; - col_length = (col_length + 7) / 8; - return 4 * col_length * 8 * (int32_t)sizeof(int8_t); - } else { - return (2 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); - } - return 0; -} - -int Conv2dBufferSizeInt16(Target target, int32_t padding_w, int32_t padding_h, int32_t input_n, - int32_t input_h, int32_t input_c, int32_t output_h, int32_t output_w, - int32_t stride_w, int32_t stride_h, int32_t dilation_w, - int32_t dilation_h, int32_t filter_w, int32_t filter_h) { - bool has_mve = target->GetFeature("has_mve").value_or(Bool(false)); - bool has_dsp = target->GetFeature("has_dsp").value_or(Bool(false)); - - if (has_dsp && !has_mve) { - if ((filter_w * filter_h * input_c < 512) && dilation_w == 1 && dilation_h == 1) { - return (2 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); - } - } - return 0; -} - -int DepthwiseConv2dBufferSize(bool is_int16, Target target, int32_t input_n, int32_t input_c, - int32_t output_c, int32_t filter_w, int32_t filter_h, - int32_t dilation_w, int32_t dilation_h, int32_t depth_multiplier) { - int size = -1; - if (is_int16) { - size = DepthwiseConv2dBufferSizeInt16(target, input_n, input_c, output_c, filter_w, filter_h, - dilation_w, dilation_h, depth_multiplier); - } else { - size = DepthwiseConv2dBufferSizeInt8(target, input_n, input_c, output_c, filter_w, filter_h, - dilation_w, dilation_h, depth_multiplier); - } - return size; -} - -int DepthwiseConv2dBufferSizeInt8(Target target, int32_t input_n, int32_t input_c, int32_t output_c, - int32_t filter_w, int32_t filter_h, int32_t dilation_w, - int32_t dilation_h, int32_t depth_multiplier) { - bool has_mve = target->GetFeature("has_mve").value_or(Bool(false)); - bool has_dsp = 
target->GetFeature("has_dsp").value_or(Bool(false)); - - if (input_c == output_c && input_n == 1 && dilation_w == 1 && dilation_h == 1) { - if (has_mve) { - return (4 * CH_IN_BLOCK_MVE * filter_w * filter_h) * (int32_t)sizeof(int8_t); - } else if (has_dsp) { - return (input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); - } - } - return 0; -} - -int DepthwiseConv2dBufferSizeInt16(Target target, int32_t input_n, int32_t input_c, - int32_t output_c, int32_t filter_w, int32_t filter_h, - int32_t dilation_w, int32_t dilation_h, - int32_t depth_multiplier) { - bool has_mve = target->GetFeature("has_mve").value_or(Bool(false)); - bool has_dsp = target->GetFeature("has_dsp").value_or(Bool(false)); - - if (depth_multiplier == 1 && dilation_w == 1 && dilation_h == 1 && - filter_w * filter_h * input_c < 512) { - if (has_dsp) { - if (has_mve) { - return 4 * input_c * filter_w * filter_h * (int32_t)sizeof(int16_t) + 8; - } else { - return input_c * filter_w * filter_h * (int32_t)sizeof(int16_t); - } - } - } - return 0; -} - -int AvgPoolBufferSize(Target target, int32_t input_c) { - bool has_mve = target->GetFeature("has_mve").value_or(Bool(false)); - bool has_dsp = target->GetFeature("has_dsp").value_or(Bool(false)); - - if (has_dsp && !has_mve) { - return (input_c * sizeof(int32_t)); - } - return 0; -} - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/cmsisnn/buffer_size.h b/src/relay/backend/contrib/cmsisnn/buffer_size.h deleted file mode 100644 index 5cf8c309cc5e..000000000000 --- a/src/relay/backend/contrib/cmsisnn/buffer_size.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/backend/contrib/cmsisnn/buffer_size.h - * \brief CMSIS-NN Buffer Size calculation functions - */ - -#ifndef TVM_RELAY_BACKEND_CONTRIB_CMSISNN_BUFFER_SIZE_H_ -#define TVM_RELAY_BACKEND_CONTRIB_CMSISNN_BUFFER_SIZE_H_ - -#include - -#include "compiler_attrs.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -#define CH_IN_BLOCK_MVE (124) - -/*! 
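// NOTE (editor's illustration): a standalone check of the non-MVE im2col scratch formula used
// by Conv2dBufferSizeInt8 above, 2 * input_c * filter_w * filter_h * sizeof(int16_t). The
// function name and the example values are illustrative, not part of the removed file.
#include <cstdint>
#include <cstdio>

// Scratch bytes for the non-MVE int8 convolution path: two int16 im2col columns.
int Conv2dScratchBytesNoMVE(int input_c, int filter_w, int filter_h) {
  return (2 * input_c * filter_w * filter_h) * static_cast<int>(sizeof(int16_t));
}

int main() {
  // 8 input channels with a 3x3 kernel: 2 * 8 * 3 * 3 * 2 = 288 bytes of scratch.
  std::printf("%d\n", Conv2dScratchBytesNoMVE(8, 3, 3));
  return 0;
}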
- * \brief Calculates the appropriate buffer size for CMSIS-NN Convolutions - * See: - * https://github.com/ARM-software/CMSIS_5/blob/8c60448c0e1e50e426180b26db9bc31ddf774361/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L108-L127 - * - * \param is_int16 - type of conv2d - * \param target - CMSIS-NN Target - * \param padding_w - Width padding - * \param padding_h - Height padding - * \param input_n - Input batch size - * \param input_h - Input height - * \param input_c - Input channels - * \param output_h - Output height - * \param output_w - Output width - * \param stride_w - Stride width - * \param stride_h - Stride height - * \param filter_w - Filter width - * \param filter_h - Filter height - * - * \return Size of buffer to allocate for convolution - */ -int Conv2dBufferSize(bool is_int16, Target target, int32_t padding_w, int32_t padding_h, - int32_t input_n, int32_t input_h, int32_t input_c, int32_t output_h, - int32_t output_w, int32_t stride_w, int32_t stride_h, int32_t dilation_w, - int32_t dilation_h, int32_t filter_w, int32_t filter_h); - -int Conv2dBufferSizeInt8(Target target, int32_t padding_w, int32_t padding_h, int32_t input_n, - int32_t input_h, int32_t input_c, int32_t output_h, int32_t output_w, - int32_t stride_w, int32_t stride_h, int32_t dilation_w, int32_t dilation_h, - int32_t filter_w, int32_t filter_h); - -int Conv2dBufferSizeInt16(Target target, int32_t padding_w, int32_t padding_h, int32_t input_n, - int32_t input_h, int32_t input_c, int32_t output_h, int32_t output_w, - int32_t stride_w, int32_t stride_h, int32_t dilation_w, - int32_t dilation_h, int32_t filter_w, int32_t filter_h); - -/*! - * \brief Calculates the appropriate buffer size for CMSIS-NN Depthwise Convolutions - * See: - * https://github.com/ARM-software/CMSIS_5/blob/325443e52637b6c7eedbd160d238a6c462e89c9f/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c#L115-L129 - * - * \param is_int16 - type of conv2d - * \param target - CMSIS-NN Target - * \param input_n - Input batch size - * \param input_c - Input channels - * \param output_c - Output channels - * \param filter_w - Filter width - * \param filter_h - Filter height - * \param dilation_w - Dilation width - * \param dilation_h - Dilation height - * \param depth_multiplier - Depth Multiplier for Depthwise Convolution - * - * \return Size of buffer to allocate for depthwise convolution - */ -int DepthwiseConv2dBufferSize(bool is_int16, Target target, int32_t input_n, int32_t input_c, - int32_t output_c, int32_t filter_w, int32_t filter_h, - int32_t dilation_w, int32_t dilation_h, int32_t depth_multiplier); - -int DepthwiseConv2dBufferSizeInt8(Target target, int32_t input_n, int32_t input_c, int32_t output_c, - int32_t filter_w, int32_t filter_h, int32_t dilation_w, - int32_t dilation_h, int32_t depth_multiplier); - -int DepthwiseConv2dBufferSizeInt16(Target target, int32_t input_n, int32_t input_c, - int32_t output_c, int32_t filter_w, int32_t filter_h, - int32_t dilation_w, int32_t dilation_h, - int32_t depth_multiplier); - -/*! 
- * \brief Calculates the appropriate buffer size for CMSIS-NN Average Pooling - * See: - * https://github.com/ARM-software/CMSIS_5/blob/bff28575f0c96a4ee9008947fea2b018a69b4900/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c#L388-L398 - * - * \param target - CMSIS-NN Target - * \param input_c - Input channels - * - * \return Size of buffer to allocate for average pooling - */ -int AvgPoolBufferSize(Target target, int32_t input_c); - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm - -#endif // TVM_RELAY_BACKEND_CONTRIB_CMSISNN_BUFFER_SIZE_H_ diff --git a/src/relay/backend/contrib/cmsisnn/compiler_attrs.cc b/src/relay/backend/contrib/cmsisnn/compiler_attrs.cc deleted file mode 100644 index 345e2d0e60da..000000000000 --- a/src/relay/backend/contrib/cmsisnn/compiler_attrs.cc +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#include "compiler_attrs.h" - -#include -#include -#include - -#include - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -TVM_REGISTER_NODE_TYPE(CMSISNNCompilerConfigNode); -TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.cmsisnn.options", CMSISNNCompilerConfig); - -Target CreateTarget(const tvm::transform::PassContext& ctx) { - auto cfg = ctx->GetConfig("relay.ext.cmsisnn.options"); - if (!cfg.defined()) { - return Target("cmsis-nn"); - } - - String mcpu = cfg.value()->mcpu; - Array mattr = {cfg.value()->mattr}; - runtime::Bool debug_last_error = cfg.value()->debug_last_error->value; - - Target cmsis_nn_target(TargetJSON{ - {"kind", String("cmsis-nn")}, - {"mcpu", mcpu}, - {"mattr", mattr}, - {"debug_last_error", debug_last_error}, - }); - - return cmsis_nn_target; -} - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/cmsisnn/compiler_attrs.h b/src/relay/backend/contrib/cmsisnn/compiler_attrs.h deleted file mode 100644 index 7bb355e0b212..000000000000 --- a/src/relay/backend/contrib/cmsisnn/compiler_attrs.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/backend/contrib/cmsisnn/compiler_attrs.h - * \brief CMSIS-NN Compiler Attribute functionality - */ - -#ifndef TVM_RELAY_BACKEND_CONTRIB_CMSISNN_COMPILER_ATTRS_H_ -#define TVM_RELAY_BACKEND_CONTRIB_CMSISNN_COMPILER_ATTRS_H_ - -#include -#include - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -/*! \brief Attributes to store the compiler options for CMSIS-NN. */ -struct CMSISNNCompilerConfigNode : public tvm::AttrsNode { - String mcpu; - String mattr; - Bool debug_last_error = Bool(false); - - TVM_DECLARE_ATTRS(CMSISNNCompilerConfigNode, "ext.attrs.CMSISNNCompilerConfigNode") { - TVM_ATTR_FIELD(mcpu) - .describe( - "The CPU to configure CMSIS-NN for (i.e. cortex-m55, cortex-m4), can also include " - "attributes (i.e. cortex-m55+nomve)") - .set_default(""); - TVM_ATTR_FIELD(mattr) - .describe("The attributes to configure CMSIS-NN (i.e. +nodsp, +nomve)") - .set_default(""); - TVM_ATTR_FIELD(debug_last_error) - .describe("Whether to enable storing the last error") - .set_default(Bool(false)); - } -}; - -class CMSISNNCompilerConfig : public Attrs { - public: - TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(CMSISNNCompilerConfig, Attrs, - CMSISNNCompilerConfigNode); -}; - -/*! \brief Convert External Code Generator options to TVM Target. */ -Target CreateTarget(const tvm::transform::PassContext& ctx); - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm - -#endif // TVM_RELAY_BACKEND_CONTRIB_CMSISNN_COMPILER_ATTRS_H_ diff --git a/src/relay/backend/contrib/cmsisnn/compute_luts.cc b/src/relay/backend/contrib/cmsisnn/compute_luts.cc deleted file mode 100644 index 13dcb395b337..000000000000 --- a/src/relay/backend/contrib/cmsisnn/compute_luts.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/*! - * \file src/relay/backend/contrib/cmsisnn/compute_luts.cc - * \brief Creates LUTs for operators in different bit formats for accelerating computations. 
- */ - -#include "compute_luts.h" - -#include -#include -#include - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -void CalculateLUTInt16(int key_zero_point, float key_scale, int value_zero_point, float value_scale, - float (*func)(float), const int steps, int16_t* lut) { - const float value_min = static_cast(std::numeric_limits::min()); - const float value_max = static_cast(std::numeric_limits::max()); - const float key_min_deq = key_scale * (std::numeric_limits::min() - key_zero_point); - const float key_max_deq = key_scale * (std::numeric_limits::max() - key_zero_point); - const float value_min_deq = - value_scale * (std::numeric_limits::min() - value_zero_point); - const float value_max_deq = - value_scale * (std::numeric_limits::max() - value_zero_point); - - const float step_size_deq = (key_max_deq - key_min_deq) / (steps - 1); - const float half_step_size_deq = step_size_deq / 2; - - const float value_inv_quantizing = - (std::numeric_limits::max() - std::numeric_limits::min() + 1) / - (value_max_deq - value_min_deq); - - for (int i = 0; i < steps - 1; i++) { - float value_deq = func(key_min_deq + i * step_size_deq); - float mid_value_deq = func(key_min_deq + i * step_size_deq + half_step_size_deq); - float next_value_deq = func(key_min_deq + (i + 1) * step_size_deq); - - float value = std::round(value_deq * value_inv_quantizing); - float mid_value = std::round(mid_value_deq * value_inv_quantizing); - float next_value = std::round(next_value_deq * value_inv_quantizing); - float mid_iterp_value = std::round((value + next_value) / 2); - - float mid_err = mid_iterp_value - mid_value; - float bias = std::round(mid_err / 2); - - lut[i] = static_cast(std::max(std::min(value - bias, value_max), value_min)); - } - - lut[steps - 1] = static_cast( - std::max(std::min(func(value_max_deq) * value_inv_quantizing, value_max), value_min)); -} - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/cmsisnn/compute_luts.h b/src/relay/backend/contrib/cmsisnn/compute_luts.h deleted file mode 100644 index eca4127e40c6..000000000000 --- a/src/relay/backend/contrib/cmsisnn/compute_luts.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/backend/contrib/cmsisnn/compute_luts.h - * \brief CMSIS-NN LUTs calculation functions - */ - -#ifndef TVM_RELAY_BACKEND_CONTRIB_CMSISNN_COMPUTE_LUTS_H_ -#define TVM_RELAY_BACKEND_CONTRIB_CMSISNN_COMPUTE_LUTS_H_ - -#include - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -/*! 
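// NOTE (editor's illustration): a simplified standalone sketch of what each entry produced by
// CalculateLUTInt16 above represents: dequantize the key, apply the function, requantize and
// saturate the value. It assumes a symmetric (zero-point 0) quantization and omits the midpoint
// interpolation-error bias applied by the removed implementation; all names are illustrative.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

float TanhF(float x) { return std::tanh(x); }

// One LUT entry for symmetric int16 quantization of both keys and values.
int16_t QuantizedEntry(int16_t key, float key_scale, float value_scale, float (*func)(float)) {
  float value = std::round(func(key_scale * static_cast<float>(key)) / value_scale);
  return static_cast<int16_t>(std::max(-32768.0f, std::min(32767.0f, value)));
}

int main() {
  // With key_scale 1/4096 and value_scale 1/32768, key 4096 maps to roughly tanh(1) * 32768.
  std::printf("%d\n", QuantizedEntry(4096, 1.0f / 4096.0f, 1.0f / 32768.0f, TanhF));
  return 0;
}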
- * \brief Populates an int16 LUT based on the quantization parameters of its keys, values and - * respective transformation function - * - * \param key_zero_point - zero point of table's keys - * \param key_scale - scale of the table's keys - * \param value_zero_point - zero point of table's values - * \param value_scale - scale of the table's values - * \param func - function pointer of the transformation performed by the LUT - * \param steps - number of total values inside the table - * \param lut - int16_t array storing the values of the LUT - */ -void CalculateLUTInt16(int key_zero_point, float key_scale, int value_zero_point, float value_scale, - float (*func)(float), const int steps, int16_t* lut); - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm - -#endif // TVM_RELAY_BACKEND_CONTRIB_CMSISNN_COMPUTE_LUTS_H_ diff --git a/src/relay/backend/contrib/cmsisnn/convolutions.cc b/src/relay/backend/contrib/cmsisnn/convolutions.cc deleted file mode 100644 index ebac83b81250..000000000000 --- a/src/relay/backend/contrib/cmsisnn/convolutions.cc +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#include "convolutions.h" - -#include - -#include "../../../qnn/utils.h" -#include "tvm/ir/transform.h" -#include "tvm/relay/attrs/nn.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -bool IsCMSISNNDepthwise(const Conv2DAttrs* conv2d_attrs, const Array& input_shape, - const Array& kernel_shape) { - std::string kernel_layout = conv2d_attrs->kernel_layout.c_str(); - int kernel_pos_o = kernel_layout.find("O"); - int kernel_pos_i = kernel_layout.find("I"); - int kernel_dim_o_val = qnn::get_const_int(kernel_shape[kernel_pos_o]); - int kernel_dim_i_val = qnn::get_const_int(kernel_shape[kernel_pos_i]); - int64_t out_channels = conv2d_attrs->channels.as()->value; - return (out_channels == kernel_dim_o_val * kernel_dim_i_val); -} - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/cmsisnn/convolutions.h b/src/relay/backend/contrib/cmsisnn/convolutions.h deleted file mode 100644 index e635702bf353..000000000000 --- a/src/relay/backend/contrib/cmsisnn/convolutions.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/backend/contrib/cmsisnn/convolutions.h - * \brief CMSIS-NN utility functions for Convolutions - */ - -#ifndef TVM_RELAY_BACKEND_CONTRIB_CMSISNN_CONVOLUTIONS_H_ -#define TVM_RELAY_BACKEND_CONTRIB_CMSISNN_CONVOLUTIONS_H_ - -#include -#include -#include -#include -#include - -#include "../../../op/make_op.h" -#include "../../../qnn/utils.h" -#include "../../../transforms/pattern_utils.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { -/*! - * \brief Checks if Relay Conv2D was originally CMSIS-NN compliant Depthwise Convolution - * See: - * https://github.com/apache/tvm/blob/6ed3ab3e33f8eafa4acaf53b7a671831de7587e9/python/tvm/relay/frontend/tflite.py#L2107 - * - * - * \return true if a Conv2D is a Depthwise Convolution based on Conv2D's inputs' shapes and - * attributes - */ - -bool IsCMSISNNDepthwise(const Conv2DAttrs* conv2d_attrs, const Array& input_shape, - const Array& kernel_shape); - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm - -#endif // TVM_RELAY_BACKEND_CONTRIB_CMSISNN_CONVOLUTIONS_H_ diff --git a/src/relay/backend/contrib/cmsisnn/extract_constants.cc b/src/relay/backend/contrib/cmsisnn/extract_constants.cc deleted file mode 100644 index 1ce757a62fa9..000000000000 --- a/src/relay/backend/contrib/cmsisnn/extract_constants.cc +++ /dev/null @@ -1,254 +0,0 @@ - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/*! - * \file extract_constant.cc - * \brief Pushes out constants within partitioned functions all the way upto main() - */ - -#include -#include -#include -#include - -#include "../../../qnn/utils.h" -#include "../../../transforms/pattern_utils.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -/*! - * \brief This Mutator finds all functions with constants. Constants are replaced with function - * parameter variables. Constants are pushed all the way upto main(). 
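// NOTE (editor's illustration): a toy standalone sketch of the idea described above, lifting
// constant references out of a function body into fresh parameters that the caller must then
// supply. The ToyFunc structure and all names here are illustrative; the removed mutator below
// operates on Relay functions and pushes the constants all the way up to main().
#include <cstdio>
#include <string>
#include <vector>

// A toy function: a parameter list plus the names its body refers to.
struct ToyFunc {
  std::vector<std::string> params;
  std::vector<std::string> body_refs;
};

// Replace every constant referenced by the body with a fresh parameter variable and
// return the constants that the caller now has to pass in at the call site.
std::vector<std::string> LiftConstants(ToyFunc* func,
                                       const std::vector<std::string>& constants) {
  std::vector<std::string> lifted;
  for (std::string& ref : func->body_refs) {
    for (const std::string& c : constants) {
      if (ref == c) {
        lifted.push_back(c);
        ref = "tvm_var_extract_const_" + std::to_string(lifted.size() - 1);
        func->params.push_back(ref);
        break;
      }
    }
  }
  return lifted;
}

int main() {
  ToyFunc f{{"%input"}, {"%input", "%weights", "%bias"}};
  std::vector<std::string> lifted = LiftConstants(&f, {"%weights", "%bias"});
  // The function now has three parameters and its caller supplies %weights and %bias.
  std::printf("params=%zu lifted=%zu\n", f.params.size(), lifted.size());
  return 0;
}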
- */ -class ExtractConstantsMutator : public MixedModeMutator { - public: - explicit ExtractConstantsMutator(const IRModule& mod) : mod_(mod) {} - - private: - String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); } - - using MixedModeMutator::VisitExpr_; - - Expr VisitExpr_(const FunctionNode* function) final { - Function func = GetRef(function); - auto composite_name = func->GetAttr(attr::kComposite); - if (composite_name.defined()) { - std::string name = composite_name.value().operator std::string(); - if (name.find("cmsis-nn") == std::string::npos) { - return func; - } - } - auto compiler_name = func->GetAttr(::tvm::relay::attr::kCompiler); - if (compiler_name.defined() && compiler_name != "cmsis-nn") { - return func; - } - - function_to_arguments_.Set(func, Array{}); - functions_.push_back(func); - auto new_body = VisitExpr(func->body); - functions_.pop_back(); - if (function_to_arguments_[func].size()) { - func = WithFields(func, FreeVars(new_body), new_body, func->ret_type, - FreeTypeVars(new_body, mod_), func->attrs); - } - return std::move(func); - } - - // Creates new arguments from current call's arguments - // Updates constants into the caller arguments: here caller signifies caller that comprises call - // to func - Array CreateNewCallArgsFromExtractedConstants(Call call, Function func) { - ICHECK(function_to_arguments_.find(func) != function_to_arguments_.end()); - Array function_signature(function_to_arguments_[func]); - - // Is func a global_function? - // main() is not registered for extracting constants - bool is_global_function = functions_.empty() ? true : false; - - bool new_constants_added = false; - // This tracks arguments traversed inside function_signature - uint32_t function_signature_id = 0; - // This contains arguments including constants for the caller of this function inside which - // post_call resides. 
- Array new_caller_args; - // New arguments to post_call that includes new variables representing constants extracted from - // the function - Array new_call_args; - for (auto& arg : call->args) { - if (auto* constant = arg.as()) { - new_caller_args.push_back(arg); - new_call_args.push_back(Var(gen_var_name(), constant->tensor_type())); - ++function_signature_id; - new_constants_added = true; - continue; - } - - // Push all constants from the function_signature until a variable corresponding to the - // current argument is hit - while (function_signature_id < function_signature.size()) { - auto* constant = function_signature[function_signature_id].as(); - if (constant == nullptr) { - break; - } - new_caller_args.push_back(function_signature[function_signature_id++]); - new_call_args.push_back(Var(gen_var_name(), constant->tensor_type())); - new_constants_added = true; - } - - new_call_args.push_back(arg); - if (is_global_function || arg.as()) { - new_caller_args.push_back(arg); - } - ++function_signature_id; - } - - // Push remaining constants as new arguments - for (uint32_t i = function_signature_id; i < function_signature.size(); ++i) { - auto* constant = function_signature[i].as(); - ICHECK(constant) - << "Rest of the collected arguments should be constant in the partitioned function."; - new_caller_args.push_back(GetRef(constant)); - new_call_args.push_back(Var(gen_var_name(), constant->tensor_type())); - new_constants_added = true; - } - - // Update the arguments of caller of local function - if (new_constants_added && !is_global_function) { - const Function& last_func = functions_.back(); - Array function_constants(function_to_arguments_[last_func]); - function_to_arguments_.Set(last_func, - tvm::runtime::Concat(function_constants, new_caller_args)); - } else { - new_call_args = new_caller_args; - } - - return new_call_args; - } - - Expr Rewrite_(const CallNode* call, const Expr& post) final { - Expr final_call = post; - auto* post_call = post.as(); - - // Replace Constant arguments with Vars for ML Operators - // Perform this for non-main Call Nodes only - if (!functions_.empty() && call->op.as()) { - Array new_args; - const Function& last_func = functions_.back(); - Array function_signature(function_to_arguments_[last_func]); - for (auto& arg : post_call->args) { - // Push all arguments including constants to maintain correct order of - // variables and constants - auto* const_arg = arg.as(); - if (const_arg && !const_arg->is_scalar()) { - Var var_arg = Var(gen_var_name(), const_arg->tensor_type()); - new_args.push_back(var_arg); - function_signature.push_back(arg); - } else { - if (arg.as()) { - // Only push if its not already present as multiple consumers of any input var - // will appear only once in the function signature. 
- bool found_in_existing_signature = false; - for (auto& sign : function_signature) { - if (arg.same_as(sign)) { - found_in_existing_signature = true; - break; - } - } - if (!found_in_existing_signature) { - function_signature.push_back(arg); - } - } - new_args.push_back(arg); - } - } - function_to_arguments_.Set(last_func, function_signature); - final_call = Call(call->op, new_args, call->attrs, {}); - } - - // Since the constants are extracted from partitioned functions - // a new call to global function is needed - if (auto opt = post_call->op.as()) { - auto glob_var = opt.value(); - auto glob_func = Downcast(mod_->Lookup(glob_var)); - auto new_glob_func = VisitExpr(glob_func); - if (!new_glob_func.same_as(glob_func)) { - mod_->Update(glob_var, Downcast(new_glob_func)); - auto new_args = CreateNewCallArgsFromExtractedConstants(GetRef(post_call), glob_func); - final_call = Call(glob_var, new_args); - } - } - - // Since the constants are extracted from the local partitioned functions - // a new call to local function is needed - if (auto opt = call->op.as()) { - Function func = opt.value(); - auto new_func = VisitExpr(func); - Array new_args = CreateNewCallArgsFromExtractedConstants(GetRef(post_call), func); - final_call = Call(new_func, new_args); - } - - final_call->span = call->span; - return final_call; - } - - private: - /* \brief Updated module where all calls have replaced constants with new variables */ - IRModule mod_; - /* \brief Maintains mapping of original function to the replaced constants along with other - * arguments to retain the order in which variables are used within the function */ - Map> function_to_arguments_; - /* \brief Stack of functions to determine scope while filling up function_to_arguments_ */ - Array functions_; - /* \brief Keeps track of variables being created */ - int var_count_ = 0; -}; - -/*! * \brief Extracts all constants out of the partitioned function into main() */ -IRModule ExtractConstants(const IRModule& mod) { - String func_name; - Function func; - - auto extract_constants = ExtractConstantsMutator(mod); - Function main_func = Downcast(mod->Lookup("main")); - auto new_main_body = extract_constants.VisitExpr(main_func->body); - if (!new_main_body.same_as(main_func->body)) { - auto main_var = mod->GetGlobalVar("main"); - Function new_main_func = WithFields(main_func, main_func->params, new_main_body); - mod->Update(main_var, new_main_func); - } - return mod; -} - -transform::Pass ExtractConstantsFromPartitionedFunction() { - runtime::TypedPackedFunc pass_func = - [=](IRModule m, transform::PassContext pc) { return ExtractConstants(m); }; - return tvm::transform::CreateModulePass(pass_func, 0, "ExtractConstantsFromPartitionedFunction", - {"InferType"}); -} - -TVM_REGISTER_GLOBAL("relay.ext.cmsisnn.transform.ExtractConstantsFromPartitionedFunction") - .set_body_typed(ExtractConstantsFromPartitionedFunction); - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/cmsisnn/fuse_pads.cc b/src/relay/backend/contrib/cmsisnn/fuse_pads.cc deleted file mode 100644 index 0ef7091fc289..000000000000 --- a/src/relay/backend/contrib/cmsisnn/fuse_pads.cc +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/*! - * \file src/relay/backend/contrib/cmsisnn/fuse_pads.cc - * \brief Fuses pads that precede qnn.conv2d ops inside CMSIS-NN composite functions. - */ - -#include -#include -#include -#include -#include - -#include "../../../op/make_op.h" -#include "../../../qnn/utils.h" -#include "../../../transforms/pattern_utils.h" -#include "convolutions.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -inline IntImm ToIntImm(int32_t value) { return IntImm(DataType::Int(32), value); } - -/*! - * \brief From padding attributes of nn.pad and qnn.conv2d, calculates effective padding along H - * and W dimensions. - */ -Array GetEffectiveConv2DPadding(Expr conv2d, Expr pad) { - // pad_width: ((), (top, bottom), (left, right), ()) for NHWC layout - // conv2d_attrs->padding: (top, left, bottom, right) - auto* conv2d_call = conv2d.as(); - auto* conv2d_attrs = conv2d_call->attrs.as(); - std::string data_layout = conv2d_attrs->data_layout.c_str(); - int pos_h = data_layout.find("H"); - int pos_w = data_layout.find("W"); - - auto* pad_call = pad.as(); - Array> pad_width = pad_call->attrs.as()->pad_width; - int pad_top = - qnn::get_const_int(conv2d_attrs->padding[0]) + qnn::get_const_int(pad_width[pos_h][0]); - int pad_left = - qnn::get_const_int(conv2d_attrs->padding[1]) + qnn::get_const_int(pad_width[pos_w][0]); - int pad_bottom = - qnn::get_const_int(conv2d_attrs->padding[2]) + qnn::get_const_int(pad_width[pos_h][1]); - int pad_right = - qnn::get_const_int(conv2d_attrs->padding[3]) + qnn::get_const_int(pad_width[pos_w][1]); - - return {ToIntImm(pad_top), ToIntImm(pad_left), ToIntImm(pad_bottom), ToIntImm(pad_right)}; -} - -/*! - * \brief This Mutator will find all partitioned functions meant for CMSIS-NN Conv2D. - * Then, it will fuse preceding pads with qnn.conv2d. - */ -class FusePadsMutator : public MixedModeMutator { - public: - explicit FusePadsMutator(const IRModule& mod) : mod_(mod) {} - - private: - /*! - * \brief In order to eliminate preceding nn.pad op, pad_width of nn.pad is passed onto - * convolution layer to update Conv2DAttrs's padding attribute. */ - void UpdateConv2DPadding(const CallNode* conv2d_call, const CallNode* pad_call, - Attrs* new_attrs) { - Array effective_padding = - GetEffectiveConv2DPadding(GetRef(conv2d_call), GetRef(pad_call)); - int pad_top = effective_padding[0]->value; - int pad_left = effective_padding[1]->value; - int pad_bottom = effective_padding[2]->value; - int pad_right = effective_padding[3]->value; - int pad_diff_w = pad_right - pad_left; - int pad_diff_h = pad_bottom - pad_top; - bool can_pad_be_fused = - ((pad_diff_w == 0 || pad_diff_w == 1) && (pad_diff_h == 0 || pad_diff_h == 1)); - std::string error = "Difference on each side of a dimension should be either 0 or 1. 
"; - error += "Effective padding in this case: (pad_top, pad_left, pad_bottom, pad_right)=("; - error += std::to_string(pad_top); - error += ", "; - error += std::to_string(pad_left); - error += ", "; - error += std::to_string(pad_bottom); - error += ", "; - error += std::to_string(pad_right); - error += ")"; - ICHECK(can_pad_be_fused) << error; - - // Prepare new attrs as padding has changed - auto* conv2d_attrs = conv2d_call->attrs.as(); - auto attrs = make_object(); - attrs->strides = std::move(conv2d_attrs->strides); - attrs->dilation = std::move(conv2d_attrs->dilation); - attrs->groups = conv2d_attrs->groups; - attrs->channels = std::move(conv2d_attrs->channels); - attrs->kernel_size = std::move(conv2d_attrs->kernel_size); - attrs->data_layout = std::move(conv2d_attrs->data_layout); - attrs->kernel_layout = std::move(conv2d_attrs->kernel_layout); - attrs->out_layout = std::move(conv2d_attrs->out_layout); - attrs->out_dtype = std::move(conv2d_attrs->out_dtype); - attrs->padding = {pad_top, pad_left, pad_bottom, pad_right}; - *new_attrs = tvm::Attrs{attrs}; - } - - /*! - * \brief Identifies the sequence for qnn.conv2D and fuses the preceding nn.pad present within the - * CMSIS-NN partitioned function. */ - Expr FusePadConv2d(const CallNode* conv2d_call) { - // create new paddings for qnn.conv2d - tvm::Attrs new_conv2d_attrs = conv2d_call->attrs; - Expr new_conv2d_input = conv2d_call->args[0]; - if (auto* pad_call = conv2d_call->args[0].as()) { - if (auto* pad_call_op = pad_call->op.as()) { - if (pad_call_op->name == "nn.pad") { - new_conv2d_input = pad_call->args[0]; - UpdateConv2DPadding(conv2d_call, pad_call, &new_conv2d_attrs); - } - } - } - - // Conv2D arguments: pad's input + rest of the origin args - auto new_conv2d_args = conv2d_call->args; - new_conv2d_args.erase(new_conv2d_args.begin()); - new_conv2d_args.insert(new_conv2d_args.begin(), new_conv2d_input); - Call ret_call = Call(conv2d_call->op, new_conv2d_args, new_conv2d_attrs, {}, conv2d_call->span); - return std::move(ret_call); - } - - Expr Rewrite_(const CallNode* call, const Expr& post) final { - Expr ret_call = post; - auto* post_call = post.as(); - - // Fuse nn.pad and qnn.conv2d - if (auto* conv2d_op = post_call->op.as()) { - if (conv2d_op->name == "qnn.conv2d") { - ret_call = FusePadConv2d(post_call); - } - } - - // Identify qnn.conv2d partitioned function - if (post_call->op.as()) { - auto* func = call->op.as(); - auto func_name = func->GetAttr(attr::kComposite); - if (func_name.defined() && func_name == "cmsis-nn.qnn_conv2d") { - Expr new_body = VisitExpr(func->body); - Function new_func = Function(FreeVars(new_body), new_body, func->ret_type, - FreeTypeVars(new_body, mod_), func->attrs); - ret_call = Call(new_func, post_call->args); - ret_call->span = call->span; - } - } - - return ret_call; - } - - private: - IRModule mod_; -}; - -IRModule FusePads(const IRModule& mod) { - for (auto gv : mod->GetGlobalVars()) { - Function func = Downcast(mod->Lookup(gv)); - - // only mutate CMSIS-NN partitioned functions - auto compiler_name = func->GetAttr(attr::kCompiler); - if (!compiler_name.defined() || compiler_name != "cmsis-nn") { - continue; - } - - auto fuse_pads_mutator = FusePadsMutator(mod); - auto new_func_body = fuse_pads_mutator.VisitExpr(func->body); - if (!new_func_body.same_as(func->body)) { - Function new_func = - Function(func->params, new_func_body, func->ret_type, func->type_params, func->attrs); - mod->Update(gv, new_func); - } - } - return mod; -} - -transform::Pass CMSISNNFusePads() { - 
runtime::TypedPackedFunc pass_func = - [=](IRModule m, transform::PassContext pc) { return FusePads(m); }; - return tvm::transform::CreateModulePass(pass_func, 0, "CMSISNNFusePads", {}); -} - -TVM_REGISTER_GLOBAL("relay.ext.cmsisnn.transform.CMSISNNFusePads").set_body_typed(CMSISNNFusePads); -TVM_REGISTER_GLOBAL("relay.ext.cmsisnn.transform.GetEffectiveConv2DPadding") - .set_body_typed(GetEffectiveConv2DPadding); - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/cmsisnn/generate_constants.cc b/src/relay/backend/contrib/cmsisnn/generate_constants.cc deleted file mode 100644 index 3bdbb5d057eb..000000000000 --- a/src/relay/backend/contrib/cmsisnn/generate_constants.cc +++ /dev/null @@ -1,240 +0,0 @@ - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/*! - * \file generate_constant.cc - * \brief Generates quantization parameters needed by CMSIS-NN - */ - -#include -#include -#include -#include -#include - -#include "../../../op/make_op.h" -#include "../../../qnn/utils.h" -#include "../../../transforms/pattern_utils.h" -#include "../constant_transforms.h" -#include "convolutions.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -/*! - * \brief This Mutator will find all partitioned functions meant for CMSIS-NN Conv2D. - * It will substitute original Conv2D's weight zero point and original Requantize's input zero point - * with CMSIS-NN's quantization parameters. - * https://github.com/tensorflow/tflite-micro/blob/0f40100fc60276e9f345c23282de3baf19a78059/tensorflow/lite/kernels/internal/quantization_util.cc#L53 - */ -class GenerateConstantsMutator : public MixedModeMutator { - public: - explicit GenerateConstantsMutator(const IRModule& mod) : mod_(mod) {} - - private: - /*! 
* \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */ - Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) { - auto attrs = make_object(); - attrs->strides = std::move(conv2d_attrs->strides); - attrs->padding = std::move(conv2d_attrs->padding); - attrs->dilation = std::move(conv2d_attrs->dilation); - attrs->groups = conv2d_attrs->groups; - attrs->channels = std::move(conv2d_attrs->channels); - attrs->kernel_size = std::move(conv2d_attrs->kernel_size); - attrs->data_layout = std::move(conv2d_attrs->data_layout); - attrs->kernel_layout = runtime::String("OHWI"); - attrs->out_layout = std::move(conv2d_attrs->out_layout); - attrs->out_dtype = std::move(conv2d_attrs->out_dtype); - *new_attrs = tvm::Attrs{attrs}; - - Constant conv2d_kernel = Downcast(kernel_expr); - conv2d_kernel = TransposeWeights(conv2d_kernel, conv2d_attrs->kernel_layout, "OHWI"); - return conv2d_kernel; - } - - /*! * \brief Performs weight transpose and substitutes existing constants in the composite - * function for Conv2D with CMSIS-NN Requantize constants */ - Expr GenerateConv2dRequantConstants(const Expr& expr) { - const CallNode* clip_call = nullptr; - const CallNode* requantize_call = nullptr; - const CallNode* bias_add_call = nullptr; - const CallNode* conv2d_call = nullptr; - auto* final_call = expr.as(); - auto* final_op = final_call->op.as(); - if (final_op->name == "clip") { - clip_call = final_call; - requantize_call = clip_call->args[0].as(); - } else { - requantize_call = final_call; - } - auto* requantize_input = requantize_call->args[0].as(); - auto* requantize_input_op = requantize_input->op.as(); - if (requantize_input_op->name == "nn.bias_add") { - bias_add_call = requantize_input; - conv2d_call = bias_add_call->args[0].as(); - } else { - conv2d_call = requantize_input; - } - - auto* conv2d_attrs = conv2d_call->attrs.as(); - tvm::Attrs new_conv2d_attrs = conv2d_call->attrs; - Expr conv2d_kernel = conv2d_call->args[1]; - - Array input_shape = conv2d_call->args[0]->type_as()->shape; - Array kernel_shape = conv2d_call->args[1]->type_as()->shape; - if (!IsCMSISNNDepthwise(conv2d_attrs, input_shape, kernel_shape)) { - // Transpose weights: HWIO -> OHWI for Conv2D - conv2d_kernel = ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs); - } - - // Obtain input and output scales from Relay's Requantization - int64_t out_channels = conv2d_attrs->channels.as()->value; - float output_scale = GetScalarFromConstant(requantize_call->args[3]); - auto input_scale = GetScalarFromConstant(conv2d_call->args[4]); - auto filter_scales = tvm::relay::qnn::GetFloatVectorFromConstant(conv2d_call->args[5]); - - // Calculate requantization multiplier and shift - Device dev{DLDeviceType::kDLCPU, 0}; - runtime::NDArray multiplier_nda = - runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev); - runtime::NDArray shift_nda = runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev); - int32_t* multiplier = static_cast(multiplier_nda->data); - int32_t* shift = static_cast(shift_nda->data); - for (int i = 0; i < out_channels; ++i) { - double effective_output_scale = - static_cast(input_scale) * filter_scales[i] / static_cast(output_scale); - std::tie(*(multiplier + i), *(shift + i)) = - tvm::relay::qnn::GetFixedPointMultiplierShift(effective_output_scale); - } - - // Create constants from requantization multiplier and shift - Constant multiplier_const(multiplier_nda); - Constant shift_const(shift_nda); - - // Convert scale 
scalars into Constants - // Scales are expected as Constants by following passes - Expr weight_scale = conv2d_call->args[5]; - Expr req_inp_scale = requantize_call->args[1]; - if (out_channels == 1) { - runtime::NDArray weight_scale_nda = - runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev); - float* weight_scale_p = static_cast(weight_scale_nda->data); - *weight_scale_p = GetScalarFromConstant(weight_scale); - weight_scale = Constant(weight_scale_nda); - - runtime::NDArray req_inp_scale_nda = - runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev); - float* req_inp_scale_p = static_cast(req_inp_scale_nda->data); - *req_inp_scale_p = GetScalarFromConstant(req_inp_scale); - req_inp_scale = Constant(req_inp_scale_nda); - } - - // Replace existing weights (HWIO) with the transposed ones (OHWI) for Conv2D - // Substitute Conv2D weight_zero_point with the CMSIS-NN multiplier - // Substitute Requantize input_zero_point with CMSIS-NN shift - // Conv2D arguments: data, weight, input_zp, weight_zp, input_sc, weight_sc - Array conv2d_args = {conv2d_call->args[0], conv2d_kernel, conv2d_call->args[2], - multiplier_const, conv2d_call->args[4], weight_scale}; - Call ret_call = Call(conv2d_call->op, conv2d_args, new_conv2d_attrs, {}, conv2d_call->span); - if (bias_add_call) { - ret_call = Call(bias_add_call->op, {ret_call, bias_add_call->args[1]}, bias_add_call->attrs, - {}, bias_add_call->span); - } - Array requantize_args = {ret_call, req_inp_scale, shift_const, requantize_call->args[3], - requantize_call->args[4]}; - ret_call = Call(requantize_call->op, requantize_args, requantize_call->attrs, {}, - requantize_call->span); - if (clip_call) { - ret_call = Call(clip_call->op, {ret_call}, clip_call->attrs, {}, clip_call->span); - } - return std::move(ret_call); - } - - Expr Rewrite_(const CallNode* call, const Expr& post) final { - Expr final_call = post; - auto* post_call = post.as(); - - auto* global_var = call->op.as(); - if (global_var) { - // Update to global function call needed because the body changes while - // generating new constants - Function func = Downcast(mod_->Lookup(global_var->name_hint)); - Expr new_body = VisitExpr(func->body); - if (!new_body.same_as(func->body)) { - Function new_func = Function(FreeVars(new_body), new_body, func->ret_type, - FreeTypeVars(new_body, mod_), func->attrs); - mod_->Update(GetRef(global_var), new_func); - final_call = Call(GetRef(global_var), post_call->args); - } - } - - // Recreate composite function and corresponding call - // Updated composite function contains CMSIS-NN quantized multiplier and shift constants - if (call->op.as()) { - auto* func = call->op.as(); - auto func_name = func->GetAttr(attr::kComposite); - if (func_name.defined() && func_name == "cmsis-nn.qnn_conv2d") { - Expr new_body = GenerateConv2dRequantConstants(func->body); - Function new_func = Function(FreeVars(new_body), new_body, func->ret_type, - FreeTypeVars(new_body, mod_), func->attrs); - final_call = Call(new_func, post_call->args); - } - } - - final_call->span = call->span; - return final_call; - } - - private: - IRModule mod_; -}; - -IRModule GenerateConstants(const IRModule& mod) { - String func_name; - Function func; - - // Introduces CMSIS-NN constants before the call to the external Relay function - auto generate_constants = GenerateConstantsMutator(mod); - Function main_func = Downcast(mod->Lookup("main")); - auto new_main_body = generate_constants.VisitExpr(main_func->body); - if (!new_main_body.same_as(main_func->body)) { - auto main_var = 
mod->GetGlobalVar("main"); - auto new_main_func = Function(main_func->params, new_main_body, main_func->ret_type, - main_func->type_params, main_func->attrs); - mod->Update(main_var, new_main_func); - } - - return mod; -} - -transform::Pass GenerateCMSISNNConstants() { - runtime::TypedPackedFunc pass_func = - [=](IRModule m, transform::PassContext pc) { return GenerateConstants(m); }; - return tvm::transform::CreateModulePass(pass_func, 0, "GenerateCMSISNNConstants", {}); -} - -TVM_REGISTER_GLOBAL("relay.ext.cmsisnn.transform.GenerateCMSISNNConstants") - .set_body_typed(GenerateCMSISNNConstants); - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc b/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc deleted file mode 100644 index 49800195f68b..000000000000 --- a/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc +++ /dev/null @@ -1,938 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../../../qnn/utils.h" -#include "../../../transforms/pattern_utils.h" -#include "buffer_size.h" -#include "compiler_attrs.h" -#include "compute_luts.h" -#include "convolutions.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -/*! - * \brief This is a helper class to generate tir.Buffers and BufferMap - * - * The PrimFuncs generated needs Buffers produced to attach information - * about the inputs and output tir::Vars. This is helper class to generate - * them in the relay to TIR lowering - */ -class BufferCreator { - public: - /*! \brief Creates a tir::Var and tir::Buffer then returns the buffer var to be used by the body - */ - tir::Var CreateBufferVar(String name_hint, DataType dtype) { - tir::Var var = tir::Var(name_hint, dtype); - tir::Buffer buffer = tir::decl_buffer({}, DataType::Int(dtype.bits()), name_hint + "_"); - _primfunc_params_.push_back(var); - _buffer_map_.Set(var, buffer); - _buffer_vars_.Set(name_hint, buffer->data); - return buffer->data; - } - /*! \brief Access already created buffer_var by associated tir::Var name */ - tir::Var GetBufferVar(String name_hint) { return _buffer_vars_[name_hint]; } - /*! \brief Get the BufferMap that maps tir::Var to tir::Buffer */ - Map GetBufferMap() { return _buffer_map_; } - /*! 
\brief Get the PrimFunc params that is a collection of tir::Vars created in the process */ - Array GetPrimFuncParams() { return _primfunc_params_; } - - private: - Map _buffer_vars_; - Map _buffer_map_; - Array _primfunc_params_; -}; - -class RelayToTIRVisitor : public MixedModeMutator { - public: - explicit RelayToTIRVisitor(IRModule ir_module, Target target) - : ir_module_(ir_module), target_(target) { - context_buffer_id_ = 0; - } - - IRModule Mutate() { - GlobalVar main_global_var = ir_module_->GetGlobalVar("main"); - Function main = Downcast(ir_module_->Lookup(main_global_var)); - Function mutated_main = WithFields(main, main->params, VisitExpr(main->body)); - - ir_module_->Update(main_global_var, mutated_main); - - return ir_module_; - } - - private: - inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); } - - // struct used to allocated const NDArray - struct tir_input_constant_buffers { - tir::Var buffer_var; - tvm::runtime::NDArray ndarray; - }; - - void CreatePrimFuncForExtern( - const GlobalVar& global_var, Array func_signature, - const Map& buffer_map, tvm::Array call_extern_args, - PrimExpr context_buffer_var = PrimExpr(), int context_buffer_size = 0, int num_bits = 8, - std::vector context_const_buffer_vars = {}) { - Map dict_attrs; - dict_attrs.Set(tvm::attr::kGlobalSymbol, global_var->name_hint); - dict_attrs.Set(tvm::attr::kTarget, target_); - dict_attrs.Set("tir.noalias", Bool(true)); - - tir::Stmt body = tir::Evaluate( - tvm::tir::Call(DataType::Int(num_bits), tir::builtin::call_extern(), call_extern_args)); - - if (context_buffer_size) { - body = tir::Allocate(Downcast(context_buffer_var), DataType::Int(num_bits), - {context_buffer_size}, tir::const_true(), body); - } - - for (int i = 0; i < static_cast(context_const_buffer_vars.size()); i++) { - int bits = context_const_buffer_vars[i].ndarray.DataType().bits(); - - Array extents; - for (int shape : context_const_buffer_vars[i].ndarray.Shape()) { - extents.push_back(PrimExpr(shape)); - } - - body = tir::AllocateConst(Downcast(context_const_buffer_vars[i].buffer_var), - DataType::Int(bits), extents, context_const_buffer_vars[i].ndarray, - body); - } - - tir::PrimFunc replacement_func(func_signature, body, VoidType(), buffer_map, - DictAttrs(dict_attrs)); - - ir_module_->Add(global_var, replacement_func); - } - - auto GetIntMinMax(int bit_width) { - const int32_t min = - (bit_width == 8) ? std::numeric_limits::min() : std::numeric_limits::min(); - const int32_t max = - (bit_width == 8) ? 
std::numeric_limits::max() : std::numeric_limits::max(); - return std::pair(min, max); - } - - auto GetClipMinMax(const ClipAttrs* clip_attrs) { - return std::pair(static_cast(clip_attrs->a_min), - static_cast(clip_attrs->a_max)); - } - - auto GetClipMinMax(const Call& clip_op) { return GetClipMinMax(clip_op->attrs.as()); } - - void EmitConv2D(const GlobalVar& global_var, const Expr& expr) { - const CallNode* clip_call = nullptr; - const CallNode* requantize_call = nullptr; - const CallNode* bias_add_call = nullptr; - const CallNode* conv2d_call = nullptr; - const CallNode* final_call = expr.as(); - const OpNode* final_op = final_call->op.as(); - if (final_op->name == "clip") { - clip_call = final_call; - requantize_call = clip_call->args[0].as(); - } else { - requantize_call = final_call; - } - const CallNode* requantize_input = requantize_call->args[0].as(); - const OpNode* requantize_input_op = requantize_input->op.as(); - if (requantize_input_op->name == "nn.bias_add") { - bias_add_call = requantize_input; - conv2d_call = bias_add_call->args[0].as(); - } else { - conv2d_call = requantize_input; - } - int32_t dtype_bits = conv2d_call->args[0]->type_as()->dtype.bits(); - - // Determine bitwidth of buffers based on input dtype - int32_t input_bits = 8; - int32_t filter_bits = 8; - int32_t bias_bits = 32; - int32_t output_bits = 8; - int32_t context_buffer_bits = 8; - bool is_int16 = false; - if (dtype_bits == 16) { - is_int16 = true; - input_bits = 16; - bias_bits = 64; - output_bits = 16; - context_buffer_bits = 16; - } - - // TIR variables are created in the order they appear in the Relay partitioned function - // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar, - // %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2) - // %2 = nn.bias_add(%1, %bias_const_3, axis=3) - // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5, - // %output_scale_scalar, %output_zero_point_scalar) - // clip(%3, a_min=%min_scalar, a_max=%max_scalar) - // Position of scales in the global function for Conv2D - const int filter_scale_pos = 3; - const int input_scale_pos = bias_add_call ? 
5 : 4; - BufferCreator buffer_creator; - tir::Var input = buffer_creator.CreateBufferVar("input", DataType::Handle(input_bits)); - tir::Var filter = buffer_creator.CreateBufferVar("filter", DataType::Handle(filter_bits)); - tir::Var multiplier = buffer_creator.CreateBufferVar("multiplier", DataType::Handle(32)); - if (bias_add_call) { - buffer_creator.CreateBufferVar("bias", DataType::Handle(bias_bits)); - } - tir::Var shift = buffer_creator.CreateBufferVar("shift", DataType::Handle(32)); - tir::Var output = buffer_creator.CreateBufferVar("output", DataType::Handle(output_bits)); - - // Relay function contains input_scale and filter_scale as function parameters at the following - // locations in the global partitioned function for Conv2D - skip_call_args_.insert(filter_scale_pos); - skip_call_args_.insert(input_scale_pos); - - // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern - // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50 - - // prepare cmsis_nn_conv_params - const Conv2DAttrs* conv2d_attrs = conv2d_call->attrs.as(); - int32_t input_offset = -GetScalarFromConstant(conv2d_call->args[2]); - int32_t output_offset = GetScalarFromConstant(requantize_call->args[4]); - int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]); - int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]); - int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]); - int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]); - int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]); - int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]); - int32_t out_channels = qnn::get_const_int(conv2d_attrs->channels); - std::string kernel_layout = conv2d_attrs->kernel_layout.c_str(); - const auto [clip_min, clip_max] = - clip_call ? GetClipMinMax(GetRef(clip_call)) : GetIntMinMax(dtype_bits); - - tvm::Array scalar_args = {ToArg(input_offset), ToArg(output_offset), ToArg(stride_w), - ToArg(stride_h), ToArg(padding_w), ToArg(padding_h), - ToArg(dilation_w), ToArg(dilation_h), ToArg(clip_min), - ToArg(clip_max)}; - - // CMSIS-NN data structure "cmsis_nn_dims" for ifm expects input layout as NHWC - // This is the same layout we expect in Relay - Array input_shape = conv2d_call->args[0]->type_as()->shape; - int32_t input_n = qnn::get_const_int(input_shape[0]); - int32_t input_h = qnn::get_const_int(input_shape[1]); - int32_t input_c = qnn::get_const_int(input_shape[3]); - - // CMSIS-NN data structure "cmsis_nn_dims" for weights expects following layouts - // OHWI for Conv2D and IHWO for Depthwise convolutions - Array filter_shape = conv2d_call->args[1]->type_as()->shape; - - Array bias_shape{1, 1, 1, out_channels}; - - Array output_shape = conv2d_call->type_as()->shape; - int32_t output_h = qnn::get_const_int(output_shape[1]); - int32_t output_w = qnn::get_const_int(output_shape[2]); - int32_t output_c = qnn::get_const_int(output_shape[3]); - - int32_t depth_multiplier = -1; - if (IsCMSISNNDepthwise(conv2d_attrs, input_shape, filter_shape)) { - // Refer to TVM frontend to know how depth multiplier and out_channels are related - // https://github.com/apache/tvm/blob/6ed3ab3e33f8eafa4acaf53b7a671831de7587e9/python/tvm/relay/frontend/tflite.py#L2129 - int kernel_pos_i = kernel_layout.find("I"); - int kernel_pos_o = kernel_layout.find("O"); - int kernel_pos_dm = input_c == 1 ? 
kernel_pos_o : kernel_pos_i; - depth_multiplier = qnn::get_const_int(filter_shape[kernel_pos_dm]); - } - scalar_args.push_back(ToArg(depth_multiplier)); - - // original filter_layout for depthwise is HWOI - std::string cmsisnn_api = is_int16 ? "arm_convolve_wrapper_s16" : "arm_convolve_wrapper_s8"; - bool is_depthwise = depth_multiplier != -1; - if (is_depthwise) { - cmsisnn_api = is_int16 ? "arm_depthwise_conv_wrapper_s16" : "arm_depthwise_conv_wrapper_s8"; - int filter_pos_h = kernel_layout.find("H"); - int filter_pos_w = kernel_layout.find("W"); - Array depthwise_filter_shape{1, filter_shape[filter_pos_h], - filter_shape[filter_pos_w], out_channels}; - filter_shape = depthwise_filter_shape; - } - int32_t filter_h = qnn::get_const_int(filter_shape[1]); - int32_t filter_w = qnn::get_const_int(filter_shape[2]); - - tvm::Array call_ext_args = {tir::StringImm(cmsisnn_api), input, filter, multiplier}; - if (bias_add_call) { - tir::Var bias = buffer_creator.GetBufferVar("bias"); - call_ext_args.push_back(bias); - } - call_ext_args.push_back(shift); - call_ext_args.push_back(output); - - PrimExpr context_buffer_var = tir::StringImm("NULL"); - Target target = CreateTarget(transform::PassContext::Current()); - size_t context_buffer_size; - if (is_depthwise) { - context_buffer_size = - DepthwiseConv2dBufferSize(is_int16, target, input_n, input_c, output_c, filter_w, - filter_h, dilation_w, dilation_h, depth_multiplier); - } else { - context_buffer_size = Conv2dBufferSize(is_int16, target, padding_w, padding_h, input_n, - input_h, input_c, output_h, output_w, stride_w, - stride_h, dilation_w, dilation_h, filter_w, filter_h); - } - - if (context_buffer_size) { - String context_buffer_name = "context_buffer_" + std::to_string(context_buffer_id_++); - context_buffer_var = - tir::Var(context_buffer_name, - PointerType(PrimType(DataType::Int(context_buffer_bits)), "global.workspace")); - } - tvm::Array context_buffer_args = {context_buffer_var, ToArg(context_buffer_size)}; - - scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args); - scalar_args = tvm::runtime::Concat(scalar_args, input_shape); - scalar_args = tvm::runtime::Concat(scalar_args, filter_shape); - scalar_args = tvm::runtime::Concat(scalar_args, bias_shape); - scalar_args = tvm::runtime::Concat(scalar_args, output_shape); - call_ext_args = tvm::runtime::Concat(call_ext_args, scalar_args); - - CreatePrimFuncForExtern(global_var, buffer_creator.GetPrimFuncParams(), - buffer_creator.GetBufferMap(), call_ext_args, context_buffer_var, - context_buffer_size, context_buffer_bits); - } - - void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) { - const CallNode* clip_call = nullptr; - const CallNode* requantize_call = nullptr; - const CallNode* bias_add_call = nullptr; - const CallNode* fc_call = nullptr; - const CallNode* final_call = expr.as(); - const OpNode* final_op = final_call->op.as(); - if (final_op->name == "clip") { - clip_call = final_call; - requantize_call = clip_call->args[0].as(); - } else { - requantize_call = final_call; - } - const CallNode* requantize_input = requantize_call->args[0].as(); - const OpNode* requantize_input_op = requantize_input->op.as(); - if (requantize_input_op->name == "nn.bias_add") { - bias_add_call = requantize_input; - fc_call = bias_add_call->args[0].as(); - } else { - fc_call = requantize_input; - } - - // Extract the size of the input parameter from the call arguments. 
Other params are based off - // the input size - int32_t dtype_bits = fc_call->args[0]->type_as()->dtype.bits(); - int32_t input_bits = dtype_bits; - int32_t filter_bits = 8; - int32_t bias_bits = dtype_bits * 4U; - int32_t output_bits = dtype_bits; - - // TIR variables are created in the order they appear in the Relay partitioned function - // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, kernel_zero_point_scalar, - // %input_scale_scalar, %kernel_scale_scalar) - // %2 = nn.bias_add(%1, %bias_const_1, axis=1) - // %3 = qnn.requantize(%2, %req_input_scale_scalar, %req_input_zero_point_scalar, - // %output_scale_scalar, %output_zero_point_scalar) - // clip(%3, a_min=%min_scalar, a_max=%max_scalar) - BufferCreator buffer_creator; - tir::Var input = buffer_creator.CreateBufferVar("input", DataType::Handle(input_bits)); - tir::Var filter = buffer_creator.CreateBufferVar("filter", DataType::Handle(filter_bits)); - if (bias_add_call) { - buffer_creator.CreateBufferVar("bias", DataType::Handle(bias_bits)); - } - tir::Var output = buffer_creator.CreateBufferVar("output", DataType::Handle(output_bits)); - - // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern - // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50 - - // prepare cmsis_nn_fc_params - const DenseAttrs* dense_attrs = fc_call->attrs.as(); - int32_t input_offset = -GetScalarFromConstant(fc_call->args[2]); - int32_t filter_offset = -GetScalarFromConstant(fc_call->args[3]); - int32_t output_offset = GetScalarFromConstant(requantize_call->args[4]); - float input_scale = GetScalarFromConstant(requantize_call->args[1]); - float output_scale = GetScalarFromConstant(requantize_call->args[3]); - int32_t out_channels = qnn::get_const_int(dense_attrs->units); - const auto [clip_min, clip_max] = - clip_call ? GetClipMinMax(GetRef(clip_call)) : GetIntMinMax(dtype_bits); - - double quantized_multiplier = - static_cast(input_scale) / static_cast(output_scale); - auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier); - int32_t multiplier = std::get<0>(mult_shift_pair); - int32_t shift = std::get<1>(mult_shift_pair); - - tvm::Array scalar_args = { - ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), ToArg(clip_min), - ToArg(clip_max), ToArg(multiplier), ToArg(shift)}; - - Array input_shape = fc_call->args[0]->type_as()->shape; - int32_t batch_size = qnn::get_const_int(input_shape[0]); - int32_t in_channels = qnn::get_const_int(input_shape[1]); - Array cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]}; - - Array cmsisnn_filter_shape{in_channels, 1, 1, out_channels}; - - Array bias_shape{1, 1, 1, out_channels}; - - Array cmsisnn_output_shape{batch_size, 1, 1, out_channels}; - - std::string cmsisnn_api = - dtype_bits == 16 ? 
"arm_fully_connected_s16" : "arm_fully_connected_s8"; - - tvm::Array call_ext_args = {tir::StringImm(cmsisnn_api), input, filter}; - if (bias_add_call) { - call_ext_args.push_back(buffer_creator.GetBufferVar("bias")); - } - call_ext_args.push_back(output); - - int context_buffer_size = 0; - PrimExpr context_buffer_var = tir::StringImm("NULL"); - tvm::Array context_buffer_args = {context_buffer_var, ToArg(context_buffer_size)}; - - scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args); - scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape); - scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_filter_shape); - scalar_args = tvm::runtime::Concat(scalar_args, bias_shape); - scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_output_shape); - call_ext_args = tvm::runtime::Concat(call_ext_args, scalar_args); - - CreatePrimFuncForExtern(global_var, buffer_creator.GetPrimFuncParams(), - buffer_creator.GetBufferMap(), call_ext_args, context_buffer_var, - context_buffer_size); - } - - void EmitPool2D(const GlobalVar& global_var, const Expr& expr, const String pool_name) { - Call clip, pool; - Call final_call = Downcast(expr); - Op final_op = Downcast(final_call->op); - if (final_op->name == "clip") { - clip = final_call; - Call clip_input = Downcast(clip->args[0]); - Op clip_input_op = Downcast(clip_input->op); - if (clip_input_op->name == "cast") { - pool = Downcast(clip_input->args[0]); - } else { // max_pool2d - pool = clip_input; - } - } else if (final_op->name == "cast") { - pool = Downcast(final_call->args[0]); - } else { // max_pool2d - pool = final_call; - } - - int32_t dtype_bits = final_call->type_as()->dtype.bits(); - - // prepare cmsis_nn_pool_params - int32_t stride_h, stride_w, padding_h, padding_w, pool_size_h, pool_size_w; - std::string cmsisnn_api; - if (pool_name == "cmsis-nn.qnn_avg_pool2d") { - if (dtype_bits == 8) { - cmsisnn_api = "arm_avgpool_s8"; - } else { - cmsisnn_api = "arm_avgpool_s16"; - } - - const AvgPool2DAttrs* attrs = pool->attrs.as(); - stride_h = qnn::get_const_int(attrs->strides[0]); - stride_w = qnn::get_const_int(attrs->strides[1]); - padding_h = qnn::get_const_int(attrs->padding[0]); - padding_w = qnn::get_const_int(attrs->padding[1]); - pool_size_h = qnn::get_const_int(attrs->pool_size[0]); - pool_size_w = qnn::get_const_int(attrs->pool_size[1]); - } else { - if (dtype_bits == 8) { - cmsisnn_api = "arm_max_pool_s8"; - } else { - cmsisnn_api = "arm_max_pool_s16"; - } - - const MaxPool2DAttrs* attrs = pool->attrs.as(); - stride_h = qnn::get_const_int(attrs->strides[0]); - stride_w = qnn::get_const_int(attrs->strides[1]); - padding_h = qnn::get_const_int(attrs->padding[0]); - padding_w = qnn::get_const_int(attrs->padding[1]); - pool_size_h = qnn::get_const_int(attrs->pool_size[0]); - pool_size_w = qnn::get_const_int(attrs->pool_size[1]); - } - - const auto [clip_min, clip_max] = - clip.defined() ? 
GetClipMinMax(clip) : GetIntMinMax(dtype_bits); - - tvm::Array scalar_args = {ToArg(stride_h), ToArg(stride_w), ToArg(padding_h), - ToArg(padding_w), ToArg(clip_min), ToArg(clip_max)}; - - Array input_shape = pool->args[0]->type_as()->shape; - Array cmsisnn_input_shape{1, input_shape[1], input_shape[2], input_shape[3]}; - - Array cmsisnn_filter_shape{1, pool_size_h, pool_size_w, 1}; - - Array output_shape = pool->type_as()->shape; - Array cmsisnn_output_shape{1, output_shape[1], output_shape[2], output_shape[3]}; - - BufferCreator buffer_creator; - tir::Var input = buffer_creator.CreateBufferVar("input", DataType::Handle(dtype_bits)); - tir::Var output = buffer_creator.CreateBufferVar("output", DataType::Handle(dtype_bits)); - tvm::Array call_ext_args = {tir::StringImm(cmsisnn_api), input, output}; - - int context_buffer_size = 0; - PrimExpr context_buffer_var = tir::StringImm("NULL"); - if (pool_name == "cmsis-nn.qnn_avg_pool2d") { - Target target = CreateTarget(transform::PassContext::Current()); - int32_t input_c = qnn::get_const_int(input_shape[3]); - context_buffer_size = AvgPoolBufferSize(target, input_c); - if (context_buffer_size) { - std::string context_buffer_name = "context_buffer_" + std::to_string(context_buffer_id_++); - context_buffer_var = tir::Var(context_buffer_name, - PointerType(PrimType(DataType::Int(8)), "global.workspace")); - } - } - tvm::Array context_buffer_args = {context_buffer_var, ToArg(context_buffer_size)}; - - scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args); - scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape); - scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_filter_shape); - scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_output_shape); - call_ext_args = tvm::runtime::Concat(call_ext_args, scalar_args); - - CreatePrimFuncForExtern(global_var, buffer_creator.GetPrimFuncParams(), - buffer_creator.GetBufferMap(), call_ext_args, context_buffer_var, - context_buffer_size); - } - - void EmitSoftMax(const GlobalVar& global_var, const Expr& expr) { - const CallNode* quantize_call = expr.as(); - const CallNode* softmax_call = quantize_call->args[0].as(); - const CallNode* dequant_call = softmax_call->args[0].as(); - const float quant_scale = GetScalarFromConstant(dequant_call->args[1]); - const auto bit_width = quantize_call->type_as()->dtype.bits(); - - // assuming layout as NHWC - auto shape = quantize_call->type_as()->shape; - int trailing_dim = shape.size() - 1; - int row_size = shape[trailing_dim].as()->value; - int num_rows = 1; - for (int i = 0; i < trailing_dim; ++i) { - num_rows *= shape[i].as()->value; - } - - // calculate multiplier and shift for CMSIS-NN softmax API - // Note: TensorFlow Lite Micro assumptions - // Output zero point and scale are fixed to -128 and 1 / 256 in the case of an int8 operator - // or to 0 and 1 / 32768 in the case of an int16 operator - // kScaledDiffIntegerBits, kInputBits, kBeta are described on the following github page - // https://github.com/tensorflow/tflite-micro/blob/d97cd0908d8cf5021e9d86f05a49888bee28c2a4/tensorflow/lite/micro/kernels/softmax_common.cc#L47 - - int32_t mult; - int32_t shift; - int32_t diff_min = 0; - - std::vector softmax_params(2); - Device dev{DLDeviceType::kDLCPU, 0}; - - if (bit_width == 8) { - double beta_multiplier = (kBeta * quant_scale * (1 << (31 - kInputBits))); - beta_multiplier = std::min(beta_multiplier, (1ll << 31) - 1.0); - auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(beta_multiplier); - mult = 
std::get<0>(mult_shift_pair); - shift = std::get<1>(mult_shift_pair); - diff_min = (1 << kScaledDiffIntegerBits) - 1; - diff_min <<= (31 - kScaledDiffIntegerBits); - diff_min >>= shift; - diff_min *= -1; - } else { // bit_width == 16 - double scale_beta_rescale = quant_scale * kBeta / (10.0 / 65535.0); - auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(scale_beta_rescale); - mult = std::get<0>(mult_shift_pair); - shift = std::get<1>(mult_shift_pair); - - const int kLUTEntries = 513; - int16_t softmax_s16_exp_lut[kLUTEntries]; - int16_t softmax_s16_one_by_one_lut[kLUTEntries]; - - const int range_int16 = - std::numeric_limits::max() - std::numeric_limits::min(); - int exp_zero_point = std::numeric_limits::max(); - float exp_scale = 10.0f / range_int16; - - int one_by_one_zero_point = std::numeric_limits::min(); - float one_by_one_scale = 1.0f / range_int16; - - int lut_value_zero_point = 0; - float lut_value_scale = 2.0f / range_int16; - - CalculateLUTInt16( - exp_zero_point, exp_scale, lut_value_zero_point, lut_value_scale, - [](float key) { return std::exp(key); }, kLUTEntries, softmax_s16_exp_lut); - CalculateLUTInt16( - one_by_one_zero_point, one_by_one_scale, lut_value_zero_point, lut_value_scale, - [](float key) { return 1.0f / (1.0f + key); }, kLUTEntries, softmax_s16_one_by_one_lut); - - // first LUT - softmax_params[0].buffer_var = - tir::Var("exp_lut", PointerType(PrimType(DataType::Int(bit_width)), "global.workspace")); - softmax_params[0].ndarray = - runtime::NDArray::Empty({kLUTEntries}, DataType::Int(bit_width), dev); - softmax_params[0].ndarray.CopyFromBytes(softmax_s16_exp_lut, sizeof(int16_t) * kLUTEntries); - - // second LUT - softmax_params[1].buffer_var = tir::Var( - "one_by_one_lut", PointerType(PrimType(DataType::Int(bit_width)), "global.workspace")); - softmax_params[1].ndarray = - runtime::NDArray::Empty({kLUTEntries}, DataType::Int(bit_width), dev); - softmax_params[1].ndarray.CopyFromBytes(softmax_s16_one_by_one_lut, - sizeof(int16_t) * kLUTEntries); - } - - BufferCreator buffer_creator; - tir::Var in_var = buffer_creator.CreateBufferVar("input", DataType::Handle(bit_width)); - tir::Var out_var = buffer_creator.CreateBufferVar("output", DataType::Handle(bit_width)); - - if (bit_width == 8) { - tvm::Array args = { - tir::StringImm("arm_softmax_s" + std::to_string(bit_width)), - in_var, - ToArg(num_rows), - ToArg(row_size), - ToArg(mult), - ToArg(shift), - ToArg(diff_min), - out_var, - }; - - CreatePrimFuncForExtern(global_var, buffer_creator.GetPrimFuncParams(), - buffer_creator.GetBufferMap(), args); - } else { // bit_width == 16 - tvm::Array args = { - tir::StringImm("arm_softmax_s" + std::to_string(bit_width)), - in_var, - ToArg(num_rows), - ToArg(row_size), - ToArg(mult), - ToArg(shift), - softmax_params[0].buffer_var, - softmax_params[1].buffer_var, - out_var, - }; - - CreatePrimFuncForExtern(global_var, buffer_creator.GetPrimFuncParams(), - buffer_creator.GetBufferMap(), args, PrimExpr(), 0, 16, - softmax_params); - } - } - - struct BinaryElementwiseClipPattern { - Call binary_op; - Optional clip_op; - }; - - BinaryElementwiseClipPattern ParseBinaryElementwiseOpClipPattern(const Expr& expr) { - BinaryElementwiseClipPattern pattern; - Call final_call = Downcast(expr); - const OpNode* final_op = final_call->op.as(); - if (final_op->name == "clip") { - pattern.clip_op = final_call; - pattern.binary_op = Downcast(final_call->args[0]); - } else { - pattern.binary_op = final_call; - pattern.clip_op = Optional{nullptr}; - } - return pattern; - } - 
- void EmitMul(const GlobalVar& global_var, const Expr& expr) { - const auto& pattern = ParseBinaryElementwiseOpClipPattern(expr); - Call mul_call = pattern.binary_op; - const auto bit_width = mul_call->type_as()->dtype.bits(); - const auto [output_min, output_max] = - pattern.clip_op ? GetClipMinMax(pattern.clip_op.value()) : GetIntMinMax(bit_width); - - const float input_0_scale = GetScalarFromConstant(mul_call->args[2]); - const int32_t input_0_zero_point = GetScalarFromConstant(mul_call->args[3]); - const float input_1_scale = GetScalarFromConstant(mul_call->args[4]); - const int32_t input_1_zero_point = GetScalarFromConstant(mul_call->args[5]); - const float output_scale = GetScalarFromConstant(mul_call->args[6]); - const int32_t output_zero_point = GetScalarFromConstant(mul_call->args[7]); - - double quantized_multiplier = static_cast(input_0_scale) * - static_cast(input_1_scale) / - static_cast(output_scale); - auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier); - int32_t output_multiplier = std::get<0>(mult_shift_pair); - int32_t output_shift = std::get<1>(mult_shift_pair); - - PrimExpr tensor_size = mul_call->type_as()->Size(); - - BufferCreator buffer_creator; - tir::Var input_0 = buffer_creator.CreateBufferVar("input_0", DataType::Handle(bit_width)); - tir::Var input_1; - if (mul_call->args[0].same_as(mul_call->args[1])) { - input_1 = input_0; - } else { - input_1 = buffer_creator.CreateBufferVar("input_1", DataType::Handle(bit_width)); - } - tir::Var output = buffer_creator.CreateBufferVar("output", DataType::Handle(bit_width)); - - tvm::Array args = { - tir::StringImm("arm_elementwise_mul_s" + std::to_string(bit_width)), - input_0, - input_1, - ToArg(-input_0_zero_point), - ToArg(-input_1_zero_point), - output, - ToArg(output_zero_point), - ToArg(output_multiplier), - ToArg(output_shift), - ToArg(output_min), - ToArg(output_max), - tensor_size, - }; - - CreatePrimFuncForExtern(global_var, buffer_creator.GetPrimFuncParams(), - buffer_creator.GetBufferMap(), args); - } - - void EmitAdd(const GlobalVar& global_var, const Expr& expr) { - const auto& pattern = ParseBinaryElementwiseOpClipPattern(expr); - Call add_call = pattern.binary_op; - const auto bit_width = add_call->type_as()->dtype.bits(); - - const auto [output_min, output_max] = - pattern.clip_op ? GetClipMinMax(pattern.clip_op.value()) : GetIntMinMax(bit_width); - - const float input_0_scale = GetScalarFromConstant(add_call->args[2]); - const int32_t input_0_zero_point = GetScalarFromConstant(add_call->args[3]); - const float input_1_scale = GetScalarFromConstant(add_call->args[4]); - const int32_t input_1_zero_point = GetScalarFromConstant(add_call->args[5]); - const float output_scale = GetScalarFromConstant(add_call->args[6]); - const int32_t output_zero_point = GetScalarFromConstant(add_call->args[7]); - - const int32_t left_shift = (bit_width == 16) ? 
15 : 20; - const int32_t input_0_offset = -input_0_zero_point; - const int32_t input_1_offset = -input_1_zero_point; - const int32_t output_offset = output_zero_point; - - const float max_input_scale = std::max(input_0_scale, input_1_scale); - const double twice_max_input_scale = 2 * static_cast(max_input_scale); - const double scaled_input_0_scale = static_cast(input_0_scale) / twice_max_input_scale; - const double scaled_input_1_scale = static_cast(input_1_scale) / twice_max_input_scale; - const double scaled_output_scale = - twice_max_input_scale / ((1 << left_shift) * static_cast(output_scale)); - - auto input_0_mult_shift_pair = - tvm::relay::qnn::GetFixedPointMultiplierShift(scaled_input_0_scale); - int32_t input_0_multiplier = std::get<0>(input_0_mult_shift_pair); - int32_t input_0_shift = std::get<1>(input_0_mult_shift_pair); - - auto input_1_mult_shift_pair = - tvm::relay::qnn::GetFixedPointMultiplierShift(scaled_input_1_scale); - int32_t input_1_multiplier = std::get<0>(input_1_mult_shift_pair); - int32_t input_1_shift = std::get<1>(input_1_mult_shift_pair); - - auto output_mult_shift_pair = - tvm::relay::qnn::GetFixedPointMultiplierShift(scaled_output_scale); - int32_t output_multiplier = std::get<0>(output_mult_shift_pair); - int32_t output_shift = std::get<1>(output_mult_shift_pair); - - PrimExpr tensor_size = add_call->type_as()->Size(); - - BufferCreator buffer_creator; - tir::Var input_0 = buffer_creator.CreateBufferVar("input_0", DataType::Handle(bit_width)); - tir::Var input_1; - if (add_call->args[0].same_as(add_call->args[1])) { - input_1 = input_0; - } else { - input_1 = buffer_creator.CreateBufferVar("input_1", DataType::Handle(bit_width)); - } - tir::Var output = buffer_creator.CreateBufferVar("output", DataType::Handle(bit_width)); - - tvm::Array args = { - tir::StringImm("arm_elementwise_add_s" + std::to_string(bit_width)), - input_0, - input_1, - ToArg(input_0_offset), - ToArg(input_0_multiplier), - ToArg(input_0_shift), - ToArg(input_1_offset), - ToArg(input_1_multiplier), - ToArg(input_1_shift), - ToArg(left_shift), - output, - ToArg(output_offset), - ToArg(output_multiplier), - ToArg(output_shift), - ToArg(output_min), - ToArg(output_max), - tensor_size, - }; - - CreatePrimFuncForExtern(global_var, buffer_creator.GetPrimFuncParams(), - buffer_creator.GetBufferMap(), args); - } - - // Removes kCompiler attribute from the partitioned functions that are not supported by this - // RelayToTIR - Call CallToFuncWithoutCompilerAttr(GlobalVar new_global_var, Call call, Function func) { - Function new_func = WithoutAttr(std::move(func), attr::kCompiler); - ir_module_->Update(new_global_var, new_func); - return Call(new_global_var, call->args, call->attrs, call->type_args, call->span); - } - - Expr VisitExpr_(const LetNode* op) final { - auto pre_visit = [this](const LetNode* op) { - Expr var = this->VisitExpr(op->var); - Expr value = this->VisitExpr(op->value); - // outlineable function no longer needs let binding - if (this->CanOutlineExpr(value)) { - this->memo_[var] = value; - } - }; - auto post_visit = [this](const LetNode* op) { - // Rely on the Memoizer to cache pre-visit values - Expr value = this->VisitExpr(op->value); - Expr body = this->VisitExpr(op->body); - auto expr = GetRef(op); - // drop the let binding - if (this->CanOutlineExpr(value)) { - this->memo_[expr] = this->VisitExpr(op->body); - } else { - Var var = Downcast(this->VisitExpr(op->var)); - if (var.same_as(op->var) && value.same_as(op->value) && body.same_as(op->body)) { - this->memo_[expr] = expr; 
- } else { - this->memo_[expr] = Let(var, value, body); - } - } - }; - ExpandANormalForm(op, pre_visit, post_visit); - return memo_[GetRef(op)]; - } - - bool CanOutlineExpr(const Expr& expr) { - // TODO(@lhutton1): This behaviour is similar to the OutlineCompilerFunctions pass - // we could reuse this functionality by separating outlining and lowering in this - // pass. - if (!expr->IsInstance()) { - return false; - } - const auto* func = expr.as(); - auto codegen_name = func->GetAttr(attr::kCompiler); - if (!codegen_name.defined() || codegen_name != "cmsis-nn") { - return false; - } - return true; - } - - Expr Rewrite_(const CallNode* pre, const Expr& post) override { - if (const auto* call = post.as()) { - if (CanOutlineExpr(call->op)) { - const auto* func = call->op.as(); - ICHECK(func) << "Expected function node but was " << call->op->GetTypeKey(); - const auto codegen_name = func->GetAttr(attr::kCompiler); - auto global_func_name = func->GetAttr(tvm::attr::kGlobalSymbol); - GlobalVar new_global_var(global_func_name.value()); - - const CallNode* inner_call = func->body.as(); - if (!inner_call) { - return CallToFuncWithoutCompilerAttr(new_global_var, GetRef(call), - GetRef(func)); - } - - const FunctionNode* composite_func = inner_call->op.as(); - if (!composite_func) { - return CallToFuncWithoutCompilerAttr(new_global_var, GetRef(call), - GetRef(func)); - } - - auto comp_name = composite_func->GetAttr(attr::kComposite); - new_global_var->checked_type_ = composite_func->checked_type(); - - if (comp_name == "cmsis-nn.qnn_softmax") { - EmitSoftMax(new_global_var, composite_func->body); - } else if (comp_name == "cmsis-nn.qnn_mul") { - EmitMul(new_global_var, composite_func->body); - } else if (comp_name == "cmsis-nn.qnn_add") { - EmitAdd(new_global_var, composite_func->body); - } else if (comp_name == "cmsis-nn.qnn_conv2d") { - EmitConv2D(new_global_var, composite_func->body); - } else if (comp_name == "cmsis-nn.qnn_fully_connected") { - EmitFullyConnected(new_global_var, composite_func->body); - } else if (comp_name == "cmsis-nn.qnn_avg_pool2d" || - comp_name == "cmsis-nn.qnn_max_pool2d") { - EmitPool2D(new_global_var, composite_func->body, comp_name.value()); - } else { - return CallToFuncWithoutCompilerAttr(new_global_var, GetRef(call), - GetRef(func)); - } - - // Drop out the redundant arguments, and the arg_types from the global function call - Array args; - Array arg_types; - auto* func_type = new_global_var->checked_type_.as(); - int arg_id = -1; - for (const auto& arg : call->args) { - ++arg_id; - if (std::find(skip_call_args_.begin(), skip_call_args_.end(), arg_id) != - skip_call_args_.end()) { - continue; - } - args.push_back(VisitExpr(arg)); - arg_types.push_back(func_type->arg_types[arg_id]); - } - if (arg_types.size() != func_type->arg_types.size()) { - new_global_var->checked_type_ = - FuncType(arg_types, func_type->ret_type, {}, func_type->type_constraints); - } - skip_call_args_.clear(); - return Call(new_global_var, args, call->attrs, call->type_args, call->span); - } - } - return post; - } - - private: - static constexpr int32_t kScaledDiffIntegerBits = 5; - static constexpr int32_t kInputBits = 5; - static constexpr double kBeta = 1.0; - /*! \brief Unique id for context buffer needed by CMSIS-NN layers. */ - int32_t context_buffer_id_; - /*! \brief Skip arguments in the call to global partitioned function. 
*/ - std::unordered_set skip_call_args_; - IRModule ir_module_; - Target target_; -}; - -tvm::transform::Pass RelayToTIR() { - runtime::TypedPackedFunc pass_func = - [=](IRModule ir_module, transform::PassContext pass_context) { - auto relay_to_tir = RelayToTIRVisitor(ir_module, Target("cmsis-nn")); - return relay_to_tir.Mutate(); - }; - return tvm::transform::CreateModulePass(pass_func, 0, "RelayToTIR", {}); -} - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/cmsisnn/scalar_to_tensor_constant.cc b/src/relay/backend/contrib/cmsisnn/scalar_to_tensor_constant.cc deleted file mode 100644 index 6180fa85160f..000000000000 --- a/src/relay/backend/contrib/cmsisnn/scalar_to_tensor_constant.cc +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/*! - * \file scalar_to_tensor_constant.cc - * \brief Converts scalar constant into tensor constant for binary ops of CMSIS-NN - */ - -#include -#include -#include -#include -#include - -#include "../../../op/make_op.h" -#include "../../../qnn/utils.h" -#include "../../../transforms/pattern_utils.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -/*! - * \brief This Mutator finds all partitioned functions meant for CMSIS-NN binary ops. - * Then, it substitutes the scalar constants with tensor constants. It makes the shape of this - * new constant same as that of the neighbouring constant of the other binary operand. The - * expectation is that the ExtractConstant pass would later extract this tensor constant out of the - * global partitioned function, thus making the entire global partitioned and its composite function - * constant free. This makes the TIR generation for binary ops via CMSIS-NN independent of - * constants. 
- */ -class ScalarToTensorConstantMutator : public MixedModeMutator { - public: - explicit ScalarToTensorConstantMutator(const IRModule& mod) : mod_(mod) {} - - private: - using MixedModeMutator::VisitExpr_; - - // Here is an example with the annotated scalar constant: - // def @tvmgen_default_cmsis_nn_main_1(%cmsis_nn_input: Tensor[], Inline=1, Compiler="cmsis-nn", - // global_symbol="tvmgen_default_cmsis_nn_main", - // Primitive=1) -> Tensor[] { - // %56 = fn (%input0: _scalar_constant_, %input1: Tensor[], - // PartitionedFromPattern="qnn.mul_", Composite="cmsis-nn.qnn_mul") -> Tensor[] { - // qnn.mul(%input0, %input1, scale0, zero_point0, - // scale1, zero_point_1, output_scale, output_zero_point) - // }; - // %56(meta[relay.Constant] /* _scalar constant_ */, %cmsis-nn_input) - // } - Expr Rewrite_(const CallNode* call, const Expr& post) final { - Expr final_call = post; - call = post.as(); - - // Substitute scalar variable with a tensor variable. - if (call->op.as()) { - final_call = ReplaceScalarWithTensorVariable(GetRef(call)); - } - - if (auto opt = call->op.as()) { - GlobalVar global_var = opt.value(); - Function func = Downcast(mod_->Lookup(global_var)); - auto new_body = VisitExpr(func->body); - if (new_body.same_as(func->body)) { - return final_call; - } - Function new_func = WithFields(func, FreeVars(new_body), new_body, func->ret_type, - FreeTypeVars(new_body, mod_), func->attrs); - mod_->Update(global_var, new_func); - final_call = Call(global_var, call->args); - final_call->span = call->span; - } - - // Substitute scalar constant with tensor constant in the call to composite function. - if (auto func = call->op.as()) { - final_call = ReplaceScalarWithTensorConstant(GetRef(call), func.value()); - } - - return final_call; - } - - // Checks if expr can undergo scalar to tensor replacement - bool WorthyOfScalarToTensorReplacement(const Expr& expr) { - if (const CallNode* call = expr.as()) { - if (const OpNode* opnode = call->op.as()) { - if (opnode->name == "qnn.add" || opnode->name == "qnn.mul") { - return true; - } - } - } - if (const FunctionNode* func = expr.as()) { - auto func_name = func->GetAttr(attr::kComposite); - if (func_name.defined() && - (func_name == "cmsis-nn.qnn_add" || func_name == "cmsis-nn.qnn_mul")) { - return true; - } - } - return false; - } - - // Replaces scalar variable with a tensor variable with same shape as that of the neighbouring - // operand tensor in a binary op (add or multiply supported via CMSIS-NN path). This applies only - // to 1st and 2nd arguments of the ops. - Call ReplaceScalarWithTensorVariable(Call call) { - // Returns if the operands of the binary operator come from the same input. 
- if (!WorthyOfScalarToTensorReplacement(call) || call->args.size() < 2) { - return call; - } - Array new_args(call->args); - for (uint32_t i = 0; i < 2; ++i) { - Expr scalar_arg = call->args[i]; - if (!scalar_arg->IsInstance() || !scalar_arg->checked_type_.defined() || - !scalar_arg->checked_type_->IsInstance()) { - continue; - } - Array scalar_shape = scalar_arg->type_as()->shape; - if (scalar_shape.size() != 0) { - continue; - } - int tensor_arg_id = (i + 1) % 2; - Expr tensor_arg = call->args[tensor_arg_id]; - if (!tensor_arg->checked_type_.defined()) { - continue; - } - String arg_name = scalar_arg.as()->name_hint(); - new_args.Set(i, Var(arg_name, tensor_arg->checked_type_)); - } - return Call(call->op, new_args, call->attrs, {}, call->span); - } - - // Replaces scalar constant with a tensor constant with same shape as that of the neighbouring - // operand tensor in a binary op (add or multiply supported via CMSIS-NN path). This applies only - // to 1st and 2nd arguments of the ops. - Call ReplaceScalarWithTensorConstant(Call call, Function func) { - // Returns if the operands of the binary operator come from the same input. - if (!WorthyOfScalarToTensorReplacement(func) || call->args.size() < 2) { - return call; - } - Array new_args(call->args); - for (uint32_t i = 0; i < 2; ++i) { - Expr scalar_arg = call->args[i]; - if (!scalar_arg->checked_type_.defined()) { - continue; - } - Array scalar_shape = scalar_arg->type_as()->shape; - if (scalar_shape.size() != 0 || !scalar_arg->IsInstance()) { - continue; - } - int tensor_arg_id = (i + 1) % 2; - Expr tensor_arg = call->args[tensor_arg_id]; - if (!tensor_arg->checked_type_.defined()) { - continue; - } - TensorType tensor_type = GetRef(tensor_arg->type_as()); - std::vector tensor_shape; - for (auto& dim : tensor_type->shape) { - tensor_shape.push_back(qnn::get_const_int(dim)); - } - int8_t scalar_value = GetScalarFromConstant(scalar_arg); - int tensor_num_elements = qnn::get_const_int(tensor_type->Size()); - std::vector tensor_values(tensor_num_elements, scalar_value); - Constant tensor_constant = - MakeConstantTensor(DataType::Int(8), tensor_shape, tensor_values); - new_args.Set(i, tensor_constant); - } - auto new_body = VisitExpr(func->body); - Function new_func = WithFields(func, FreeVars(new_body), new_body, func->ret_type, - FreeTypeVars(new_body, mod_), func->attrs); - - // Updating new_func parameters could result into uniquification of function parameters. - // Call arguments need to be aligned to the number of arguments expected by new_func. 
- if (new_args[0].same_as(new_args[1])) { - new_args.erase(new_args.begin()); - } - return Call(new_func, new_args, Attrs(), {}, call->span); - } - - private: - IRModule mod_; -}; - -IRModule ScalarToTensorConstant(const IRModule& mod) { - for (auto gv : mod->GetGlobalVars()) { - Function func = Downcast(mod->Lookup(gv)); - - // only mutate CMSIS-NN external functions - auto compiler_name = func->GetAttr(attr::kCompiler); - if (!compiler_name.defined() || compiler_name != "cmsis-nn") { - continue; - } - - auto mutator = ScalarToTensorConstantMutator(mod); - auto new_func_body = mutator.VisitExpr(func->body); - if (!new_func_body.same_as(func->body)) { - Function new_func = - Function(func->params, new_func_body, func->ret_type, func->type_params, func->attrs); - mod->Update(gv, new_func); - } - } - return mod; -} - -transform::Pass ScalarToTensorConstantPass() { - runtime::TypedPackedFunc pass_func = - [=](IRModule m, transform::PassContext pc) { return ScalarToTensorConstant(m); }; - return tvm::transform::CreateModulePass(pass_func, 0, "ScalarToTensorConstant", {"InferType"}); -} - -TVM_REGISTER_GLOBAL("relay.ext.cmsisnn.transform.ScalarToTensorConstants") - .set_body_typed(ScalarToTensorConstantPass); - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/cmsisnn/target.cc b/src/relay/backend/contrib/cmsisnn/target.cc deleted file mode 100644 index 00581a089a4a..000000000000 --- a/src/relay/backend/contrib/cmsisnn/target.cc +++ /dev/null @@ -1,48 +0,0 @@ - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include - -#include "../../../../target/parsers/cpu.h" -#include "compiler_attrs.h" - -namespace tvm { - -namespace relay { -namespace contrib { -namespace cmsisnn { - -tvm::transform::Pass RelayToTIR(); -runtime::Module TIRToRuntime(IRModule mod, Target target); -using FTVMTIRToRuntime = tvm::runtime::TypedPackedFunc; - -TVM_REGISTER_TARGET_KIND("cmsis-nn", kDLCPU) - .add_attr_option>("mattr") - .add_attr_option("mcpu") - .add_attr_option("debug_last_error") - .set_attr(tvm::attr::kRelayToTIR, RelayToTIR()) - .set_attr("TIRToRuntime", TIRToRuntime) - .set_target_parser(tvm::target::parsers::cpu::ParseTarget); - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc b/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc deleted file mode 100644 index 6febfe3486af..000000000000 --- a/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc +++ /dev/null @@ -1,606 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#include - -#include -#include -#include -#include -#include -#include - -#include "../../../../runtime/file_utils.h" -#include "../../../../target/source/codegen_c.h" -#include "../../../../target/source/codegen_c_host.h" -#include "compiler_attrs.h" - -namespace tvm { -using namespace tir; -namespace relay { -namespace contrib { -namespace cmsisnn { - -class CodeGenCMSISNN : public codegen::CodeGenCHost { - public: - void Init(bool output_ssa, bool emit_asserts, bool emit_fwd_func_decl, std::string target_str, - bool debug_last_error) { - this->debug_last_error = debug_last_error; - std::unordered_set devices; - devices.insert("cmsis-nn"); - CodeGenCHost::Init(output_ssa, emit_asserts, emit_fwd_func_decl, target_str, devices); - } - - private: - /*! * \brief Enable storing the last error */ - bool debug_last_error; - - /*! * \brief CMSIS-NN context buffer info */ - struct CMSISNNContextBuffer { - std::string name; - int size; - }; - - /*! * \brief CMSIS-NN buffer dimensions */ - struct CMSISNNDims { - int n; - int h; - int w; - int c; - }; - - /*! * \brief CMSIS-NN Conv2D and Depthwise parameters */ - struct Conv2DParams { - int input_offset; - int output_offset; - int stride_w; - int stride_h; - int padding_w; - int padding_h; - int dilation_w; - int dilation_h; - int clip_min; - int clip_max; - int depth_multiplier; - }; - - /*! * \brief CMSIS-NN Conv2D and Depthwise parameters */ - struct FCParams { - int input_offset; - int filter_offset; - int output_offset; - int clip_min; - int clip_max; - int multiplier; - int shift; - }; - - struct PoolParams { - int stride_h; - int stride_w; - int padding_h; - int padding_w; - int clip_min; - int clip_max; - }; - - struct CMSISNNSoftmaxLutS16 { - std::string exp_lut_name; - std::string one_by_one_lut_name; - }; - - using codegen::CodeGenCHost::VisitStmt_; - - /*! 
* \brief Emits CMSIS-NN APIs for every call_extern */ - void VisitExpr_(const CallNode* op, std::ostream& os) final { - if (!op->op.same_as(builtin::call_extern())) { - CodeGenCHost::VisitExpr_(op, os); - return; - } - - std::string cmsis_func_name = op->args[0].as()->value; - if (cmsis_func_name == "arm_softmax_s8" || cmsis_func_name == "arm_elementwise_mul_s8" || - cmsis_func_name == "arm_elementwise_add_s8" || - cmsis_func_name == "arm_elementwise_mul_s16" || - cmsis_func_name == "arm_elementwise_add_s16") { - CodeGenC::VisitExpr_(op, os); - } else if (cmsis_func_name == "arm_convolve_wrapper_s8" || - cmsis_func_name == "arm_convolve_wrapper_s16" || - cmsis_func_name == "arm_depthwise_conv_wrapper_s8" || - cmsis_func_name == "arm_depthwise_conv_wrapper_s16") { - EmitConv2D(op); - } else if (cmsis_func_name == "arm_fully_connected_s8" || - cmsis_func_name == "arm_fully_connected_s16") { - EmitFullyConnected(op); - } else if (cmsis_func_name == "arm_avgpool_s8" || cmsis_func_name == "arm_avgpool_s16" || - cmsis_func_name == "arm_max_pool_s8" || cmsis_func_name == "arm_max_pool_s16") { - EmitPool2D(op); - } else if (cmsis_func_name == "arm_softmax_s16") { - EmitSoftmaxInt16(op); - } - return; - } - - /*! * \brief Emits cmsis_nn_context struct */ - std::string EmitCMSISNNContext(std::ostream& os, CMSISNNContextBuffer context_buffer) { - std::string struct_name = "context"; - PrintIndent(); - os << "cmsis_nn_context " << struct_name << "= {" << context_buffer.name << "," - << context_buffer.size << "};\n"; - return struct_name; - } - - /*! * \brief Emits cmsis_nn_conv_params struct */ - std::string EmitCMSISNNConvParams(std::ostream& os, Conv2DParams params) { - std::string struct_name = "cmsis_nn_conv_params"; - std::string instance_name = "conv_params"; - if (params.depth_multiplier != -1) { - struct_name = "cmsis_nn_dw_conv_params"; - } - PrintIndent(); - os << "cmsis_nn_tile stride = {" << params.stride_w << "," << params.stride_h << "};\n"; - PrintIndent(); - os << "cmsis_nn_tile padding = {" << params.padding_w << "," << params.padding_h << "};\n"; - PrintIndent(); - os << "cmsis_nn_tile dilation = {" << params.dilation_w << "," << params.dilation_h << "};\n"; - PrintIndent(); - os << "cmsis_nn_activation activation = {" << params.clip_min << "," << params.clip_max - << "};\n"; - PrintIndent(); - os << struct_name << " " << instance_name << " = {" << params.input_offset << ", " - << params.output_offset; - if (params.depth_multiplier != -1) { - os << ", " << params.depth_multiplier; - } - os << ", stride, padding, dilation, activation};\n"; - return instance_name; - } - - /*! * \brief Emits cmsis_nn_fc_params struct */ - std::string EmitCMSISNNFCParams(std::ostream& os, FCParams params) { - std::string struct_name = "cmsis_nn_fc_params"; - std::string instance_name = "fc_params"; - PrintIndent(); - os << "cmsis_nn_activation activation = {" << params.clip_min << "," << params.clip_max - << "};\n"; - PrintIndent(); - os << struct_name << " " << instance_name << " = {" << params.input_offset << ", " - << params.filter_offset << ", " << params.output_offset; - os << ", activation};\n"; - return instance_name; - } - - /*! 
* \brief Emits cmsis_nn_pool_params struct */ - std::string EmitCMSISNNPoolParams(std::ostream& os, PoolParams params) { - std::string struct_name = "cmsis_nn_pool_params"; - std::string instance_name = "pool_params"; - PrintIndent(); - os << "cmsis_nn_tile stride = {" << params.stride_w << "," << params.stride_h << "};\n"; - PrintIndent(); - os << "cmsis_nn_tile padding = {" << params.padding_w << "," << params.padding_h << "};\n"; - PrintIndent(); - os << "cmsis_nn_activation activation = {" << params.clip_min << "," << params.clip_max - << "};\n"; - PrintIndent(); - os << struct_name << " " << instance_name << " = {stride, padding, activation};\n"; - return instance_name; - } - - /*! * \brief Emits cmsis_nn_per_channel_quant_params struct */ - std::string EmitCMSISNNPerChannelQuantParams(std::ostream& os, std::string multiplier, - std::string shift) { - std::string struct_name = "quant_params"; - PrintIndent(); - os << "cmsis_nn_per_channel_quant_params " << struct_name << " = {" << multiplier << ", " - << shift << "};\n"; - return struct_name; - } - - /*! * \brief Emits cmsis_nn_per_tensor_quant_params struct */ - std::string EmitCMSISNNPerTensorQuantParams(std::ostream& os, int multiplier, int shift) { - std::string struct_name = "quant_params"; - PrintIndent(); - os << "cmsis_nn_per_tensor_quant_params " << struct_name << " = {" << multiplier << ", " - << shift << "};\n"; - return struct_name; - } - - /*! * \brief Emits cmsis_nn_dims struct */ - std::string EmitCMSISNNDims(std::ostream& os, std::string tensor_type, CMSISNNDims dims) { - std::string struct_name = tensor_type + "_dims"; - PrintIndent(); - os << "cmsis_nn_dims " << struct_name << " = {" << dims.n << "," << dims.h << "," << dims.w - << "," << dims.c << "};\n"; - return struct_name; - } - /*! * \brief Emits cmsis_nn_softmax_params struct */ - std::string EmitCMSISNNSoftmaxLutS16(std::ostream& os, CMSISNNSoftmaxLutS16 softmax_params) { - std::string struct_name = "softmax_params"; - PrintIndent(); - os << "cmsis_nn_softmax_lut_s16 " << struct_name << "= {" << softmax_params.exp_lut_name << ", " - << softmax_params.one_by_one_lut_name << "};\n"; - return struct_name; - } - - /*! * \brief Deduces variable name from call_extern argument resting at id */ - std::string VarNameFromArg(const CallNode* op, int id) { - return op->args[id].as()->name_hint.c_str(); - } - - /*! * \brief Deduces value from call_extern argument resting at id */ - int ValueFromArg(const CallNode* op, int id) { return op->args[id].as()->value; } - - /*! * \brief extracts CMSIS-NN context buffer information */ - CMSISNNContextBuffer extract_context_buffer_info(const CallNode* op, int base_pos) { - CMSISNNContextBuffer context_buffer; - - // The argument could be a Var if it is allocated to hold the - // context buffer OR it will be a StringImm with "NULL" - if (op->args[base_pos]->IsInstance()) { - context_buffer.name = op->args[base_pos].as()->name_hint; - } else { - context_buffer.name = op->args[base_pos].as()->value; - } - context_buffer.size = ValueFromArg(op, base_pos + 1); - return context_buffer; - } - - /*! 
* \brief extracts CMSIS-NN conv2d parameters from call_extern */ - Conv2DParams extract_conv2d_params(const CallNode* op, int base_pos) { - Conv2DParams conv2d_params; - conv2d_params.input_offset = ValueFromArg(op, base_pos); - conv2d_params.output_offset = ValueFromArg(op, ++base_pos); - conv2d_params.stride_w = ValueFromArg(op, ++base_pos); - conv2d_params.stride_h = ValueFromArg(op, ++base_pos); - conv2d_params.padding_w = ValueFromArg(op, ++base_pos); - conv2d_params.padding_h = ValueFromArg(op, ++base_pos); - conv2d_params.dilation_w = ValueFromArg(op, ++base_pos); - conv2d_params.dilation_h = ValueFromArg(op, ++base_pos); - conv2d_params.clip_min = ValueFromArg(op, ++base_pos); - conv2d_params.clip_max = ValueFromArg(op, ++base_pos); - conv2d_params.depth_multiplier = ValueFromArg(op, ++base_pos); - return conv2d_params; - } - - /*! * \brief extracts CMSIS-NN FC parameters from call_extern */ - FCParams extract_fc_params(const CallNode* op, int base_pos) { - FCParams fc_params; - fc_params.input_offset = ValueFromArg(op, base_pos); - fc_params.filter_offset = ValueFromArg(op, ++base_pos); - fc_params.output_offset = ValueFromArg(op, ++base_pos); - fc_params.clip_min = ValueFromArg(op, ++base_pos); - fc_params.clip_max = ValueFromArg(op, ++base_pos); - fc_params.multiplier = ValueFromArg(op, ++base_pos); - fc_params.shift = ValueFromArg(op, ++base_pos); - return fc_params; - } - - /*! * \brief extracts CMSIS-NN Pooling parameters from call_extern */ - PoolParams extract_pool_params(const CallNode* op, int base_pos) { - PoolParams pool_params; - pool_params.stride_h = ValueFromArg(op, base_pos); - pool_params.stride_w = ValueFromArg(op, ++base_pos); - pool_params.padding_h = ValueFromArg(op, ++base_pos); - pool_params.padding_w = ValueFromArg(op, ++base_pos); - pool_params.clip_min = ValueFromArg(op, ++base_pos); - pool_params.clip_max = ValueFromArg(op, ++base_pos); - return pool_params; - } - - /*! * \brief extracts CMSIS-NN buffer dimensions from call_extern */ - CMSISNNDims extract_buffer_dims(const CallNode* op, int base_pos) { - CMSISNNDims dims; - dims.n = ValueFromArg(op, base_pos); - dims.h = ValueFromArg(op, ++base_pos); - dims.w = ValueFromArg(op, ++base_pos); - dims.c = ValueFromArg(op, ++base_pos); - return dims; - } - /*! * \brief extracts CMSIS-NN softmax LUTs from call_extern */ - CMSISNNSoftmaxLutS16 extract_softmax_softmax_lut_s16(const CallNode* op, int exp_lut_pos, - int one_by_one_lut_pos) { - CMSISNNSoftmaxLutS16 softmax_params; - softmax_params.exp_lut_name = op->args[exp_lut_pos].as()->name_hint; - softmax_params.one_by_one_lut_name = op->args[one_by_one_lut_pos].as()->name_hint; - return softmax_params; - } - - /*! 
* \brief Emits CMSIS-NN APIs for every call_extern comprising convolution */ - void EmitConv2D(const CallNode* op) { - // Position of various arguments relative to buffers in the call_extern - enum CallExternArgPos { - CONTEXT_BUFFER_POS = 1, - CONV2D_PARAMS_POS = 3, - INPUT_DIM_POS = 14, - FILTER_DIM_POS = 18, - BIAS_DIM_POS = 22, - OUTPUT_DIM_POS = 26, - MAX_NUM_ARGS = 36 - }; - - std::string cmsis_func_name = op->args[0].as()->value; - - // extract buffer names from call_extern - int arg_id = 0; - std::string input_data = VarNameFromArg(op, ++arg_id); - std::string filter_data = VarNameFromArg(op, ++arg_id); - std::string multiplier = VarNameFromArg(op, ++arg_id); - std::string bias_data("NULL"); - if (op->args.size() == CallExternArgPos::MAX_NUM_ARGS) { - bias_data = VarNameFromArg(op, ++arg_id); - } - std::string shift = VarNameFromArg(op, ++arg_id); - std::string output_data = VarNameFromArg(op, ++arg_id); - - // extract CMSIS-NN API parameters - int context_buffer_pos = arg_id + CallExternArgPos::CONTEXT_BUFFER_POS; - int conv2d_params_pos = arg_id + CallExternArgPos::CONV2D_PARAMS_POS; - int input_dim_pos = arg_id + CallExternArgPos::INPUT_DIM_POS; - int filter_dim_pos = arg_id + CallExternArgPos::FILTER_DIM_POS; - int bias_dim_pos = arg_id + CallExternArgPos::BIAS_DIM_POS; - int output_dim_pos = arg_id + CallExternArgPos::OUTPUT_DIM_POS; - - CMSISNNContextBuffer context_buffer = extract_context_buffer_info(op, context_buffer_pos); - Conv2DParams conv2d_params = extract_conv2d_params(op, conv2d_params_pos); - CMSISNNDims input_dims = extract_buffer_dims(op, input_dim_pos); - CMSISNNDims filter_dims = extract_buffer_dims(op, filter_dim_pos); - CMSISNNDims bias_dims = extract_buffer_dims(op, bias_dim_pos); - CMSISNNDims output_dims = extract_buffer_dims(op, output_dim_pos); - - // Emit CMSIS-NN API arguments - std::string context = EmitCMSISNNContext(stream, context_buffer); - std::string conv_params = EmitCMSISNNConvParams(stream, conv2d_params); - std::string quant_params = EmitCMSISNNPerChannelQuantParams(stream, multiplier, shift); - std::string input_dim = EmitCMSISNNDims(stream, "input", input_dims); - std::string filter_dim = EmitCMSISNNDims(stream, "filter", filter_dims); - std::string bias_dim = EmitCMSISNNDims(stream, "bias", bias_dims); - std::string output_dim = EmitCMSISNNDims(stream, "output", output_dims); - - // Emit CMSIS-NN API - PrintIndent(); - stream << "arm_cmsis_nn_status status = "; - stream << cmsis_func_name << "("; - stream << "&" << context << ", "; - stream << "&" << conv_params << ", "; - stream << "&" << quant_params << ", "; - stream << "&" << input_dim << ", " << input_data << ", "; - stream << "&" << filter_dim << ", " << filter_data << ", "; - stream << "&" << bias_dim << ", " << bias_data << ", "; - stream << "&" << output_dim << ", " << output_data << ");\n"; - EmitErrorCheck(); - } - - /*! 
* \brief Emits CMSIS-NN APIs for every call_extern comprising fully connected */ - void EmitFullyConnected(const CallNode* op) { - // Position of various arguments relative to buffers in the call_extern - enum CallExternArgPos { - CONTEXT_BUFFER_POS = 1, - FC_PARAMS_POS = 3, - INPUT_DIM_POS = 10, - FILTER_DIM_POS = 14, - BIAS_DIM_POS = 18, - OUTPUT_DIM_POS = 22, - MAX_NUM_ARGS = 30 - }; - - std::string cmsis_func_name = op->args[0].as()->value; - - // extract buffer names from call_extern - int arg_id = 0; - std::string input_data = VarNameFromArg(op, ++arg_id); - std::string filter_data = VarNameFromArg(op, ++arg_id); - std::string bias_data("NULL"); - if (op->args.size() == CallExternArgPos::MAX_NUM_ARGS) { - bias_data = VarNameFromArg(op, ++arg_id); - } - std::string output_data = VarNameFromArg(op, ++arg_id); - - // extract CMSIS-NN API parameters - int context_buffer_pos = arg_id + CallExternArgPos::CONTEXT_BUFFER_POS; - int fc_params_pos = arg_id + CallExternArgPos::FC_PARAMS_POS; - int input_dim_pos = arg_id + CallExternArgPos::INPUT_DIM_POS; - int filter_dim_pos = arg_id + CallExternArgPos::FILTER_DIM_POS; - int bias_dim_pos = arg_id + CallExternArgPos::BIAS_DIM_POS; - int output_dim_pos = arg_id + CallExternArgPos::OUTPUT_DIM_POS; - - CMSISNNContextBuffer context_buffer = extract_context_buffer_info(op, context_buffer_pos); - FCParams fc_params = extract_fc_params(op, fc_params_pos); - CMSISNNDims input_dims = extract_buffer_dims(op, input_dim_pos); - CMSISNNDims filter_dims = extract_buffer_dims(op, filter_dim_pos); - CMSISNNDims bias_dims = extract_buffer_dims(op, bias_dim_pos); - CMSISNNDims output_dims = extract_buffer_dims(op, output_dim_pos); - - // Emit CMSIS-NN API arguments - std::string context = EmitCMSISNNContext(stream, context_buffer); - std::string cmsisnn_fc_params = EmitCMSISNNFCParams(stream, fc_params); - std::string quant_params = - EmitCMSISNNPerTensorQuantParams(stream, fc_params.multiplier, fc_params.shift); - std::string input_dim = EmitCMSISNNDims(stream, "input", input_dims); - std::string filter_dim = EmitCMSISNNDims(stream, "filter", filter_dims); - std::string bias_dim = EmitCMSISNNDims(stream, "bias", bias_dims); - std::string output_dim = EmitCMSISNNDims(stream, "output", output_dims); - - PrintIndent(); - stream << "arm_cmsis_nn_status status = "; - stream << cmsis_func_name << "("; - stream << "&" << context << ", "; - stream << "&" << cmsisnn_fc_params << ", "; - stream << "&" << quant_params << ", "; - stream << "&" << input_dim << ", " << input_data << ", "; - stream << "&" << filter_dim << ", " << filter_data << ", "; - stream << "&" << bias_dim << ", " << bias_data << ", "; - stream << "&" << output_dim << ", " << output_data << ");\n"; - EmitErrorCheck(); - } - - /*! 
* \brief Emits CMSIS-NN APIs for every call_extern comprising pooling ops */ - void EmitPool2D(const CallNode* op) { - // Position of various arguments relative to buffers in the call_extern - enum CallExternArgPos { - CONTEXT_BUFFER_POS = 1, - POOL_PARAMS_POS = 3, - INPUT_DIM_POS = 9, - FILTER_DIM_POS = 13, - OUTPUT_DIM_POS = 17, - MAX_NUM_ARGS = 23 - }; - std::string cmsis_func_name = op->args[0].as()->value; - - // extract buffer names from call_extern - int arg_id = 0; - std::string input_data = VarNameFromArg(op, ++arg_id); - std::string output_data = VarNameFromArg(op, ++arg_id); - - // extract CMSIS-NN API parameters - int context_buffer_pos = arg_id + CallExternArgPos::CONTEXT_BUFFER_POS; - int pool_params_pos = arg_id + CallExternArgPos::POOL_PARAMS_POS; - int input_dim_pos = arg_id + CallExternArgPos::INPUT_DIM_POS; - int filter_dim_pos = arg_id + CallExternArgPos::FILTER_DIM_POS; - int output_dim_pos = arg_id + CallExternArgPos::OUTPUT_DIM_POS; - - CMSISNNContextBuffer context_buffer = extract_context_buffer_info(op, context_buffer_pos); - PoolParams pool_params = extract_pool_params(op, pool_params_pos); - CMSISNNDims input_dims = extract_buffer_dims(op, input_dim_pos); - CMSISNNDims filter_dims = extract_buffer_dims(op, filter_dim_pos); - CMSISNNDims output_dims = extract_buffer_dims(op, output_dim_pos); - - std::string context = EmitCMSISNNContext(stream, context_buffer); - std::string cmsisnn_pool_params = EmitCMSISNNPoolParams(stream, pool_params); - std::string input_dim = EmitCMSISNNDims(stream, "input", input_dims); - std::string filter_dim = EmitCMSISNNDims(stream, "filter", filter_dims); - std::string output_dim = EmitCMSISNNDims(stream, "output", output_dims); - - PrintIndent(); - stream << "arm_cmsis_nn_status status = "; - stream << cmsis_func_name << "("; - stream << "&" << context << ", "; - stream << "&" << cmsisnn_pool_params << ", "; - stream << "&" << input_dim << ", " << input_data << ", "; - stream << "&" << filter_dim << ", "; - stream << "&" << output_dim << ", " << output_data << ");\n"; - EmitErrorCheck(); - } - - void EmitSoftmaxInt16(const CallNode* op) { - std::string cmsis_func_name = op->args[0].as()->value; - - // extract buffer names from call_extern - int arg_id = 0; - std::string input_data = VarNameFromArg(op, ++arg_id); - int num_rows = ValueFromArg(op, ++arg_id); - int row_size = ValueFromArg(op, ++arg_id); - int multiplier = ValueFromArg(op, ++arg_id); - int shift = ValueFromArg(op, ++arg_id); - // extracting LUT names from call_extern - CMSISNNSoftmaxLutS16 softmax_params_buffer = - extract_softmax_softmax_lut_s16(op, arg_id + 1, arg_id + 2); - arg_id += 2; - std::string output_data = VarNameFromArg(op, ++arg_id); - - // Emit CMSIS-NN API arguments - std::string softmax_params = EmitCMSISNNSoftmaxLutS16(stream, softmax_params_buffer); - - PrintIndent(); - stream << "arm_cmsis_nn_status status = "; - stream << cmsis_func_name << "("; - stream << input_data << ", "; - stream << num_rows << ", "; - stream << row_size << ", "; - stream << multiplier << ", "; - stream << shift << ", "; - stream << "&" << softmax_params << ", "; - stream << output_data << ");\n"; - EmitErrorCheck(); - } - - void EmitErrorCheck() { - auto emit_error = [&](std::string error) { - if (this->debug_last_error) { - stream << "TVMAPISetLastError(\"" << error << "\"); "; - } - }; - - PrintIndent(); - stream << "switch (!status) {\n"; - PrintIndent(); - stream << "case ARM_CMSIS_NN_SUCCESS: break;\n"; - PrintIndent(); - stream << "case ARM_CMSIS_NN_ARG_ERROR: "; - 
emit_error("ARM_CMSIS_NN_ARG_ERROR"); - stream << "return -1;\n"; - PrintIndent(); - stream << "case ARM_CMSIS_NN_NO_IMPL_ERROR: "; - emit_error("ARM_CMSIS_NN_NO_IMPL_ERROR"); - stream << "return -1;\n"; - PrintIndent(); - stream << "}\n"; - } -}; - -static CMSISNNCompilerConfig GetCompilerAttrs() { - auto ctx = tvm::tir::transform::PassContext::Current(); - Optional cfg = - ctx->GetConfig("relay.ext.cmsisnn.options"); - if (!cfg.defined()) { - return AttrsWithDefaultValues(); - } - return cfg.value(); -} - -runtime::Module TIRToRuntime(IRModule mod, Target target) { - bool output_ssa = false; - bool emit_asserts = false; - bool emit_fwd_func_decl = false; - bool debug_last_error = GetCompilerAttrs()->debug_last_error; - CodeGenCMSISNN codegen; - codegen.Init(output_ssa, emit_asserts, emit_fwd_func_decl, target->str(), debug_last_error); - - std::vector> funcs; - for (auto [gvar, base_func] : mod->functions) { - funcs.push_back({gvar, Downcast(base_func)}); - } - - std::sort(funcs.begin(), funcs.end(), - [](std::pair kv_a, - std::pair kv_b) { - std::string name_hint_a = kv_a.first->name_hint; - std::string name_hint_b = kv_b.first->name_hint; - size_t name_a_length = name_hint_a.length(); - size_t name_b_length = name_hint_b.length(); - if (name_a_length < name_b_length) return true; - if (name_a_length > name_b_length) return false; - return name_hint_a < name_hint_b; - }); - - for (auto [gvar, prim_func] : funcs) { - codegen.AddFunction(gvar, prim_func); - } - std::string code = codegen.Finish(); - - Array function_names; - for (auto [gvar, prim_func] : funcs) { - function_names.push_back(codegen.GetFunctionName(gvar)); - } - - return codegen::CSourceModuleCreate(code, "c", function_names); -} - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/ethosn/codegen.cc b/src/relay/backend/contrib/ethosn/codegen.cc deleted file mode 100644 index 841b35593d4b..000000000000 --- a/src/relay/backend/contrib/ethosn/codegen.cc +++ /dev/null @@ -1,1097 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/backend/contrib/ethosn/codegen.cc - * \brief The Relay -> Arm(R) Ethos(TM)-N command stream compiler. 
- */ -#include -#include -#include - -#include "codegen_ethosn.h" -#include "ethosn_api.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace ethosn { - -constexpr size_t kReasonMaxLength = sl::g_ReasonMaxLength; - -sl::TensorInfo GetTensorInfo(std::map> tensor_table, - const Call& call) { - if (tensor_table.find(call) != tensor_table.end()) return tensor_table[call][0]; - - return sl::TensorInfo(); -} - -bool IsEthosnOp(const Call& call, const std::string& op_name) { - if (call->op->IsInstance()) { - Op op = Downcast(call->op); - ICHECK(op.defined()); - return op == Op::Get(op_name); - } else { - return false; - } -} - -bool IsEthosnFunc(const Call& call, const std::string& op_name) { - if (call->op->IsInstance()) { - Function func = Downcast(call->op); - ICHECK(func.defined()); - auto name_node = func->GetAttr(attr::kComposite); - return name_node.value() == op_name; - } - return false; -} - -std::map> InferTensorsVisitor::Infer(const Expr& expr) { - tensor_table_.clear(); - ICHECK(expr->checked_type().defined()); - size_t output_size = 1; - if (auto tuple = expr->checked_type().as()) { - output_size = tuple->fields.size(); - } - for (size_t i = 0; i < output_size; i++) { - tensor_table_[expr].push_back(sl::TensorInfo({1, 1, 1, 1}, sl::DataType::UINT8_QUANTIZED, - sl::DataFormat::NHWC, sl::QuantizationInfo())); - } - VisitInferred(expr); - return tensor_table_; -} - -void InferTensorsVisitor::InferCall(const CallNode* cn) { - EthosnError err; - Call call = GetRef(cn); - // Determine call -> NPU mapping - if (IsEthosnFunc(call, "ethos-n.qnn_conv2d")) { - ConvolutionParams params; - err += EthosnAPI::QnnConv2d(cn->op.as()->body, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnFunc(call, "ethos-n.qnn_fc")) { - FullyConnectedParams params; - err += EthosnAPI::QnnFullyConnected(cn->op.as()->body, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnOp(call, "nn.max_pool2d")) { - MaxPool2DParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - err += EthosnAPI::MaxPool2D(call, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnFunc(call, "ethos-n.qnn_avg_pool2d")) { - AvgPool2DParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - err += EthosnAPI::AvgPool2D(cn->op.as()->body, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnOp(call, "reshape")) { - ReshapeParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - err += EthosnAPI::Reshape(call, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnFunc(call, "ethos-n.qnn_add")) { - AdditionParams params; - err += EthosnAPI::Addition(cn->op.as()->body, ¶ms); - tensor_table_[cn->args[0]] = {params.lhs_info}; - tensor_table_[cn->args[1]] = {params.rhs_info}; - } else if (IsEthosnFunc(call, "ethos-n.qnn_sigmoid")) { - SigmoidParams params; - err += EthosnAPI::Sigmoid(cn->op.as()->body, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnFunc(call, "ethos-n.qnn_mean")) { - MeanParams params; - err += EthosnAPI::Mean(cn->op.as()->body, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnFunc(call, "ethos-n.qnn_tanh")) { - TanhParams params; - err += EthosnAPI::Tanh(cn->op.as()->body, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnFunc(call, "ethos-n.qnn_leaky_relu")) { - LeakyReLUParams params; - err += EthosnAPI::LeakyReLU(cn->op.as()->body, ¶ms); - 
tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnFunc(call, "ethos-n.qnn_conv2d_transpose")) { - QnnConv2dTransposeParams params; - err += EthosnAPI::QnnConv2dTranspose(cn->op.as()->body, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnOp(call, "qnn.concatenate")) { - ConcatenateParams params; - err = EthosnAPI::Concatenate(call, ¶ms); - tensor_table_[cn->args[0]] = params.input_infos; - } else if (IsEthosnOp(call, "split")) { - SplitParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - err = EthosnAPI::Split(call, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnOp(call, "nn.depth_to_space")) { - DepthToSpaceParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - err += EthosnAPI::DepthToSpace(call, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnOp(call, "clip")) { - ReluParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - err = EthosnAPI::Relu(call, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnFunc(call, "ethos-n.qnn_requantize")) { - RequantizeParams params; - err += EthosnAPI::Requantize(cn->op.as()->body, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnFunc(call, "ethos-n.qnn_reinterpret_quantize")) { - ReinterpretQuantizationParams params; - err += EthosnAPI::ReinterpretQuantize(cn->op.as()->body, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else if (IsEthosnFunc(call, "ethos-n.qnn_resize")) { - ResizeParams params; - err += EthosnAPI::Resize(cn->op.as()->body, ¶ms); - tensor_table_[cn->args[0]] = {params.input_info}; - } else { - err = EthosnError("unknown operator"); - } - if (err) { - ReportFatalError(call, err); - } -} - -// This will only visit an expression if the expression's tensor info -// has already been entirely inferred. -// An example where this is important is a tuple node where each -// get item node will only infer one field of the tuple's expression info. -// We don't want to traverse the tuple until all of its fields have been inferred. 
-void InferTensorsVisitor::VisitInferred(const Expr& expr) { - if (tensor_table_.find(expr) != tensor_table_.end()) { - for (const auto& tensor_info : tensor_table_[expr]) { - if (tensor_info == sl::TensorInfo()) return; - } - VisitExpr(expr); - } -} - -void InferTensorsVisitor::VisitExpr_(const CallNode* cn) { - InferCall(cn); - // Pre-order visitor - for (const auto& arg : cn->args) { - VisitInferred(arg); - } -} - -void ConstructNetworkVisitor::VisitExpr_(const ConstantNode* cn) { - Constant constant = GetRef(cn); - if (tensor_table_.count(constant)) { - sl::TensorInfo tensor_info = tensor_table_[constant][0]; - sl::TensorAndId tensor_and_id = - sl::AddConstant(network_, tensor_info, constant->data->data); - auto operand = sl::GetOperand(tensor_and_id.tensor); - operand_table_[constant] = std::vector{operand}; - } -} - -void InferTensorsVisitor::VisitExpr_(const TupleNode* tn) { - auto tuple = GetRef(tn); - ICHECK(tensor_table_.find(tuple) != tensor_table_.end()); - for (size_t i = 0; i < tn->fields.size(); i++) { - tensor_table_[tn->fields[i]] = {tensor_table_[tuple][i]}; - } - // Pre-order visitor - for (const auto& field : tn->fields) { - VisitExpr(field); - } -} - -void InferTensorsVisitor::VisitExpr_(const TupleGetItemNode* tgn) { - // Don't assume it must be targeting a TupleNode - // Vars and calls can still have TupleType - auto tg = GetRef(tgn); - ICHECK(tensor_table_.find(tg) != tensor_table_.end()); - auto tuple = tg->tuple; - auto type = tuple->checked_type().as(); - int index = tg->index; - // Resize the tensor infos to the tuple size if not already done - if (tensor_table_.find(tuple) == tensor_table_.end()) { - tensor_table_[tuple].resize(type->fields.size()); - } - tensor_table_[tuple][index] = tensor_table_[tg][0]; - // Pre-order visitor - VisitInferred(tuple); -} - -sl::TensorsAndId MakeOps(const sl::TensorAndId& op) { - sl::TensorsAndId ops; - ops.tensors = {op.tensor}; - ops.operationId = op.operationId; - return ops; -} - -sl::EthosNVariant MakeVariant(EthosnCompilerConfig configuration) { - String variant = configuration->variant; - String tops = configuration->tops; - String ple_ratio = configuration->ple_ratio; - - std::string capitalized_variant = variant; - std::transform(capitalized_variant.begin(), capitalized_variant.end(), - capitalized_variant.begin(), ::toupper); - std::string sl_variant_string = - "Ethos-" + capitalized_variant + "_" + tops + "TOPS_" + ple_ratio + "PLE_RATIO"; - return sl::EthosNVariantFromString(sl_variant_string.c_str()); -} - -NetworkWithIDs ConstructNetworkVisitor::Construct(const Function& func) { - // Initialise everything - EthosnCompilerConfig cfg = GetCompilerAttrs(); - sl::EthosNVariant variant = MakeVariant(cfg); - - NetworkWithIDs network_with_ids; - network_ = sl::CreateNetwork( - sl::GetFwAndHwCapabilities(variant, static_cast(std::stoul(cfg->sram_size)))); - network_with_ids.network = network_; - operand_table_.clear(); - - // Infer tensor information - tensor_table_ = InferTensors(this->mod_, this->var_, func->body); - // Add the inputs in the order they appear in the parameters - unsigned int idx = 0; - for (const auto& param : func->params) { - for (const auto& tensor_info : tensor_table_[param]) { - auto tensor_and_id = AddInput(network_, tensor_info); - operand_table_[param].push_back(tensor_and_id.tensor); - id_table_[param].push_back(std::make_pair(tensor_and_id.operationId, 0)); - network_with_ids.input_ids[tensor_and_id.operationId] = idx++; - } - } - // Add the function body - VisitExpr(func->body); - // Add the 
outputs - idx = 0; - for (const auto& layer : operand_table_[func->body]) { - AddOutput(network_, *layer); - network_with_ids.output_ids[id_table_[func->body][idx]] = idx; - idx++; - } - return network_with_ids; -} - -sl::TensorsAndId ConstructNetworkVisitor::HandleCall(const CallNode* cn) { - EthosnError err; - Call call = GetRef(cn); - sl::TensorAndId tensor; - sl::TensorsAndId tensors; - // Determine call -> NPU mapping - if (IsEthosnFunc(call, "ethos-n.qnn_conv2d")) { - if ((err = MakeConvolutionLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnFunc(call, "ethos-n.qnn_fc")) { - if ((err = MakeFullyConnectedLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnOp(call, "nn.max_pool2d")) { - if ((err = MakeMaxPool2DLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnFunc(call, "ethos-n.qnn_avg_pool2d")) { - if ((err = MakeAvgPool2DLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnOp(call, "reshape")) { - if ((err = MakeReshapeLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnFunc(call, "ethos-n.qnn_add")) { - if ((err = MakeAdditionLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnFunc(call, "ethos-n.qnn_sigmoid")) { - if ((err = MakeSigmoidLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnFunc(call, "ethos-n.qnn_mean")) { - if ((err = MakeMeanLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnFunc(call, "ethos-n.qnn_tanh")) { - if ((err = MakeTanhLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnFunc(call, "ethos-n.qnn_leaky_relu")) { - if ((err = MakeLeakyReLULayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnFunc(call, "ethos-n.qnn_conv2d_transpose")) { - if ((err = MakeConv2DTransposeLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnOp(call, "qnn.concatenate")) { - if ((err = MakeConcatenateLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnOp(call, "split")) { - if ((err = MakeSplitLayer(call, &tensors))) ReportFatalError(call, err); - return tensors; - } else if (IsEthosnOp(call, "nn.depth_to_space")) { - if ((err = MakeDepthToSpaceLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnOp(call, "clip")) { - if ((err = MakeReluLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnFunc(call, "ethos-n.qnn_requantize")) { - if ((err = MakeRequantizeLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnFunc(call, "ethos-n.qnn_reinterpret_quantize")) { - if ((err = MakeReinterpretQuantizeLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else if (IsEthosnFunc(call, "ethos-n.qnn_resize")) { - if ((err = MakeResizeLayer(call, &tensor))) ReportFatalError(call, err); - return MakeOps(tensor); - } else { - ReportFatalError(call, EthosnError("unknown operator")); - return {}; - } -} - -void ConstructNetworkVisitor::VisitExpr_(const CallNode* cn) { - auto operand = HandleCall(cn); - operand_table_[GetRef(cn)] = operand.tensors; - for (size_t i = 0; i < 
operand.tensors.size(); i++) { - id_table_[GetRef(cn)].push_back(std::make_pair(operand.operationId, i)); - } -} - -void ConstructNetworkVisitor::VisitExpr_(const TupleNode* op) { - Tuple tuple = GetRef(op); - for (const auto& arg : tuple->fields) { - // The fields in a tuple should not themselves be tuples - // Nested tuples are not supported - if (operand_table_[arg].size() == 1) { - operand_table_[tuple].push_back(operand_table_[arg][0]); - id_table_[tuple].push_back(id_table_[arg][0]); - } else { - operand_table_[tuple].push_back(nullptr); - id_table_[tuple].push_back(std::make_pair(0, 0)); - } - } -} - -void ConstructNetworkVisitor::VisitExpr_(const TupleGetItemNode* tg) { - Expr tuple = tg->tuple; - operand_table_[GetRef(tg)] = {operand_table_[tuple][tg->index]}; - id_table_[GetRef(tg)] = {id_table_[tuple][tg->index]}; -} - -void ConstructNetworkVisitor::VisitLeaf(const Expr& expr) { - // Don't traverse into functions, they're not supported - if (!expr->IsInstance()) MixedModeVisitor::VisitLeaf(expr); -} - -EthosnError ConstructNetworkVisitor::MakeConvolutionLayer(const Call& call, - sl::TensorAndId* out) { - ConvolutionParams params; - if (auto err = EthosnAPI::QnnConv2d(call->op.as()->body, ¶ms)) { - return err; - } - - auto activation = operand_table_[call->args[0]][0]; - auto weights = AddConstant(network_, params.weights_info, params.raw_weights).tensor; - auto bias = AddConstant(network_, params.bias_info, params.raw_bias).tensor; - try { - if (params.is_depthwise) { - *out = AddDepthwiseConvolution(network_, *activation, *bias, *weights, params.conv_info); - } else { - *out = AddConvolution(network_, *activation, *bias, *weights, params.conv_info); - } - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeFullyConnectedLayer(const Call& call, - sl::TensorAndId* out) { - FullyConnectedParams params; - if (auto err = EthosnAPI::QnnFullyConnected(call->op.as()->body, ¶ms)) { - return err; - } - - auto weights = AddConstant(network_, params.weights_info, params.raw_weights->data).tensor; - auto bias = AddConstant(network_, params.bias_info, params.raw_bias->data).tensor; - try { - auto input = - AddReshape(network_, *operand_table_[call->args[0]][0], params.input_info.m_Dimensions) - .tensor; - *out = AddFullyConnected(network_, *input, *bias, *weights, params.fc_info); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeMaxPool2DLayer(const Call& call, - sl::TensorAndId* out) { - MaxPool2DParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - if (auto err = EthosnAPI::MaxPool2D(call, ¶ms)) { - return err; - } - - auto input = operand_table_[call->args[0]][0]; - - try { - *out = AddPooling(network_, *input, params.pool_info); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeAvgPool2DLayer(const Call& call, - sl::TensorAndId* out) { - AvgPool2DParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - if (auto err = EthosnAPI::AvgPool2D(call->op.as()->body, ¶ms)) { - return err; - } - - auto input = operand_table_[call->args[0]][0]; - - try { - *out = AddPooling(network_, *input, params.pool_info); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError 
ConstructNetworkVisitor::MakeReshapeLayer(const Call& call, - sl::TensorAndId* out) { - ReshapeParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - if (auto err = EthosnAPI::Reshape(call, ¶ms)) { - return err; - } - - auto input = operand_table_[call->args[0]][0]; - - try { - *out = AddReshape(network_, *input, params.new_shape); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeAdditionLayer(const Call& call, - sl::TensorAndId* out) { - AdditionParams params; - if (auto err = EthosnAPI::Addition(call->op.as()->body, ¶ms)) { - return err; - } - - auto lhs = operand_table_[call->args[0]][0]; - auto rhs = operand_table_[call->args[1]][0]; - - try { - *out = AddAddition(network_, *lhs, *rhs, params.output_quantization_info); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeSigmoidLayer(const Call& call, - sl::TensorAndId* out) { - auto input = operand_table_[call->args[0]][0]; - - try { - *out = AddSigmoid(network_, *input); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeMeanLayer(const Call& call, - sl::TensorAndId* out) { - auto input = operand_table_[call->args[0]][0]; - - try { - *out = AddMeanXy(network_, *input); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeTanhLayer(const Call& call, - sl::TensorAndId* out) { - auto input = operand_table_[call->args[0]][0]; - - try { - *out = AddTanh(network_, *input); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeLeakyReLULayer(const Call& call, - sl::TensorAndId* out) { - LeakyReLUParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - if (auto err = EthosnAPI::LeakyReLU(call->op.as()->body, ¶ms)) { - return err; - } - - auto input = operand_table_[call->args[0]][0]; - - try { - *out = AddLeakyRelu(network_, *input, params.leaky_relu_info); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeConv2DTransposeLayer(const Call& call, - sl::TensorAndId* out) { - QnnConv2dTransposeParams params; - if (auto err = EthosnAPI::QnnConv2dTranspose(call->op.as()->body, ¶ms)) { - return err; - } - - auto activation = operand_table_[call->args[0]][0]; - auto weights = AddConstant(network_, params.weights_info, params.raw_weights->data).tensor; - auto bias = AddConstant(network_, params.bias_info, params.raw_bias->data).tensor; - try { - *out = AddTransposeConvolution(network_, *activation, *bias, *weights, params.conv_info); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeConcatenateLayer(const Call& call, - sl::TensorAndId* out) { - ConcatenateParams params; - if (auto err = EthosnAPI::Concatenate(call, ¶ms)) { - return err; - } - - std::vector layers; - auto ops = operand_table_[call->args[0]]; - - for (const auto& op : ops) { - layers.emplace_back(op.get()); - } - try { - *out = AddConcatenation(network_, layers, params.concat_info); - } catch (const sl::NotSupportedException& e) { 
- return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeSplitLayer(const Call& call, sl::TensorsAndId* outs) { - SplitParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - if (auto err = EthosnAPI::Split(call, ¶ms)) { - return err; - } - - auto input = operand_table_[call->args[0]][0]; - - try { - *outs = AddSplit(network_, *input, params.split_info); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeDepthToSpaceLayer(const Call& call, - sl::TensorAndId* out) { - DepthToSpaceParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - if (auto err = EthosnAPI::DepthToSpace(call, ¶ms)) { - return err; - } - - auto input = operand_table_[call->args[0]][0]; - - try { - *out = AddDepthToSpace(network_, *input, params.depth_info); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeReluLayer(const Call& call, - sl::TensorAndId* out) { - ReluParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - if (auto err = EthosnAPI::Relu(call, ¶ms)) { - return err; - } - - auto input = operand_table_[call->args[0]][0]; - - try { - *out = AddRelu(network_, *input, params.relu_info); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeRequantizeLayer(const Call& call, - sl::TensorAndId* out) { - RequantizeParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - if (auto err = EthosnAPI::Requantize(call->op.as()->body, ¶ms)) { - return err; - } - - auto input = operand_table_[call->args[0]][0]; - - try { - *out = AddRequantize(network_, *input, params.requantize_info); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeReinterpretQuantizeLayer( - const Call& call, sl::TensorAndId* out) { - ReinterpretQuantizationParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - if (auto err = EthosnAPI::ReinterpretQuantize(call->op.as()->body, ¶ms)) { - return err; - } - - auto input = operand_table_[call->args[0]][0]; - - try { - *out = AddReinterpretQuantization(network_, *input, params.reinterpret_quantize_info); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -EthosnError ConstructNetworkVisitor::MakeResizeLayer(const Call& call, - sl::TensorAndId* out) { - ResizeParams params; - params.input_info = GetTensorInfo(tensor_table_, call); - if (auto err = EthosnAPI::Resize(call->op.as()->body, ¶ms)) { - return err; - } - - auto input = operand_table_[call->args[0]][0]; - - try { - *out = AddResize(network_, *input, params.resize_info); - } catch (const sl::NotSupportedException& e) { - return EthosnError(e.what()); - } - return EthosnError(); -} - -runtime::Module EthosnCompiler::CreateRuntimeModule(const ObjectRef& ref) { - std::vector cmms; - if (ref->IsInstance()) { - IRModule mod; - Function func = Downcast(ref); - auto name_node = func->GetAttr(tvm::attr::kGlobalSymbol); - ICHECK(name_node.defined()) << "Failed to retrieved external symbol."; - GlobalVar gvar = GlobalVar(name_node.value()); - mod->Add(gvar, func); - Function mod_func = Downcast(mod->functions.at(gvar)); - 
cmms.emplace_back(CompileEthosnFunc(mod, gvar, mod_func)); - } else { - LOG(FATAL) << "The input ref is expected to be a Relay function"; - } - auto n = make_object(&cmms); - return runtime::Module(n); -} - -runtime::ethosn::OrderedCompiledNetwork EthosnCompiler::CompileEthosnFunc(const IRModule& mod, - const GlobalVar& gvar, - const Function& func) { - // Construct the network - auto network_with_ids = ConstructNetwork(mod, gvar, func); - // Now set the required build flags - sl::CompilationOptions options = CreateOptions(); - // Set the experimental compiler if enabled, for now this is not part of the - // support library compilation options. - bool experimental_compiler = GetCompilerAttrs()->experimental_compiler; - if (experimental_compiler) { - setenv("FORCE_EXPERIMENTAL_COMPILER", "1", 1); - } - std::vector> compiled_networks = - sl::Compile(*network_with_ids.network, options); - if (experimental_compiler) { - unsetenv("FORCE_EXPERIMENTAL_COMPILER"); - } - ICHECK_GE(compiled_networks.size(), 1) << "Ethos-N compiler failed to compile network"; - auto compiled_network = std::move(compiled_networks[0]); - // Determine the order that the inputs/outputs are in and how that corresponds to the - // order that the TVM runtime will expect them in - auto input_output_order = GetInputOutputOrder(network_with_ids, compiled_network); - auto io_sizes = GetIOSizes(compiled_network); - // Use the order information to create an 'ordered' network with includes how to map - // the inputs/outputs from the TVM runtime to the inputs/outputs of the compiled network - runtime::ethosn::OrderedCompiledNetwork ordered_network; - ordered_network.name = gvar->name_hint; - ordered_network.compiled_cmm = std::move(compiled_network); - ordered_network.inputs = input_output_order.first; - ordered_network.outputs = input_output_order.second; - ordered_network.input_sizes = io_sizes.first; - ordered_network.output_sizes = io_sizes.second; - return ordered_network; -} - -sl::CompilationOptions EthosnCompiler::CreateOptions() { - EthosnCompilerConfig cfg = GetCompilerAttrs(); - - sl::CompilationOptions options; - options.m_Strategy0 = cfg->strategy0; - options.m_Strategy1 = cfg->strategy1; - options.m_Strategy3 = cfg->strategy3; - options.m_Strategy4 = cfg->strategy4; - options.m_Strategy6 = cfg->strategy6; - options.m_Strategy7 = cfg->strategy7; - options.m_DebugInfo.m_DumpRam = cfg->dump_ram; - options.m_DebugInfo.m_InitialSramDump = cfg->initial_sram_dump; - options.m_BlockConfig16x16 = cfg->block_config_16x16; - options.m_BlockConfig32x8 = cfg->block_config_32x8; - options.m_BlockConfig8x32 = cfg->block_config_8x32; - options.m_BlockConfig8x8 = cfg->block_config_8x8; - options.m_EnableIntermediateCompression = cfg->enable_intermediate_compression; - options.m_DisableWinograd = cfg->disable_winograd; - options.m_DebugInfo.m_DebugDir = cfg->debug_dir; - return options; -} - -std::pair, std::vector> EthosnCompiler::GetInputOutputOrder( - NetworkWithIDs network, const std::unique_ptr& compiled_network) { - std::vector input_infos = compiled_network->GetInputBufferInfos(); - std::vector output_infos = compiled_network->GetOutputBufferInfos(); - std::vector input_order; - std::vector output_order; - // Find the order of the inputs in the compiled network - for (const auto& input_info : input_infos) { - input_order.push_back(network.input_ids[input_info.m_SourceOperationId]); - } - // Find the order of the outputs in the compiled network - for (const auto& output_info : output_infos) { - auto output_id = - 
std::make_pair(output_info.m_SourceOperationId, output_info.m_SourceOperationOutputIndex); - output_order.push_back(network.output_ids[output_id]); - } - return std::make_pair(input_order, output_order); -} - -std::pair, std::vector> EthosnCompiler::GetIOSizes( - const std::unique_ptr& compiled_network) { - std::vector input_sizes; - std::vector output_sizes; - for (const sl::InputBufferInfo info : compiled_network->GetInputBufferInfos()) { - input_sizes.push_back(info.m_Size); - } - for (const sl::OutputBufferInfo info : compiled_network->GetOutputBufferInfos()) { - output_sizes.push_back(info.m_Size); - } - - return std::make_pair(input_sizes, output_sizes); -} - -std::unique_ptr EthosnCompiler::m_Queries; - -EthosnError EthosnCompiler::SupportedSetup() { - if (m_Queries == nullptr) { - EthosnCompilerConfig cfg = GetCompilerAttrs(); - sl::EthosNVariant variant = MakeVariant(cfg); - m_Queries = std::make_unique( - sl::GetFwAndHwCapabilities(variant, std::stoul(cfg->sram_size))); - if (m_Queries == nullptr) { - return EthosnError("Could not initialise Arm(R) Ethos(TM)-N compiler isSupported"); - } - } - return EthosnError(); -} - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.conv2d") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - ConvolutionParams params; - auto err = EthosnAPI::QnnConv2d(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - if (params.is_depthwise) { - *rv = !err && EthosnCompiler::GetSupported()->IsDepthwiseConvolutionSupported( - params.bias_info, params.weights_info, params.conv_info, - params.input_info, ¶ms.output_info, reason, sizeof(reason)); - } else { - *rv = !err && EthosnCompiler::GetSupported()->IsConvolutionSupported( - params.bias_info, params.weights_info, params.conv_info, - params.input_info, ¶ms.output_info, reason, sizeof(reason)); - } - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.fc") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - FullyConnectedParams params; - auto err = EthosnAPI::QnnFullyConnected(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && EthosnCompiler::GetSupported()->IsFullyConnectedSupported( - params.bias_info, params.weights_info, params.fc_info, params.input_info, - ¶ms.output_info, reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.max_pool2d") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - MaxPool2DParams params; - auto err = EthosnAPI::MaxPool2D(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && - EthosnCompiler::GetSupported()->IsPoolingSupported( - params.pool_info, params.input_info, ¶ms.output_info, reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.avg_pool2d") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - AvgPool2DParams params; - auto err = EthosnAPI::AvgPool2D(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && - EthosnCompiler::GetSupported()->IsPoolingSupported( - params.pool_info, params.input_info, ¶ms.output_info, reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.reshape") - .set_body([](tvm::TVMArgs args, 
tvm::TVMRetValue* rv) { - Call call = args[0]; - ReshapeParams params; - EthosnAPI::DefaultInputTensor(call); - auto err = EthosnAPI::Reshape(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && - EthosnCompiler::GetSupported()->IsReshapeSupported( - params.new_shape, params.input_info, ¶ms.output_info, reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.addition") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - AdditionParams params; - auto err = EthosnAPI::Addition(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && EthosnCompiler::GetSupported()->IsAdditionSupported( - params.lhs_info, params.rhs_info, params.output_quantization_info, - ¶ms.output_info, reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.sigmoid") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - SigmoidParams params; - auto err = EthosnAPI::Sigmoid(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && EthosnCompiler::GetSupported()->IsSigmoidSupported( - params.input_info, ¶ms.output_info, reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.mean") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - MeanParams params; - auto err = EthosnAPI::Mean(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && EthosnCompiler::GetSupported()->IsMeanXySupported( - params.input_info, ¶ms.output_info, reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.tanh") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - TanhParams params; - auto err = EthosnAPI::Tanh(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && EthosnCompiler::GetSupported()->IsTanhSupported( - params.input_info, ¶ms.output_info, reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.leaky_relu") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - LeakyReLUParams params; - auto err = EthosnAPI::LeakyReLU(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && EthosnCompiler::GetSupported()->IsLeakyReluSupported( - params.leaky_relu_info, params.input_info, ¶ms.output_info, reason, - sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.conv2d_transpose") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - QnnConv2dTransposeParams params; - auto err = EthosnAPI::QnnConv2dTranspose(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && EthosnCompiler::GetSupported()->IsTransposeConvolutionSupported( - params.bias_info, params.weights_info, params.conv_info, params.input_info, - ¶ms.output_info, reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.concatenate") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = 
args[0]; - ConcatenateParams params; - auto err = EthosnAPI::Concatenate(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && EthosnCompiler::GetSupported()->IsConcatenationSupported( - params.input_infos, params.concat_info, ¶ms.output_info, reason, - sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.split") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - SplitParams params; - EthosnAPI::DefaultInputTensor(call); - auto err = EthosnAPI::Split(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && EthosnCompiler::GetSupported()->IsSplitSupported( - params.input_info, params.split_info, nullptr, reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.depth_to_space") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - DepthToSpaceParams params; - auto err = EthosnAPI::DepthToSpace(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && - EthosnCompiler::GetSupported()->IsDepthToSpaceSupported( - params.input_info, params.depth_info, ¶ms.output_info, reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.relu") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - ReluParams params; - auto err = EthosnAPI::Relu(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && - EthosnCompiler::GetSupported()->IsReluSupported( - params.relu_info, params.input_info, ¶ms.output_info, reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.requantize") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - RequantizeParams params; - auto err = EthosnAPI::Requantize(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && EthosnCompiler::GetSupported()->IsRequantizeSupported( - params.requantize_info, params.input_info, ¶ms.output_info, reason, - sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.reinterpret_quantize") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - ReinterpretQuantizationParams params; - auto err = EthosnAPI::ReinterpretQuantize(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && EthosnCompiler::GetSupported()->IsReinterpretQuantizationSupported( - params.reinterpret_quantize_info, params.input_info, ¶ms.output_info, - reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.support.resize") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - Call call = args[0]; - ResizeParams params; - auto err = EthosnAPI::Resize(call, ¶ms); - err += EthosnCompiler::SupportedSetup(); - char reason[kReasonMaxLength]; - reason[0] = '\0'; - *rv = !err && - EthosnCompiler::GetSupported()->IsResizeSupported( - params.resize_info, params.input_info, ¶ms.output_info, reason, sizeof(reason)); - err += EthosnError(reason); - }); - -TVM_REGISTER_GLOBAL("relay.ethos-n.query").set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { -#if defined ETHOSN_HW - 
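Each TVM_REGISTER_GLOBAL above exposes one is-supported predicate by name. Purely as an illustrative sketch (not part of the deleted sources), such a packed function would normally be fetched and invoked through the standard TVM registry; the helper name and the assumption that the argument is the fused qnn.conv2d pattern are hypothetical:

#include <tvm/relay/expr.h>
#include <tvm/runtime/logging.h>
#include <tvm/runtime/registry.h>

// Hypothetical helper: ask the Ethos-N backend whether a fused conv2d pattern
// (requantize -> bias_add -> qnn.conv2d) is supported by the configured variant.
bool IsEthosnConv2dSupported(const tvm::relay::Call& call) {
  const tvm::runtime::PackedFunc* pf =
      tvm::runtime::Registry::Get("relay.ethos-n.support.conv2d");
  ICHECK(pf != nullptr) << "Ethos-N support checks are not registered in this build";
  return (*pf)(call);
}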
*rv = true; -#else - *rv = false; -#endif -}); - -TVM_REGISTER_GLOBAL("relay.ethos-n.api.version").set_body_typed([]() -> String { - return sl::GetLibraryVersion().ToString(); -}); - -} // namespace ethosn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/ethosn/codegen_ethosn.h b/src/relay/backend/contrib/ethosn/codegen_ethosn.h deleted file mode 100644 index e9198c2260f8..000000000000 --- a/src/relay/backend/contrib/ethosn/codegen_ethosn.h +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/backend/contrib/ethosn/codegen_ethosn.h - * \brief The Relay -> Arm(R) Ethos(TM)-N command stream compiler. - */ - -#ifndef TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CODEGEN_ETHOSN_H_ -#define TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CODEGEN_ETHOSN_H_ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../../../../runtime/contrib/ethosn/ethosn_runtime.h" -#include "../codegen_c/codegen_c.h" -#include "ethosn_api.h" -#include "ethosn_support_library/Support.hpp" -#include "ethosn_support_library/SupportQueries.hpp" - -namespace tvm { -namespace relay { -namespace contrib { -namespace ethosn { - -namespace sl = ::ethosn::support_library; - -/*! - * \brief A struct to hold an uncompiled support library network alongside - * the desired order of input and output operation ids. - */ -struct NetworkWithIDs { - struct hash_pair { - template - size_t operator()(const std::pair& p) const { - return std::hash{}(p.first) ^ std::hash{}(p.second); - } - }; - std::shared_ptr network; - std::unordered_map input_ids; - std::unordered_map, unsigned int, hash_pair> output_ids; -}; - -/*! - * \brief A base class for error handling using ErrorReporter. - */ -class ErrorReportingPass { - public: - ErrorReportingPass(const IRModule& mod, const GlobalVar& var) : mod_(mod), var_(var) {} - - /*! - * \brief Report fatal errors for an expression. - * \param expr The expression to report errors at. - * \param err The errors to report. - */ - void ReportFatalError(const ObjectRef& expr, const EthosnError& err) { - for (const auto& msg : err.msgs) { - error_reporter_.ReportAt(this->var_, expr, ErrorBuilder() << msg); - } - error_reporter_.RenderErrors(this->mod_); - } - - protected: - /*! \brief An ErrorReporter object to render the errors.*/ - ErrorReporter error_reporter_; - /*! \brief The module to report errors for. */ - IRModule mod_; - /*! \brief The GlobalVar to report errors for. */ - GlobalVar var_; -}; - -/*! - * \brief A custom pass to infer the support library tensor information - * for a Relay expression. 
- * - * Support Library requires that tensors are explicitly declared with - * information on their size, data type, format (eg. NHWC) and quantisation - * parameters. In Relay, size and data type are already determined when the - * type_infer pass is run. However, format and quantisation parameters are - * properties of the operators that consume the tensors. - * - * This pass works by having each node initialise the information of its - * parents, essentially propagating the inferred information all the way up - * to the inputs of the expression. - * - * Because the children initialise the information of the parents, it is - * necessary to traverse the graph in such a way so as to ensure all the - * children of a node are visited before the parent is. As Relay does not - * keep a reference to child nodes, this pass goes in preorder but will - * skip visiting a parent if all the children haven't yet been visited (see - * VisitInferred for the logic that implements this). - * - * Inference only works for supported callnodes, for tuplenodes, tuplegetitem - * nodes and free var nodes. Other nodes should not be off-loaded to Ethos-N. - */ -class InferTensorsVisitor : private ErrorReportingPass, private ExprVisitor { - public: - InferTensorsVisitor(const IRModule& mod, const GlobalVar& var) : ErrorReportingPass(mod, var) {} - - /*! - * \brief Infer the support library tensor information for all the nodes - * in an expression. - * \param expr The expression for which to infer tensor information. - * \return A map of expressions to tensor information. - * \note This algorithm does not traverse into functions, so call it on - * the body of the function you're interested in. - */ - std::map> Infer(const Expr& expr); - - private: - // Infer a callnode if it's a supported operator/composite function - void InferCall(const CallNode* cn); - void VisitInferred(const Expr& expr); - - void VisitExpr_(const CallNode* cn) final; - void VisitExpr_(const TupleNode* tn) final; - void VisitExpr_(const TupleGetItemNode* tg) final; - // Don't traverse into functions, the Ethos-N codegen isn't meant to support them. - void VisitExpr_(const FunctionNode* fn) final {} - - /*! \brief A look-up table from Expr to tensor infos. */ - std::map> tensor_table_; -}; - -std::map> InferTensors(const IRModule& mod, const GlobalVar& var, - const Expr& expr) { - return InferTensorsVisitor(mod, var).Infer(expr); -} - -/*! - * \brief A pass to generate a support library network from a Relay function. - * - * This pass constructs an equivalent support library network from a Relay - * function in two visits. One to infer the tensor information of all the nodes - * and another in postorder to add the nodes as support library operands. - * (Supported) Callnodes, tuplenodes, tuplegetitemnodes and (free) - * varnodes are handled by this pass. - * - * As part of the pass, nodes in the function body are associated with both - * type information in the 'tensor_table', and support library operands in the - * 'operand_table'. Both of these are maps of vectors as a Relay node can have - * tuple type and accordingly be associated with multiple tensors. For nodes - * which are not tuple type, vectors of size 1 are used. - */ -class ConstructNetworkVisitor : public MixedModeVisitor, private ErrorReportingPass { - public: - explicit ConstructNetworkVisitor(const IRModule& mod, const GlobalVar& var) - : ErrorReportingPass(mod, var) {} - - /*! - * \brief Construct a support library network from a given Relay function. 
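NetworkWithIDs above keys its output table on an (operation id, output index) pair and therefore carries the small hash_pair functor. A self-contained sketch of the same pattern with plain standard-library types; the names here are illustrative only:

#include <cstddef>
#include <functional>
#include <unordered_map>
#include <utility>

// XOR-combined hash for pair keys, mirroring NetworkWithIDs::hash_pair above.
struct PairHash {
  template <typename T1, typename T2>
  std::size_t operator()(const std::pair<T1, T2>& p) const {
    return std::hash<T1>{}(p.first) ^ std::hash<T2>{}(p.second);
  }
};

// Maps (source operation id, output index) to the output slot the TVM runtime expects.
using OutputIdMap =
    std::unordered_map<std::pair<unsigned int, unsigned int>, unsigned int, PairHash>;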
The - * function should contain only nodes supported by Ethos-N. - * \param func The Relay function for which to construct a support library network. - * \return A support library network that performs the same operation as the Relay - * function. - */ - NetworkWithIDs Construct(const Function& func); - - private: - // Translate from a callnode to the appropriate 'Make' method - sl::TensorsAndId HandleCall(const CallNode*); - - void VisitExpr_(const CallNode* cn) final; - void VisitExpr_(const ConstantNode* cn) final; - void VisitExpr_(const TupleNode* op) final; - void VisitExpr_(const TupleGetItemNode* tg) final; - void VisitLeaf(const Expr& expr) final; - - // Make a support library operand from a Call - EthosnError MakeConvolutionLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeFullyConnectedLayer(const Call&, sl::TensorAndId* out); - EthosnError MakeMaxPool2DLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeAvgPool2DLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeReshapeLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeAdditionLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeSigmoidLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeMeanLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeTanhLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeConv2DTransposeLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeConcatenateLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeSplitLayer(const Call& call, sl::TensorsAndId* outs); - EthosnError MakeDepthToSpaceLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeReluLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeLeakyReLULayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeRequantizeLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeReinterpretQuantizeLayer(const Call& call, sl::TensorAndId* out); - EthosnError MakeResizeLayer(const Call& call, sl::TensorAndId* out); - - /*! \brief A look-up table from Expr to layers. */ - std::map>> operand_table_; - /*! \brief A look-up table from Expr to SL operation IDs. */ - std::map>> id_table_; - /*! \brief A look-up table from Expr to tensor infos. */ - std::map> tensor_table_; - /*! \brief The support library network to compile. */ - std::shared_ptr network_; -}; - -NetworkWithIDs ConstructNetwork(const IRModule& mod, const GlobalVar& var, const Function& func) { - return ConstructNetworkVisitor(mod, var).Construct(func); -} - -/*! \brief Attributes to store the compiler options for Ethos-N */ -struct EthosnCompilerConfigNode : public tvm::AttrsNode { - String variant; - String sram_size; - String tops; - String ple_ratio; - bool strategy0; - bool strategy1; - bool strategy3; - bool strategy4; - bool strategy6; - bool strategy7; - bool dump_ram; - bool initial_sram_dump; - bool block_config_16x16; - bool block_config_32x8; - bool block_config_8x32; - bool block_config_8x8; - bool enable_intermediate_compression; - bool disable_winograd; - String debug_dir; - bool inline_non_compute_intensive_partitions; - bool experimental_compiler; - - TVM_DECLARE_ATTRS(EthosnCompilerConfigNode, "ext.attrs.EthosnCompilerConfigNode") { - TVM_ATTR_FIELD(variant).describe("See Ethos-N documentation.").set_default("n78"); - TVM_ATTR_FIELD(sram_size) - .describe("Optionally override the default sram size. See Ethos(TM)-N documentation.") - .set_default("0"); - TVM_ATTR_FIELD(tops) - .describe("Valid values 1, 2, 4 and 8. 
See Ethos(TM)-N documentation.") - .set_default("1"); - TVM_ATTR_FIELD(ple_ratio) - .describe("Valid values 2 and 4. See Ethos(TM)-N documentation.") - .set_default("2"); - TVM_ATTR_FIELD(strategy0).set_default(true); - TVM_ATTR_FIELD(strategy1).set_default(true); - TVM_ATTR_FIELD(strategy3).set_default(true); - TVM_ATTR_FIELD(strategy4).set_default(true); - TVM_ATTR_FIELD(strategy6).set_default(true); - TVM_ATTR_FIELD(strategy7).set_default(true); - TVM_ATTR_FIELD(dump_ram).set_default(false); - TVM_ATTR_FIELD(initial_sram_dump).set_default(false); - TVM_ATTR_FIELD(block_config_16x16).set_default(true); - TVM_ATTR_FIELD(block_config_32x8).set_default(true); - TVM_ATTR_FIELD(block_config_8x32).set_default(true); - TVM_ATTR_FIELD(block_config_8x8).set_default(true); - TVM_ATTR_FIELD(enable_intermediate_compression).set_default(true); - TVM_ATTR_FIELD(disable_winograd).set_default(false); - TVM_ATTR_FIELD(debug_dir).set_default("."); - TVM_ATTR_FIELD(inline_non_compute_intensive_partitions) - .describe( - "A heuristic to improve performance. Inlines functions partitioned for Arm(R) " - "Ethos(TM)-N that are deemed 'non-compute-intensive'. The inlined functions will " - "continue through TVM's standard compilation flow.") - .set_default(true); - TVM_ATTR_FIELD(experimental_compiler) - .describe("An exprimental cascading compiler for Arm(R) Ethos(TM)-N.") - .set_default(false); - } -}; - -class EthosnCompilerConfig : public Attrs { - public: - TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(EthosnCompilerConfig, Attrs, EthosnCompilerConfigNode); -}; - -TVM_REGISTER_NODE_TYPE(EthosnCompilerConfigNode); -TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.ethos-n.options", EthosnCompilerConfig); - -EthosnCompilerConfig GetCompilerAttrs() { - auto ctx = transform::PassContext::Current(); - Optional cfg = - ctx->GetConfig("relay.ext.ethos-n.options"); - if (!cfg.defined()) { - return AttrsWithDefaultValues(); - } - return cfg.value(); -} -TVM_REGISTER_GLOBAL("relay.ext.ethos-n.get_compiler_attrs").set_body_typed(GetCompilerAttrs); - -/*! \brief The compiler for Ethos-N functions */ -class EthosnCompiler { - public: - /*! - * \brief Create an Ethos-N runtime module from a Relay Ethos-N function - * \param ref An ObjectRef pointing to a Relay Ethos-N function - * \return runtime_module An Ethos-N runtime module - */ - static runtime::Module CreateRuntimeModule(const ObjectRef& ref); - - /*! - * \brief Initialise the is-supported functionality of the Ethos-N support library - * with the target variant. - * \return Error object - */ - static EthosnError SupportedSetup(); - - /*! - * \brief Return the is-supported API of the Support Library - * \return A reference to the API. - */ - static std::unique_ptr& GetSupported() { - ICHECK(m_Queries != nullptr); - return m_Queries; - } - - private: - /*! - * \brief Compile a single Relay Ethos-N function into an ordered compiled network. - * Compilation options will be taken from the PassContext. - * \param mod The module the function is stored in (for error reporting purposes) - * \param gvar The global var corresponding to the function - * \param func The function to be compiled - * \return ordered_compiled_network A compiled network with additional information - * to handle difference in input/output ordering between the TVM runtime and the - * Ethos-N compiled network. - */ - static runtime::ethosn::OrderedCompiledNetwork CompileEthosnFunc(const IRModule& mod, - const GlobalVar& gvar, - const Function& func); - - /*! 
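The option block above is registered against the key "relay.ext.ethos-n.options" and read back, with its declared defaults, by GetCompilerAttrs(). As a rough sketch only, and assuming the usual tvm::transform::PassContext machinery rather than anything specific to this backend, a C++ caller could scope those options like this:

#include <tvm/ir/attrs.h>
#include <tvm/ir/transform.h>
#include <tvm/support/with.h>

void RunWithDefaultEthosnOptions() {
  auto node = tvm::runtime::make_object<tvm::transform::PassContextNode>();
  node->opt_level = 3;
  // Attach the option block GetCompilerAttrs() looks up; here just the declared
  // defaults (variant "n78", tops "1", ple_ratio "2", ...).
  node->config.Set("relay.ext.ethos-n.options",
                   tvm::AttrsWithDefaultValues<EthosnCompilerConfig>());
  tvm::transform::PassContext ctx(node);
  tvm::With<tvm::transform::PassContext> scope(ctx);
  // ... partition and build the Relay module inside this scope ...
}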
- * \brief Get the Support Library compilation options from the PassContext - * \return options The compilation options - */ - static sl::CompilationOptions CreateOptions(); - - /*! - * \brief Determine the order in which inputs should be provided/outputs should be - * read from a compiled network. This is required because when you compile a network - * for Ethos-N, you don't have control over the order in which the inputs/outputs - * are given. You can, however, query what order the compiler decided to give them in. - * We therefore keep track of our desired order and the actual order and create a - * small translation table between the two for use in the runtime. - * \param network A network additionally with the desired input/output order - * \param compiled_network The compiled network with an as yet undetermined input/output order - * \return input_output_order The order in which to permute the inputs/outputs given - * by the TVM runtime such that they map correctly to the compiled network. - */ - static std::pair, std::vector> GetInputOutputOrder( - NetworkWithIDs network, const std::unique_ptr& compiled_network); - - /*! - * \brief Determine the input and output sizes of a compiled network. - * - * These need to be queried from the compiled network as the compiler can choose - * to add additional padding on the input/output in certain cases. - * - * \param compiled_network The network compiled by the NPU compiler. - * \return Pair of vectors of buffer sizes for both the inputs and outputs of the - * network. - */ - static std::pair, std::vector> GetIOSizes( - const std::unique_ptr& compiled_network); - - /*! - * \brief Query interface used to determine if the Ethos-N hardware supports an operation - * with the supplied parameters. - */ - static std::unique_ptr m_Queries; -}; - -runtime::Module CompileEthosn(const ObjectRef& ref) { - return EthosnCompiler::CreateRuntimeModule(ref); -} - -TVM_REGISTER_GLOBAL("relay.ext.ethos-n").set_body_typed(CompileEthosn); - -TVM_REGISTER_GLOBAL("relay.ext.ethos-n.constant_updater") - .set_body_typed([](Expr expr, std::string symbol) { return Map(); }); - -} // namespace ethosn -} // namespace contrib -} // namespace relay -} // namespace tvm - -#endif // TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CODEGEN_ETHOSN_H_ diff --git a/src/relay/backend/contrib/ethosn/convert_equivalent.cc b/src/relay/backend/contrib/ethosn/convert_equivalent.cc deleted file mode 100644 index ef8c4a5ef567..000000000000 --- a/src/relay/backend/contrib/ethosn/convert_equivalent.cc +++ /dev/null @@ -1,379 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
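GetInputOutputOrder above builds exactly the translation table described in its comment: the compiled network reports its buffers in an order of its own choosing, each tagged with the source operation id recorded while the network was constructed, and those ids are mapped back to the indices the TVM runtime will use. A minimal, Support-Library-free sketch of that lookup (names hypothetical):

#include <cstdint>
#include <unordered_map>
#include <vector>

// compiled_source_op_ids: ids as reported by the compiled network, in its order.
// desired_input_ids: operation id -> input index recorded at construction time.
// The result tells the runtime which of its buffers feeds each compiled-network input.
std::vector<uint32_t> TranslateInputOrder(
    const std::vector<uint32_t>& compiled_source_op_ids,
    const std::unordered_map<uint32_t, uint32_t>& desired_input_ids) {
  std::vector<uint32_t> order;
  order.reserve(compiled_source_op_ids.size());
  for (uint32_t op_id : compiled_source_op_ids) {
    order.push_back(desired_input_ids.at(op_id));
  }
  return order;
}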
- * \file src/relay/backend/contrib/ethosn/convert_equivalent.cc - * \brief Converts operations into a numerically equivalent form - * that can be understood by the NPU codegen. - */ - -#include -#include -#include - -#include - -#include "../../../qnn/utils.h" -#include "../../../transforms/fold_constant.h" -#include "../../../transforms/pattern_utils.h" -#include "../../../transforms/simplify_expr.h" -#include "ethosn_api.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace ethosn { - -/*! - * \brief Helper class to extract inputs and quantization information from binary - * elementwise operations ready to convert. - */ -class BinaryElementwiseParams { - public: - static BinaryElementwiseParams ExtractBinaryElementwiseParams(const Call& call) { - auto params = BinaryElementwiseParams(); - params.input1 = call->args[0]; - params.input2 = call->args[1]; - params.input1_scale = call->args[2]; - params.input1_zero_point = call->args[3]; - params.input2_scale = call->args[4]; - params.input2_zero_point = call->args[5]; - // Reverse the inputs if the constant is first input - if (call->args[0]->IsInstance()) { - params.input1 = call->args[1]; - params.input2 = call->args[0]; - params.input1_scale = call->args[4]; - params.input1_zero_point = call->args[5]; - params.input2_scale = call->args[2]; - params.input2_zero_point = call->args[3]; - } - params.output_scale = call->args[6]; - params.output_zero_point = call->args[7]; - return params; - } - - Expr input1; - Expr input2; - Expr input1_scale; - Expr input1_zero_point; - Expr input2_scale; - Expr input2_zero_point; - Expr output_scale; - Expr output_zero_point; -}; - -/*! - * \brief Converts qnn.mul to mathematically equivalent - * qnn.conv2d depthwise operation. - * - * \param expr The expression to attempt to convert. - * - * \return Null if conversion is not supported else the converted expression. - */ -Optional ConvertQnnMultiplyToDepthwise(const Expr& expr) { - Call call = Downcast(expr); - const auto params = BinaryElementwiseParams::ExtractBinaryElementwiseParams(call); - - Constant input_constant = Downcast(params.input2); - TensorType input_constant_tt = Downcast(input_constant->checked_type()); - TensorType input_tt = Downcast(call->checked_type()); - int channels = Downcast(input_tt->shape.back())->value; - if (channels != Downcast(input_constant_tt->Size())->value) { - return NullOpt; - } - - runtime::NDArray input_data = input_constant->data; - runtime::NDArray kernel_data_hwoi = - runtime::NDArray::Empty({1, 1, channels, 1}, input_data->dtype, input_data->device); - kernel_data_hwoi.CopyFrom(input_data); - Constant kernel = Constant(kernel_data_hwoi, input_constant->span); - - TensorType output_tt = Downcast(expr->checked_type()); - DataType output_dtype = output_tt->dtype; - - Expr conv2d = - qnn::MakeQnnConv2D(params.input1, kernel, params.input1_zero_point, params.input2_zero_point, - params.input1_scale, params.input2_scale, {1, 1}, {0, 0, 0, 0}, {1, 1}, - channels, channels, {1, 1}, "NHWC", "HWOI", "NHWC", DataType::Int(32)); - Constant bias_data = MakeConstantZeros(DataType::Int(32), {channels}); - Expr bias_add = MakeBiasAdd(conv2d, bias_data, 3); - Expr requantize = qnn::MakeRequantize(bias_add, params.input1_scale, params.input1_zero_point, - params.output_scale, params.output_zero_point, -1, "None", - "None", output_dtype); - - try { - requantize = InferType(requantize); - return requantize; - } catch (tvm::Error& e) { - // Conversion produced an invalid op. - return NullOpt; - } -} - -/*! 
- * \brief Converts qnn.add to a mathematically equivalent - * qnn.conv2d depthwise operation. - * - * \param expr The expression to attempt to convert. - * - * \return Null if conversion is not supported else the converted expression. - */ -Optional ConvertQnnAddToDepthwise(const Expr& expr) { - Call call = Downcast(expr); - const auto params = BinaryElementwiseParams::ExtractBinaryElementwiseParams(call); - - Constant input_constant = Downcast(params.input2); - TensorType input_constant_tt = Downcast(input_constant->checked_type()); - TensorType input_tt = Downcast(call->checked_type()); - int channels = Downcast(input_tt->shape.back())->value; - if (channels != Downcast(input_constant_tt->Size())->value) { - return NullOpt; - } - - // Create the identity kernel. The kernel data is constructed such that it produces an identity - // operation in the quantized space. Therefore, the input is not scaled in any way which allows - // us to later use the bias to perform the addition. - float input_scale_value = GetScalarFromConstant(params.input1_scale); - float output_scale_value = GetScalarFromConstant(params.output_scale); - float identity_kernel_scale_ub = std::min(output_scale_value / input_scale_value, 1.f); - float identity_kernel_scale_lb = (1.f / 255.f); - float identity_kernel_scale_target = (identity_kernel_scale_ub + identity_kernel_scale_lb) / 2.f; - float identity_kernel_scale_recip_rounded = std::round(1.f / identity_kernel_scale_target); - float identity_kernel_scale_value = 1.f / identity_kernel_scale_recip_rounded; - Constant identity_kernel_scale = - MakeConstantScalar(DataType::Float(32), identity_kernel_scale_value); - Constant identity_kernel_zero_point = MakeConstantScalar(DataType::Int(32), 0); - float identity_kernel_quantized_data = identity_kernel_scale_recip_rounded; - std::vector identity_kernel_data(channels, - static_cast(identity_kernel_quantized_data)); - Constant identity_kernel = - MakeConstantTensor(input_constant_tt->dtype, {1, 1, channels, 1}, identity_kernel_data); - - // Calculate the bias, this is where the addition happens. The bias values are calculated by - // scaling the constant input to input_scale * identity_kernel_scale. - Constant bias_scale = - MakeConstantScalar(DataType::Float(32), input_scale_value * identity_kernel_scale_value); - Constant bias_zero_point = MakeConstantScalar(DataType::Int(32), 0); - Expr requantize_bias = - qnn::MakeRequantize(params.input2, params.input2_scale, params.input2_zero_point, bias_scale, - bias_zero_point, -1, "None", "None", DataType::Int(32)); - Expr reshape_bias = MakeReshape(requantize_bias, {channels}); - - try { - reshape_bias = transform::FoldConstantExpr(reshape_bias); - } catch (tvm::Error& e) { - // Conversion produced an invalid op. - return NullOpt; - } - Constant bias = Downcast(reshape_bias); - - // Make depthwise conv2d operation - Expr conv2d = qnn::MakeQnnConv2D(params.input1, identity_kernel, params.input1_zero_point, - identity_kernel_zero_point, params.input1_scale, - identity_kernel_scale, {1, 1}, {0, 0, 0, 0}, {1, 1}, channels, - channels, {1, 1}, "NHWC", "HWOI", "NHWC", DataType::Int(32)); - Expr bias_add = MakeBiasAdd(conv2d, bias, 3); - Expr requantize = qnn::MakeRequantize(bias_add, params.input1_scale, params.input1_zero_point, - params.output_scale, params.output_zero_point, -1, "None", - "None", input_constant_tt->dtype); - - try { - return InferType(requantize); - } catch (tvm::Error& e) { - // Conversion produced an invalid op. - return NullOpt; - } -} - -/*! 
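The add-to-depthwise rewrite above hinges on one piece of arithmetic: pick an identity-kernel scale whose reciprocal is a whole number, lying between 1/255 and min(output_scale/input_scale, 1), so the 1x1 depthwise kernel is exact in the quantized domain and the actual addition can be folded into the bias. A standalone sketch of just that calculation, with illustrative names:

#include <algorithm>
#include <cmath>
#include <cstdint>

struct IdentityKernel {
  float scale;          // quantization scale of the 1x1 depthwise kernel
  uint8_t quant_value;  // the single repeated quantized weight value
};

IdentityKernel ChooseIdentityKernel(float input_scale, float output_scale) {
  float ub = std::min(output_scale / input_scale, 1.f);
  float lb = 1.f / 255.f;
  float target = (ub + lb) / 2.f;
  float recip_rounded = std::round(1.f / target);
  return {1.f / recip_rounded, static_cast<uint8_t>(recip_rounded)};
}
// e.g. equal input/output scales give target ~0.502, so a kernel value of 2 at scale 1/2:
// multiplying by it is exactly the identity in the quantized domain.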
- * \brief Converts qnn.mul to a mathematically equivalent qnn.requantize operation. - * When converting to support library API, a reinterpret quantize operation will be created. - * - * \param expr The expression to attempt to convert. - * - * \return Null if conversion is not supported else the converted expression. - */ -Optional ConvertQnnMultiplyToReinterpretQuantize(const Expr& expr) { - Call call = Downcast(expr); - const auto params = BinaryElementwiseParams::ExtractBinaryElementwiseParams(call); - - Constant input_constant = Downcast(params.input2); - TensorType input_constant_tt = Downcast(input_constant->checked_type()); - if (Downcast(input_constant_tt->Size())->value != 1) { - return NullOpt; - } - - float input_scale_value = GetScalarFromConstant(params.input1_scale); - float constant_scale_value = GetScalarFromConstant(params.input2_scale); - int constant_zero_point_value = GetScalarFromConstant(params.input2_zero_point); - float new_output_scale_value = input_scale_value * constant_scale_value * - (ToScalar(input_constant->data) - constant_zero_point_value); - Constant new_output_scale = MakeConstantScalar(DataType::Float(32), new_output_scale_value); - - if (std::abs(new_output_scale_value - GetScalarFromConstant(params.output_scale)) > - 0.004f) { - // Multiply does not represent an identity operation so don't convert. - return NullOpt; - } - - DataType output_data_type = Downcast(call->checked_type())->dtype; - - // A requantize operation is used to represent the identity reinterpret quantize op in - // the support library at this stage. That is, requantize is used here as a means for - // passing the quantization information to the API conversion layer. - Expr requantize = qnn::MakeRequantize( - params.input1, params.input1_scale, params.input1_zero_point, params.output_scale, - params.output_zero_point, -1, "None", "None", output_data_type); - - try { - return InferType(requantize); - } catch (tvm::Error& e) { - // Conversion produced an invalid op. - return NullOpt; - } -} - -/*! - * \brief Converts qnn.add to a mathematically equivalent qnn.requantize operation. - * When converting to support library API, a reinterpret quantize operation will be created. - * - * \param expr The expression to attempt to convert. - * - * \return Null if conversion is not supported else the converted expression.
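Both this multiply conversion and the add variant that follows only fire when the scalar operand leaves the tensor numerically unchanged up to requantization: for multiply the derived output scale must sit within 0.004 of the recorded one, for add the derived zero point within 1. A standalone sketch of those two checks, plain arithmetic with hypothetical names:

#include <cmath>

// qnn.mul by a scalar constant is an identity iff the output scale already absorbs it.
bool MulActsAsReinterpretQuantize(float input_scale, float const_scale, int const_zp,
                                  float raw_const, float output_scale) {
  float derived_scale = input_scale * const_scale * (raw_const - const_zp);
  return std::abs(derived_scale - output_scale) <= 0.004f;
}

// qnn.add of a scalar constant is an identity iff it amounts to a zero-point shift.
bool AddActsAsReinterpretQuantize(float input_scale, int input_zp, float scalar_scale,
                                  int scalar_zp, float raw_const, int output_zp) {
  float value = (raw_const - scalar_zp) * scalar_scale;
  float derived_zp = input_zp - value / input_scale;
  return derived_zp - output_zp <= 1.0f;
}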
- */ -Optional ConvertQnnAddToReinterpretQuantize(const Expr& expr) { - Call call = Downcast(expr); - const auto params = BinaryElementwiseParams::ExtractBinaryElementwiseParams(call); - - Constant input_constant = Downcast(params.input2); - TensorType input_constant_tt = Downcast(input_constant->checked_type()); - if (Downcast(input_constant_tt->Size())->value != 1) { - return NullOpt; - } - - float input_scale = GetScalarFromConstant(params.input1_scale); - int input_zero_point = GetScalarFromConstant(params.input1_zero_point); - float scalar_scale = GetScalarFromConstant(params.input2_scale); - int scalar_zero_point = GetScalarFromConstant(params.input2_zero_point); - int output_zero_point_value = GetScalarFromConstant(params.output_zero_point); - float scalar_value = (ToScalar(input_constant->data) - scalar_zero_point) * scalar_scale; - - float new_output_zero_point_value = input_zero_point - (scalar_value / input_scale); - if (new_output_zero_point_value - output_zero_point_value > 1.0f) { - // Add does not represent an identity operation so don't convert - return NullOpt; - } - - DataType output_data_type = Downcast(call->checked_type())->dtype; - - // A requantize operation is used to represent the identity reinterperet quantize op in - // the support library at this stage. That is requantize is used here as a means for - // passing the quantization information to the API conversion layer. - Expr requantize = qnn::MakeRequantize( - params.input1, params.input1_scale, params.input1_zero_point, params.output_scale, - params.output_zero_point, -1, "None", "None", output_data_type); - - try { - return InferType(requantize); - } catch (tvm::Error& e) { - // Conversion produced an invalid op. - return NullOpt; - } -} - -class ConvertEquivalentsMutator : public MixedModeMutator { - public: - Expr Rewrite_(const CallNode* pre, const Expr& post) override { - Call call = Downcast(post); - if (!call->op->IsInstance()) { - return post; - } - - Function func = Downcast(call->op); - Function new_func = Function(func); - auto composite_name = func->GetAttr(attr::kComposite); - - Optional optional_new_func_body; - String new_composite_name = ""; - if (composite_name == "ethos-n.qnn_mul_to_reinterpret_quantize") { - optional_new_func_body = ConvertQnnMultiplyToReinterpretQuantize(func->body); - new_composite_name = "ethos-n.qnn_reinterpret_quantize"; - } else if (composite_name == "ethos-n.qnn_mul_to_depthwise") { - optional_new_func_body = ConvertQnnMultiplyToDepthwise(func->body); - new_composite_name = "ethos-n.qnn_conv2d"; - } else if (composite_name == "ethos-n.qnn_add_to_reinterpret_quantize") { - optional_new_func_body = ConvertQnnAddToReinterpretQuantize(func->body); - new_composite_name = "ethos-n.qnn_reinterpret_quantize"; - } else if (composite_name == "ethos-n.qnn_add_to_depthwise") { - optional_new_func_body = ConvertQnnAddToDepthwise(func->body); - new_composite_name = "ethos-n.qnn_conv2d"; - } - - if (new_composite_name != "") { - ICHECK(optional_new_func_body) - << "Operation " << composite_name - << " was marked as having a valid conversion, but it could not be converted."; - new_func = WithFields(func, func->params, optional_new_func_body.value()); - new_func = WithAttr(std::move(new_func), attr::kComposite, new_composite_name); - } - - Call new_call = WithFields(call, new_func); - return Downcast(new_call); - } -}; - -tvm::transform::Pass ConvertEquivalents() { - runtime::TypedPackedFunc pass_func = - [=](IRModule mod, transform::PassContext ctx) { - for (auto gv : 
mod->GetGlobalVars()) { - Function func = Downcast(mod->Lookup(gv)); - auto compiler_name = func->GetAttr(attr::kCompiler); - if (compiler_name.defined() && compiler_name == "ethos-n") { - auto new_body = ConvertEquivalentsMutator().VisitExpr(func->body); - if (!new_body.same_as(func->body)) { - Function new_func = WithFields(func, func->params, new_body); - mod->Update(gv, new_func); - } - } - } - return mod; - }; - return tvm::transform::CreateModulePass( - pass_func, 0, "relay.backend.contrib.ethos-n.ConvertEquivalents", {"InferType"}); -} - -TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnMultiplyToDepthwise") - .set_body_typed(ConvertQnnMultiplyToDepthwise); - -TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnAddToDepthwise") - .set_body_typed(ConvertQnnAddToDepthwise); - -TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnMultiplyToReinterpretQuantize") - .set_body_typed(ConvertQnnMultiplyToReinterpretQuantize); - -TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnAddToReinterpretQuantize") - .set_body_typed(ConvertQnnAddToReinterpretQuantize); - -TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertEquivalents") - .set_body_typed(ConvertEquivalents); - -} // namespace ethosn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/ethosn/ethosn_api.cc b/src/relay/backend/contrib/ethosn/ethosn_api.cc deleted file mode 100644 index 0f539d96e919..000000000000 --- a/src/relay/backend/contrib/ethosn/ethosn_api.cc +++ /dev/null @@ -1,1065 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/*! - * \file src/relay/backend/contrib/ethosn/ethosn_api.cc - * \brief The Relay -> Arm(R) Ethos(TM)-N command stream compiler. 
- */ - -#include "ethosn_api.h" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "../../../op/make_op.h" -#include "../../../transforms/pattern_utils.h" -#include "../../../transforms/simplify_expr.h" -#include "../constant_transforms.h" -#include "ethosn_support_library/Support.hpp" -#include "ethosn_support_library/SupportQueries.hpp" -#include "tvm/relay/qnn/attrs.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace ethosn { - -sl::TensorInfo EthosnAPI::DefaultInputTensor(const Expr& expr) { - Call call = Downcast(expr); - const auto* dtype = call->args[0]->checked_type().as(); - sl::DataType data_type; - Tvm2Npu(dtype->dtype, &data_type); - return sl::TensorInfo({}, data_type, sl::DataFormat::NHWC, {}); -} - -EthosnError EthosnAPI::QnnConv2d(const Expr& expr, ConvolutionParams* params) { - Call requantize = Downcast(expr); - Call bias_add = Downcast(requantize->args[0]); - Call conv = Downcast(bias_add->args[0]); - Call pad; - if (conv->args[0]->IsInstance() && - Downcast(conv->args[0])->op == Op::Get("nn.pad")) - pad = Downcast(conv->args[0]); - const auto& conv_attr = conv->attrs.as(); - params->is_depthwise = conv_attr->channels.defined() && - tvm::tir::ExprDeepEqual()(conv_attr->channels, conv_attr->groups) && - conv_attr->groups != 1; - - // Extract the quantization params from the arguments - int input_zero_point; - int kernel_zero_point; - int output_zero_point; - std::valarray input_scale; - std::valarray kernel_scale; - float output_scale; - std::string s = conv_attr->kernel_layout; - unsigned int qaxis = s.find("O"); - assert(conv->args[2].size() == 1); - assert(conv->args[3] == 1); - assert(requantize->args[4] == 1); - assert(conv->args[4] == 4); - assert(conv->args[1]->checked_type().shape[qaxis] == kernel_scale_axis.size()); - assert(requantize->args[3] == 1); - - EthosnError err = AsConstant(conv->args[2], &input_zero_point); - err += AsConstant(conv->args[3], &kernel_zero_point); - err += AsConstant(requantize->args[4], &output_zero_point); - err += AsConstant(conv->args[4], &input_scale); - err += AsConstant(conv->args[5], &kernel_scale); - err += AsConstant(requantize->args[3], &output_scale); - - // Convert quantization params - sl::QuantizationInfo input_q_info; - sl::QuantizationInfo weights_q_info; - sl::QuantizationInfo bias_q_info; - sl::QuantizationInfo output_q_info; - err += Tvm2Npu(input_zero_point, input_scale, qaxis, &input_q_info); - err += Tvm2Npu(kernel_zero_point, kernel_scale, qaxis, &weights_q_info); - std::valarray bias = input_q_info.GetScales() * weights_q_info.GetScales(); - err += Tvm2Npu(0, bias, 3, &bias_q_info); - err += Tvm2Npu(output_zero_point, output_scale, &output_q_info); - - // Convert convolution attributes - sl::Padding padding; - if (pad.defined()) { - Tvm2Npu(conv_attr->padding, &padding); - // Don't support both standalone operator padding and attribute defined padding - if (padding != sl::Padding({0, 0, 0, 0})) { - err += EthosnError( - ErrStrm() << "both op and attr padding exist, must be either op/attr only or no padding"); - } - err += Tvm2Npu(pad->attrs.as()->pad_width, &padding); - } else { - err += Tvm2Npu(conv_attr->padding, &padding); - } - sl::Stride stride; - err += Tvm2Npu(conv_attr->strides, &stride); - // Dilation is not supported - std::array dilation = {1, 1}; - AsArray(conv_attr->dilation, &dilation); - if (conv_attr->dilation.size() != 2 || dilation[0] != 1 || dilation[1] != 1) { - err += - 
EthosnError(ErrStrm() << "dilation=" << conv_attr->dilation << ", dilation must = [1, 1]"); - } - // Create convolution info - params->conv_info = sl::ConvolutionInfo(padding, stride, output_q_info); - - // Create input info - const TensorTypeNode* input_ttype; - if (pad.defined()) { - input_ttype = pad->args[0]->checked_type().as(); - } else { - input_ttype = conv->args[0]->checked_type().as(); - } - sl::TensorShape input_tensor_shape; - sl::DataType input_data_type; - err += Tvm2Npu(input_ttype->shape, &input_tensor_shape); - err += Tvm2Npu(input_ttype->dtype, &input_data_type); - params->input_info = - sl::TensorInfo(input_tensor_shape, input_data_type, sl::DataFormat::NHWC, input_q_info); - - // Create weights info - const auto* weights_dtype = conv->args[1]->checked_type().as(); - sl::TensorShape weights_tensor_shape; - sl::DataType weights_data_type; - sl::DataFormat weights_data_format; - // Ignore the error here because weights don't have a batch axis - Tvm2Npu(weights_dtype->shape, &weights_tensor_shape); - err += Tvm2Npu(weights_dtype->dtype, &weights_data_type); - err += Tvm2Npu(params->is_depthwise ? "HWIM" : "HWIO", &weights_data_format); - params->weights_info = - sl::TensorInfo(weights_tensor_shape, weights_data_type, weights_data_format, weights_q_info); - params->raw_weights = conv->args[1].as()->data->data; - - // Create bias info - params->bias_info = sl::TensorInfo( - {1, 1, 1, params->is_depthwise ? weights_tensor_shape[2] : weights_tensor_shape[3]}, - sl::DataType::INT32_QUANTIZED, sl::DataFormat::NHWC, bias_q_info); - params->raw_bias = bias_add->args[1].as()->data->data; - - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(requantize->checked_type(), &output_tensor_info); - output_tensor_info.m_QuantizationInfo = output_q_info; - params->output_info = output_tensor_info; - - return err; -} - -EthosnError EthosnAPI::QnnFullyConnected(const Expr& expr, FullyConnectedParams* params) { - Call requantize = Downcast(expr); - Call bias_add = Downcast(requantize->args[0]); - Call dense = Downcast(bias_add->args[0]); - - // Extract the quantization params from the arguments - int input_zero_point; - int kernel_zero_point; - int output_zero_point; - float input_scale; - float kernel_scale; - float output_scale; - EthosnError err = AsConstant(dense->args[2], &input_zero_point); - err += AsConstant(dense->args[3], &kernel_zero_point); - err += AsConstant(requantize->args[4], &output_zero_point); - err += AsConstant(dense->args[4], &input_scale); - err += AsConstant(dense->args[5], &kernel_scale); - err += AsConstant(requantize->args[3], &output_scale); - - // Convert quantization params - sl::QuantizationInfo data_q_info; - sl::QuantizationInfo weights_q_info; - sl::QuantizationInfo bias_q_info; - sl::QuantizationInfo output_q_info; - err += Tvm2Npu(input_zero_point, input_scale, &data_q_info); - err += Tvm2Npu(kernel_zero_point, kernel_scale, &weights_q_info); - std::valarray bias_scales = data_q_info.GetScale() * weights_q_info.GetScales(); - const int bias_zero_point = 0; - const unsigned int bias_axis = 3; - err += Tvm2Npu(bias_zero_point, bias_scales, bias_axis, &bias_q_info); - err += Tvm2Npu(output_zero_point, output_scale, &output_q_info); - - // Create fc info - params->fc_info = sl::FullyConnectedInfo(output_q_info); - - // Create data info - const TensorTypeNode* data_dtype = dense->args[0]->checked_type().as(); - sl::TensorShape data_tensor_shape; - sl::DataType data_data_type; - err += Tvm2Npu(data_dtype->shape, &data_tensor_shape); - err += 
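Both QnnConv2d and QnnFullyConnected above derive the bias quantization per output channel as the product of the input scale and the (possibly per-channel) kernel scales, using std::valarray so the multiply broadcasts. A tiny standalone equivalent of that step:

#include <valarray>

// Bias scale for each output channel: input_scale * kernel_scale[channel].
std::valarray<float> BiasScales(float input_scale, const std::valarray<float>& kernel_scales) {
  return input_scale * kernel_scales;  // scalar * valarray broadcasts across channels
}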
Tvm2Npu(data_dtype->dtype, &data_data_type); - params->input_info = sl::TensorInfo({data_tensor_shape[0], 1, 1, data_tensor_shape[1]}, - data_data_type, sl::DataFormat::NHWC, data_q_info); - - // Create weights info - Constant weights_data = Downcast(dense->args[1]); - weights_data = TransposeWeights(weights_data, "OI", "IO"); - const auto* weights_ttype = weights_data->checked_type().as(); - sl::TensorShape weights_tensor_shape; - sl::DataType weights_data_type; - sl::DataFormat weights_data_format; - // Ignore the error here because weights don't have a batch axis - Tvm2Npu(weights_ttype->shape, &weights_tensor_shape); - err += Tvm2Npu(weights_ttype->dtype, &weights_data_type); - err += Tvm2Npu("HWIO", &weights_data_format); - // Weights tensor shape is 1, 1, I, O - params->weights_info = sl::TensorInfo({1, 1, weights_tensor_shape[0], weights_tensor_shape[1]}, - weights_data_type, weights_data_format, weights_q_info); - params->raw_weights = weights_data->data; - - // Create bias info - params->bias_info = - sl::TensorInfo({1, 1, 1, weights_tensor_shape[1]}, sl::DataType::INT32_QUANTIZED, - sl::DataFormat::NHWC, bias_q_info); - params->raw_bias = bias_add->args[1].as()->data; - - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(requantize->checked_type(), &output_tensor_info); - output_tensor_info.m_Dimensions = {data_tensor_shape[0], 1, 1, weights_tensor_shape[1]}; - output_tensor_info.m_QuantizationInfo = output_q_info; - params->output_info = output_tensor_info; - - return err; -} - -EthosnError EthosnAPI::Pool2d(const Call& input, const Call& output, Array size, - Array strides, Array padding, - sl::PoolingType pooling_type, sl::PoolingInfo* pool_info, - sl::TensorInfo* input_info, sl::TensorInfo* output_info, - std::string layout) { - uint32_t npu_sizex, npu_sizey; - sl::Padding npu_padding; - sl::Stride npu_stride; - EthosnError err = Tvm2Npu(size, &npu_sizex, &npu_sizey); - err += Tvm2Npu(padding, &npu_padding); - err += Tvm2Npu(strides, &npu_stride); - *pool_info = sl::PoolingInfo(npu_sizex, npu_sizey, npu_stride.m_X, npu_stride.m_Y, npu_padding, - pooling_type); - - // Create input info - const auto* input_dtype = input->args[0]->checked_type().as(); - sl::TensorShape input_tensor_shape; - sl::DataType input_data_type; - sl::DataFormat input_data_format; - err += Tvm2Npu(input_dtype->shape, &input_tensor_shape); - err += Tvm2Npu(input_dtype->dtype, &input_data_type); - err += Tvm2Npu(layout, &input_data_format); - if (input_data_format != sl::DataFormat::NHWC) { - return EthosnError(ErrStrm() << "data format=" << layout << ", data format must = NHWC"); - } - *input_info = sl::TensorInfo(input_tensor_shape, input_data_type, input_data_format, - input_info->m_QuantizationInfo); - - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(output->checked_type(), &output_tensor_info); - // output quantization is the same as the input - output_tensor_info.m_QuantizationInfo = input_info->m_QuantizationInfo; - *output_info = output_tensor_info; - return err; -} - -EthosnError EthosnAPI::MaxPool2D(const Expr& expr, MaxPool2DParams* params) { - Call pool = Downcast(expr); - const auto pool_attrs = pool->attrs.as(); - return Pool2d(pool, pool, pool_attrs->pool_size, pool_attrs->strides, pool_attrs->padding, - sl::PoolingType::MAX, ¶ms->pool_info, ¶ms->input_info, ¶ms->output_info, - pool_attrs->layout); -} - -EthosnError EthosnAPI::AvgPool2D(const Expr& expr, AvgPool2DParams* params) { - Call cast_0 = Downcast(expr); - Call pool = Downcast(cast_0->args[0]); - Call cast_1 = 
Downcast(pool->args[0]); - const auto pool_attrs = pool->attrs.as(); - return Pool2d(cast_1, cast_0, pool_attrs->pool_size, pool_attrs->strides, pool_attrs->padding, - sl::PoolingType::AVG, ¶ms->pool_info, ¶ms->input_info, ¶ms->output_info, - pool_attrs->layout); -} - -EthosnError EthosnAPI::Reshape(const Expr& expr, ReshapeParams* params) { - // Create input info - Call reshape = Downcast(expr); - const auto* input_dtype = reshape->args[0]->checked_type().as(); - - sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; - sl::DataType input_data_type; - EthosnError err = Tvm2Npu(input_dtype->shape, &input_tensor_shape); - err += Tvm2Npu(input_dtype->dtype, &input_data_type); - - Array inferred_shape; - Array new_shape = reshape->checked_type().as()->shape; - if (new_shape.size() < 4) { - inferred_shape = {1, 1, 1, 1}; - for (size_t i = 0; i < new_shape.size(); ++i) { - inferred_shape.Set(i, new_shape[i]); - } - } else { - inferred_shape = new_shape; - } - - err += Tvm2Npu(inferred_shape, ¶ms->new_shape); - params->input_info = - sl::TensorInfo(input_tensor_shape, input_data_type, params->input_info.m_DataFormat, - params->input_info.m_QuantizationInfo); - - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(reshape->checked_type(), &output_tensor_info); - output_tensor_info.m_QuantizationInfo = params->input_info.m_QuantizationInfo; - params->output_info = output_tensor_info; - - return err; -} - -EthosnError EthosnAPI::Addition(const Expr& expr, AdditionParams* params) { - Call call = Downcast(expr); - // Extract the quantization params from the arguments - float lhs_scale; - int lhs_zero_point; - float rhs_scale; - int rhs_zero_point; - float output_scale; - int output_zero_point; - EthosnError err = AsConstant(call->args[2], &lhs_scale); - err += AsConstant(call->args[3], &lhs_zero_point); - err += AsConstant(call->args[4], &rhs_scale); - err += AsConstant(call->args[5], &rhs_zero_point); - err += AsConstant(call->args[6], &output_scale); - err += AsConstant(call->args[7], &output_zero_point); - - sl::QuantizationInfo lhs_q_info; - sl::QuantizationInfo rhs_q_info; - sl::QuantizationInfo output_q_info; - err += Tvm2Npu(lhs_zero_point, lhs_scale, &lhs_q_info); - err += Tvm2Npu(rhs_zero_point, rhs_scale, &rhs_q_info); - err += Tvm2Npu(output_zero_point, output_scale, &output_q_info); - params->output_quantization_info = output_q_info; - - // Create input info - const auto* lhs_dtype = call->args[0]->checked_type().as(); - sl::TensorShape lhs_tensor_shape; - sl::DataType lhs_data_type; - err += Tvm2Npu(lhs_dtype->shape, &lhs_tensor_shape); - err += Tvm2Npu(lhs_dtype->dtype, &lhs_data_type); - params->lhs_info = - sl::TensorInfo(lhs_tensor_shape, lhs_data_type, sl::DataFormat::NHWC, lhs_q_info); - - const auto* rhs_dtype = call->args[1]->checked_type().as(); - sl::TensorShape rhs_tensor_shape; - sl::DataType rhs_data_type; - err += Tvm2Npu(rhs_dtype->shape, &rhs_tensor_shape); - err += Tvm2Npu(rhs_dtype->dtype, &rhs_data_type); - params->rhs_info = - sl::TensorInfo(rhs_tensor_shape, rhs_data_type, sl::DataFormat::NHWC, rhs_q_info); - - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(call->checked_type(), &output_tensor_info); - output_tensor_info.m_QuantizationInfo = output_q_info; - params->output_info = output_tensor_info; - - return err; -} - -EthosnError EthosnAPI::Sigmoid(const Expr& expr, SigmoidParams* params) { - Call quantize = Downcast(expr); - Call sigmoid = Downcast(quantize->args[0]); - Call dequantize = Downcast(sigmoid->args[0]); - - // Create input info - const auto* 
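Reshape above widens any target shape of rank lower than four by keeping the given dimensions and padding the remainder with 1s before handing it to the Support Library. A standalone sketch of that widening, with illustrative names:

#include <array>
#include <cstddef>
#include <cstdint>
#include <vector>

// Keep the given dimensions and pad trailing ones with 1, as the inferred_shape
// handling in EthosnAPI::Reshape does above.
std::array<uint32_t, 4> WidenTo4D(const std::vector<uint32_t>& shape) {
  std::array<uint32_t, 4> out = {1, 1, 1, 1};
  for (std::size_t i = 0; i < shape.size() && i < 4; ++i) out[i] = shape[i];
  return out;
}
// e.g. a {32, 10} reshape target becomes {32, 10, 1, 1}.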
input_dtype = dequantize->args[0]->checked_type().as(); - sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; - sl::DataType input_tensor_dtype; - EthosnError err = Tvm2Npu(input_dtype->shape, &input_tensor_shape); - err += Tvm2Npu(input_dtype->dtype, &input_tensor_dtype); - float input_sc; - int input_zp; - err += AsConstant(dequantize->args[2], &input_zp); - err += AsConstant(dequantize->args[1], &input_sc); - float output_sc; - int output_zp; - err += AsConstant(quantize->args[2], &output_zp); - err += AsConstant(quantize->args[1], &output_sc); - - auto test_zp = input_dtype->dtype.is_int() ? -128 : 0; - if (output_zp != test_zp || output_sc != 1.0f / 256.0f) { - err += EthosnError(ErrStrm() << "output quantization params=(" << output_zp << ", " << output_sc - << "), must = (" << test_zp << ", 1/256)"); - } - - params->input_info = sl::TensorInfo(input_tensor_shape, input_tensor_dtype, sl::DataFormat::NHWC, - sl::QuantizationInfo(input_zp, input_sc)); - - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(quantize->checked_type(), &output_tensor_info); - output_tensor_info.m_QuantizationInfo = sl::QuantizationInfo(output_zp, output_sc); - params->output_info = output_tensor_info; - - return err; -} - -EthosnError EthosnAPI::Mean(const Expr& expr, MeanParams* params) { - Call requantize = Downcast(expr); - Call mean = Downcast(requantize->args[0]); - Call cast_0 = Downcast(mean->args[0]); - - // Create input info - const auto* input_ttype = cast_0->args[0]->checked_type().as(); - const auto* output_ttype = requantize->checked_type().as(); - sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; - sl::DataType input_tensor_dtype; - EthosnError err = Tvm2Npu(input_ttype->shape, &input_tensor_shape); - err += Tvm2Npu(input_ttype->dtype, &input_tensor_dtype); - sl::TensorShape output_tensor_shape = {1, 1, 1, 1}; - sl::DataType output_tensor_dtype; - err += Tvm2Npu(output_ttype->shape, &output_tensor_shape); - err += Tvm2Npu(output_ttype->dtype, &output_tensor_dtype); - float input_sc; - int input_zp; - err += AsConstant(requantize->args[2], &input_zp); - err += AsConstant(requantize->args[1], &input_sc); - params->input_info = sl::TensorInfo(input_tensor_shape, input_tensor_dtype, sl::DataFormat::NHWC, - sl::QuantizationInfo(input_zp, input_sc)); - - float output_sc; - int output_zp; - err += AsConstant(requantize->args[3], &output_sc); - err += AsConstant(requantize->args[4], &output_zp); - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(requantize->checked_type(), &output_tensor_info); - output_tensor_info.m_QuantizationInfo = sl::QuantizationInfo(output_zp, output_sc); - params->output_info = output_tensor_info; - - return err; -} - -EthosnError EthosnAPI::QnnConv2dTranspose(const Expr& expr, QnnConv2dTransposeParams* params) { - Call requantize = Downcast(expr); - Call bias; - Call conv2d_transpose; - if (requantize->args[0]->IsInstance() && - Downcast(requantize->args[0])->op == Op::Get("nn.bias_add")) { - bias = Downcast(requantize->args[0]); - conv2d_transpose = Downcast(bias->args[0]); - } else { - conv2d_transpose = Downcast(requantize->args[0]); - } - const auto& conv_attr = conv2d_transpose->attrs.as(); - ICHECK(conv_attr) << "Expected type Conv2DTransposeAttrs but was " - << conv2d_transpose->attrs->GetTypeKey(); - - int input_zero_point; - int kernel_zero_point; - int output_zero_point; - std::valarray input_scale; - std::valarray kernel_scale; - float output_scale; - unsigned int qaxis = conv_attr->kernel_layout.find("O"); - - EthosnError err = 
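Sigmoid above is only offloaded when the output quantization matches what the codegen expects: zero point -128 for a signed int8 input (0 for uint8) and scale 1/256. Reduced to a standalone check, under those same assumptions:

// Accepted output quantization for an offloaded sigmoid, per the test above.
bool SigmoidOutputQuantValid(bool input_is_int8, int output_zp, float output_scale) {
  int expected_zp = input_is_int8 ? -128 : 0;
  return output_zp == expected_zp && output_scale == 1.0f / 256.0f;
}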
AsConstant(conv2d_transpose->args[2], &input_zero_point); - err += AsConstant(conv2d_transpose->args[3], &kernel_zero_point); - err += AsConstant(requantize->args[4], &output_zero_point); - err += AsConstant(conv2d_transpose->args[4], &input_scale); - err += AsConstant(conv2d_transpose->args[5], &kernel_scale); - err += AsConstant(requantize->args[3], &output_scale); - - // Convert quantization params - sl::QuantizationInfo input_q_info; - sl::QuantizationInfo weights_q_info; - sl::QuantizationInfo bias_q_info; - sl::QuantizationInfo output_q_info; - err += Tvm2Npu(input_zero_point, input_scale, qaxis, &input_q_info); - err += Tvm2Npu(kernel_zero_point, kernel_scale, qaxis, &weights_q_info); - std::valarray bias_scales = input_q_info.GetScales() * weights_q_info.GetScales(); - err += Tvm2Npu(0, bias_scales, 3, &bias_q_info); - err += Tvm2Npu(output_zero_point, output_scale, &output_q_info); - - // Convert convolution attributes - sl::Padding padding; - err += Tvm2Npu(conv_attr->padding, &padding); - sl::Stride stride; - err += Tvm2Npu(conv_attr->strides, &stride); - // Dilation is not supported - std::array dilation = {1, 1}; - AsArray(conv_attr->dilation, &dilation); - if (conv_attr->dilation.size() != 2 || dilation[0] != 1 || dilation[1] != 1) { - err += - EthosnError(ErrStrm() << "dilation=" << conv_attr->dilation << ", dilation must = [1, 1]"); - } - - // Create convolution info - params->conv_info = sl::ConvolutionInfo(padding, stride, output_q_info); - - // Create input info - sl::TensorInfo input_tensor_info; - err += Tvm2Npu(conv2d_transpose->args[0]->checked_type(), &input_tensor_info); - input_tensor_info.m_QuantizationInfo = input_q_info; - params->input_info = input_tensor_info; - - // Create weights info - Constant weights_data = Downcast(conv2d_transpose->args[1]); - if (conv_attr->kernel_layout != "HWIO") { - weights_data = TransposeWeights(weights_data, conv_attr->kernel_layout, "HWIO"); - } - const auto* weights_ttype = weights_data->checked_type().as(); - sl::TensorShape weights_tensor_shape; - sl::DataType weights_data_type; - sl::DataFormat weights_data_format; - // Ignore the error here because weights don't have a batch axis - Tvm2Npu(weights_ttype->shape, &weights_tensor_shape); - err += Tvm2Npu(weights_ttype->dtype, &weights_data_type); - err += Tvm2Npu("HWIO", &weights_data_format); - params->weights_info = - sl::TensorInfo(weights_tensor_shape, weights_data_type, weights_data_format, weights_q_info); - - params->raw_weights = weights_data->data; - - // Create bias info - unsigned int out_channels = Downcast(conv_attr->channels)->value; - params->bias_info = sl::TensorInfo({1, 1, 1, out_channels}, sl::DataType::INT32_QUANTIZED, - sl::DataFormat::NHWC, bias_q_info); - if (bias.defined()) { - params->raw_bias = Downcast(bias->args[1])->data; - } else { - params->raw_bias = MakeConstantZeros(tvm::DataType::Int(32), {1, 1, 1, out_channels})->data; - } - - // Create output info - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(requantize->checked_type(), &output_tensor_info); - output_tensor_info.m_QuantizationInfo = output_q_info; - params->output_info = output_tensor_info; - - return err; -} - -EthosnError EthosnAPI::Tanh(const Expr& expr, TanhParams* params) { - Call quantize = Downcast(expr); - Call tanh = Downcast(quantize->args[0]); - Call dequantize = Downcast(tanh->args[0]); - // Create input info - const auto* input_dtype = quantize->checked_type().as(); - sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; - sl::DataType input_tensor_dtype; - EthosnError 
err = Tvm2Npu(input_dtype->shape, &input_tensor_shape); - err += Tvm2Npu(input_dtype->dtype, &input_tensor_dtype); - float input_sc; - int input_zp; - err += AsConstant(dequantize->args[2], &input_zp); - err += AsConstant(dequantize->args[1], &input_sc); - float output_sc; - int output_zp; - err += AsConstant(quantize->args[2], &output_zp); - err += AsConstant(quantize->args[1], &output_sc); - auto test_zp = input_dtype->dtype.is_uint() ? 128 : 0; - if (output_zp != test_zp || output_sc != 0.0078125f) { - err += EthosnError(ErrStrm() << "output quantization params=(" << output_zp << ", " << output_sc - << "), must = (" << test_zp << ", 1/256)"); - } - params->input_info = sl::TensorInfo(input_tensor_shape, input_tensor_dtype, sl::DataFormat::NHWC, - sl::QuantizationInfo(input_zp, input_sc)); - - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(quantize->checked_type(), &output_tensor_info); - output_tensor_info.m_QuantizationInfo = sl::QuantizationInfo(output_zp, output_sc); - params->output_info = output_tensor_info; - - return err; -} - -EthosnError EthosnAPI::LeakyReLU(const Expr& expr, LeakyReLUParams* params) { - Call quantize = Downcast(expr); - Call leaky_relu = Downcast(quantize->args[0]); - Call dequantize = Downcast(leaky_relu->args[0]); - - const auto* input_dtype = quantize->checked_type().as(); - sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; - sl::DataType input_tensor_dtype; - EthosnError err = Tvm2Npu(input_dtype->shape, &input_tensor_shape); - err += Tvm2Npu(input_dtype->dtype, &input_tensor_dtype); - float input_sc; - int input_zp; - err += AsConstant(dequantize->args[2], &input_zp); - err += AsConstant(dequantize->args[1], &input_sc); - float output_sc; - int output_zp; - err += AsConstant(quantize->args[2], &output_zp); - err += AsConstant(quantize->args[1], &output_sc); - - const auto* attrs = leaky_relu->attrs.as(); - double alpha = attrs->alpha; - if (alpha >= 1.0f || alpha <= 0.0f) { - err += EthosnError( - ErrStrm() << "leaky relu alpha must be less than 1 and greater than 0, but was " << alpha); - return err; - } - params->leaky_relu_info = sl::LeakyReluInfo(alpha, sl::QuantizationInfo(output_zp, output_sc)); - params->input_info = sl::TensorInfo(input_tensor_shape, input_tensor_dtype, sl::DataFormat::NHWC, - sl::QuantizationInfo(input_zp, input_sc)); - - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(quantize->checked_type(), &output_tensor_info); - output_tensor_info.m_QuantizationInfo = sl::QuantizationInfo(output_zp, output_sc); - params->output_info = output_tensor_info; - - return err; -} - -EthosnError EthosnAPI::Concatenate(const Expr& expr, ConcatenateParams* params) { - Call call = Downcast(expr); - const auto& attrs = call->attrs.as(); - int axis = attrs->axis; - if (axis < 0) { - int output_dims = Downcast(call->checked_type())->shape.size(); - axis = output_dims + axis; - } - params->concat_info.m_Axis = axis; - - float output_sc; - int output_zp; - EthosnError err = AsConstant(call->args[3], &output_sc); - err += AsConstant(call->args[4], &output_zp); - params->concat_info.m_OutputQuantizationInfo = sl::QuantizationInfo(output_zp, output_sc); - - auto input_scales = call->args[1].as()->fields; - auto input_zero_points = call->args[2].as()->fields; - auto input_tensors = call->args[0]->checked_type().as()->fields; - - int index = 0; - for (auto input_scale : input_scales) { - auto input_dtype = input_tensors[index].as(); - auto input_zero_point = input_zero_points[index]; - float scale; - int zp; - err += AsConstant(input_scale, &scale); - 
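// A minimal standalone sketch of the axis normalization used by the concatenate conversion
// above, where a negative Relay axis is mapped onto [0, rank) before being stored in
// ConcatenationInfo (plain C++, no TVM types; NormalizeAxis is an illustrative name only):
#include <cassert>

// Map a Relay-style axis (which may be negative) onto the range [0, rank).
inline int NormalizeAxis(int axis, int rank) {
  if (axis < 0) axis += rank;  // e.g. axis = -1 with rank 4 becomes 3
  assert(axis >= 0 && axis < rank);
  return axis;
}
// Usage: NormalizeAxis(-1, 4) == 3, matching the "output_dims + axis" computation above.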
err += AsConstant(input_zero_point, &zp); - sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; - sl::DataType input_data_type; - err += Tvm2Npu(input_dtype->shape, &input_tensor_shape); - err += Tvm2Npu(input_dtype->dtype, &input_data_type); - params->input_infos.emplace_back(sl::TensorInfo(input_tensor_shape, input_data_type, - sl::DataFormat::NHWC, - sl::QuantizationInfo(zp, scale))); - index++; - } - - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(call->checked_type(), &output_tensor_info); - output_tensor_info.m_QuantizationInfo = sl::QuantizationInfo(output_zp, output_sc); - params->output_info = output_tensor_info; - - return err; -} - -EthosnError EthosnAPI::Split(const Expr& expr, SplitParams* params) { - Call call = Downcast(expr); - const auto* input_tensor_type = call->args[0]->checked_type().as(); - const auto& attrs = call->attrs.as(); - - sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; - sl::DataType input_data_type; - EthosnError err = Tvm2Npu(input_tensor_type->shape, &input_tensor_shape); - err += Tvm2Npu(input_tensor_type->dtype, &input_data_type); - params->input_info = - sl::TensorInfo(input_tensor_shape, input_data_type, params->input_info.m_DataFormat, - params->input_info.m_QuantizationInfo); - params->split_info.m_Axis = attrs->axis; - if (const auto* sections_ptr = attrs->indices_or_sections.as()) { - auto sections = sections_ptr->value; - int size = input_tensor_shape[attrs->axis] / sections; - for (int i = 0; i < sections; i++) { - params->split_info.m_Sizes.push_back(size); - } - } else { - auto indices = Downcast>(attrs->indices_or_sections); - int last_index = 0; - for (const auto& i : indices) { - params->split_info.m_Sizes.push_back(i->value - last_index); - last_index = i->value; - } - int axis_size = input_tensor_shape[attrs->axis]; - params->split_info.m_Sizes.push_back(axis_size - last_index); - } - - Array output_tensors = call->checked_type().as()->fields; - std::vector output_infos = {}; - for (auto output_ttype : output_tensors) { - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(output_ttype, &output_tensor_info); - output_tensor_info.m_QuantizationInfo = params->input_info.m_QuantizationInfo; - output_infos.push_back(output_tensor_info); - } - params->output_infos = output_infos; - return err; -} - -EthosnError EthosnAPI::DepthToSpace(const Expr& expr, DepthToSpaceParams* params) { - Call call = Downcast(expr); - const auto* input_dtype = call->args[0]->checked_type().as(); - const auto* attrs = call->attrs.as(); - if (attrs->mode != "DCR") { - return EthosnError(ErrStrm() << "mode=" << attrs->mode << ", mode must = DCR"); - } - params->depth_info.m_BlockSize = attrs->block_size; - - sl::TensorShape input_tensor_shape; - sl::DataType input_data_type; - sl::DataFormat input_data_format; - EthosnError err = Tvm2Npu(input_dtype->shape, &input_tensor_shape); - err += Tvm2Npu(input_dtype->dtype, &input_data_type); - err += Tvm2Npu(attrs->layout, &input_data_format); - params->input_info = sl::TensorInfo(input_tensor_shape, input_data_type, input_data_format, - params->input_info.m_QuantizationInfo); - - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(call->checked_type(), &output_tensor_info); - output_tensor_info.m_QuantizationInfo = params->input_info.m_QuantizationInfo; - params->output_info = output_tensor_info; - - return err; -} - -EthosnError EthosnAPI::Relu(const Expr& expr, ReluParams* params) { - Call call = Downcast(expr); - const auto* input_dtype = call->args[0]->checked_type().as(); - const auto* attrs = call->attrs.as(); 
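// The split conversion above turns Relay's indices_or_sections attribute into the explicit
// per-output sizes that sl::SplitInfo expects: an integer N means N equal slices of the split
// axis, while a list of indices becomes consecutive differences plus a final remainder.
// A standalone sketch of that conversion, assuming the axis length divides evenly in the
// "sections" case (the helper names are illustrative, not part of this file):
#include <vector>

std::vector<int> SplitSizesFromSections(int axis_size, int sections) {
  return std::vector<int>(sections, axis_size / sections);  // N equal slices
}

std::vector<int> SplitSizesFromIndices(int axis_size, const std::vector<int>& indices) {
  std::vector<int> sizes;
  int last_index = 0;
  for (int i : indices) {            // each index closes the previous slice
    sizes.push_back(i - last_index);
    last_index = i;
  }
  sizes.push_back(axis_size - last_index);  // remainder up to the end of the axis
  return sizes;
}
// e.g. SplitSizesFromIndices(10, {3, 7}) -> {3, 4, 3}; SplitSizesFromSections(12, 3) -> {4, 4, 4}.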
- params->relu_info.m_LowerBound = attrs->a_min; - params->relu_info.m_UpperBound = attrs->a_max; - - sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; - sl::DataType input_data_type; - EthosnError err = Tvm2Npu(input_dtype->shape, &input_tensor_shape); - err += Tvm2Npu(input_dtype->dtype, &input_data_type); - params->input_info = - sl::TensorInfo(input_tensor_shape, input_data_type, params->input_info.m_DataFormat, - params->input_info.m_QuantizationInfo); - - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(call->checked_type(), &output_tensor_info); - output_tensor_info.m_QuantizationInfo = params->input_info.m_QuantizationInfo; - params->output_info = output_tensor_info; - - return err; -} - -EthosnError EthosnAPI::Requantize(const Expr& expr, RequantizeParams* params) { - Call call = Downcast(expr); - const auto* input_ttype = call->args[0]->checked_type().as(); - sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; - sl::DataType input_data_type; - EthosnError err = Tvm2Npu(input_ttype->shape, &input_tensor_shape); - err += Tvm2Npu(input_ttype->dtype, &input_data_type); - - const auto* output_ttype = call->checked_type().as(); - sl::TensorShape output_tensor_shape = {1, 1, 1, 1}; - sl::DataType output_data_type; - err += Tvm2Npu(output_ttype->shape, &output_tensor_shape); - err += Tvm2Npu(output_ttype->dtype, &output_data_type); - - float input_sc, output_sc; - int input_zp, output_zp; - err += AsConstant(call->args[1], &input_sc); - err += AsConstant(call->args[2], &input_zp); - err += AsConstant(call->args[3], &output_sc); - err += AsConstant(call->args[4], &output_zp); - - sl::QuantizationInfo input_q_info; - err += Tvm2Npu(input_zp, input_sc, &input_q_info); - params->input_info = - sl::TensorInfo(input_tensor_shape, input_data_type, sl::DataFormat::NHWC, input_q_info); - - sl::QuantizationInfo requantize_q_info; - err += Tvm2Npu(output_zp, output_sc, &requantize_q_info); - params->requantize_info = sl::RequantizeInfo(requantize_q_info); - params->requantize_info.m_OutputDataType = output_data_type; - - params->output_info = sl::TensorInfo(output_tensor_shape, output_data_type, sl::DataFormat::NHWC, - requantize_q_info); - return err; -} - -EthosnError EthosnAPI::ReinterpretQuantize(const Expr& expr, - ReinterpretQuantizationParams* params) { - Call call = Downcast(expr); - const auto* input_ttype = call->args[0]->checked_type().as(); - sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; - sl::DataType input_data_type; - EthosnError err = Tvm2Npu(input_ttype->shape, &input_tensor_shape); - err += Tvm2Npu(input_ttype->dtype, &input_data_type); - - const auto* output_ttype = call->checked_type().as(); - sl::TensorShape output_tensor_shape = {1, 1, 1, 1}; - sl::DataType output_data_type; - err += Tvm2Npu(output_ttype->shape, &output_tensor_shape); - err += Tvm2Npu(output_ttype->dtype, &output_data_type); - - float input_sc, output_sc; - int input_zp, output_zp; - err += AsConstant(call->args[1], &input_sc); - err += AsConstant(call->args[2], &input_zp); - err += AsConstant(call->args[3], &output_sc); - err += AsConstant(call->args[4], &output_zp); - - sl::QuantizationInfo input_q_info; - err += Tvm2Npu(input_zp, input_sc, &input_q_info); - params->input_info = - sl::TensorInfo(input_tensor_shape, input_data_type, sl::DataFormat::NHWC, input_q_info); - - sl::QuantizationInfo reinterpret_quantize_q_info; - err += Tvm2Npu(output_zp, output_sc, &reinterpret_quantize_q_info); - params->reinterpret_quantize_info = sl::ReinterpretQuantizationInfo(reinterpret_quantize_q_info); - - 
params->output_info = sl::TensorInfo(output_tensor_shape, output_data_type, sl::DataFormat::NHWC, - reinterpret_quantize_q_info); - return err; -} - -EthosnError EthosnAPI::Resize(const Expr& expr, ResizeParams* params) { - Call resize = Downcast(expr); - const auto* input_ttype = resize->args[0]->checked_type().as(); - - const auto* attrs = resize->attrs.as(); - uint32_t height, width; - EthosnError err = Tvm2Npu(attrs->size, &height, &width); - params->resize_info = sl::ResizeInfo{sl::ResizeAlgorithm::NEAREST_NEIGHBOUR, height, width, - params->input_info.m_QuantizationInfo}; - - sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; - sl::DataType input_tensor_dtype; - err = Tvm2Npu(input_ttype->shape, &input_tensor_shape); - err += Tvm2Npu(input_ttype->dtype, &input_tensor_dtype); - params->input_info = - sl::TensorInfo(input_tensor_shape, input_tensor_dtype, params->input_info.m_DataFormat, - params->input_info.m_QuantizationInfo); - - sl::TensorInfo output_tensor_info; - err += Tvm2Npu(resize->checked_type(), &output_tensor_info); - output_tensor_info.m_QuantizationInfo = params->input_info.m_QuantizationInfo; - params->output_info = output_tensor_info; - - return err; -} - -EthosnError EthosnAPI::Tvm2Npu(const Array& padding, sl::Padding* npu_padding) { - std::array dim; - if (EthosnError err = AsArray(padding, &dim)) { - return err; - } - switch (padding.size()) { - case 1: - *npu_padding = sl::Padding(dim[3], dim[3], dim[3], dim[3]); - break; - case 2: - // Height, width -> top, bottom, left, right - *npu_padding = sl::Padding(dim[3], dim[3], dim[2], dim[2]); - break; - case 4: - // Top, left, bottom, right -> top, bottom, left, right - *npu_padding = sl::Padding(dim[0], dim[2], dim[1], dim[3]); - break; - default: - return EthosnError(ErrStrm() << "padding tuple size=" << padding.size() - << ", padding tuple size must be {1, 2, 4}"); - } - return EthosnError(); -} - -EthosnError EthosnAPI::Tvm2Npu(const Array& strides, sl::Stride* npu_stride) { - if (strides.size() != 2) { - return EthosnError(ErrStrm() << "stride size=" << strides.size() << ", stride size must = 2"); - } - std::array dim; - if (EthosnError err = AsArray(strides, &dim)) { - return err; - } - *npu_stride = sl::Stride(dim[1], dim[0]); - return EthosnError(); -} - -EthosnError EthosnAPI::Tvm2Npu(const Array& size, uint32_t* x, uint32_t* y) { - if (size.size() != 2) { - return EthosnError(ErrStrm() << "dimensions=" << size.size() << ", dimensions must = 2"); - } - std::array dim; - if (EthosnError err = AsArray(size, &dim)) { - return err; - } - *x = dim[0]; - *y = dim[1]; - return EthosnError(); -} - -EthosnError EthosnAPI::Tvm2Npu(const std::string& dformat, sl::DataFormat* data_format) { - *data_format = sl::DataFormat::NCHW; - if (dformat == "NCHW") { - return EthosnError(); - } else if (dformat == "NHWC") { - *data_format = sl::DataFormat::NHWC; - return EthosnError(); - } else if (dformat == "HWIO") { - *data_format = sl::DataFormat::HWIO; - return EthosnError(); - } else if (dformat == "HWIM") { - *data_format = sl::DataFormat::HWIM; - return EthosnError(); - } - return EthosnError(ErrStrm() << "format=" << dformat - << ", format must be {NCHW, NHWC, HWIO, HWIM}"); -} - -EthosnError EthosnAPI::Tvm2Npu(const Array& shape, sl::TensorShape* npu_shape) { - EthosnError err = AsArray(shape, npu_shape); - if (npu_shape->front() != 1) { - err += EthosnError(ErrStrm() << "batch size=" << npu_shape->front() << ", batch size must = 1"); - } - return err; -} - -EthosnError EthosnAPI::Tvm2Npu(const tvm::DataType& dtype, 
sl::DataType* data_type) { - *data_type = sl::DataType::INT8_QUANTIZED; - if (dtype.is_scalar() == 1) { - if (dtype.is_uint() && dtype.bits() == 8) { - *data_type = sl::DataType::UINT8_QUANTIZED; - return EthosnError(); - } else if (dtype.is_int() && dtype.bits() == 8) { - return EthosnError(); - } else if (dtype.is_int() && dtype.bits() == 32) { - *data_type = sl::DataType::INT32_QUANTIZED; - return EthosnError(); - } - } - return EthosnError(ErrStrm() << "dtype=\'" << dtype - << "\', dtype must be either uint8, int8 or int32"); -} - -EthosnError EthosnAPI::Tvm2Npu(const int32_t zero_point, const float scale, - sl::QuantizationInfo* npu_qinfo) { - sl::QuantizationInfo q(zero_point, scale); - *npu_qinfo = q; - return EthosnError(); -} - -EthosnError EthosnAPI::Tvm2Npu(const int zero_point, std::valarray scales, unsigned int axis, - sl::QuantizationInfo* npu_qinfo) { - if (scales.size() == 1) { - sl::QuantizationInfo q(zero_point, scales[0]); - *npu_qinfo = q; - } else { - struct sl::QuantizationScales s(std::move(scales)); - sl::QuantizationInfo q(zero_point, s, axis); - *npu_qinfo = q; - } - return EthosnError(); -} - -EthosnError EthosnAPI::Tvm2Npu(const Array& shape, sl::TensorShape* npu_shape) { - return AsArray(shape, npu_shape); -} - -EthosnError EthosnAPI::Tvm2Npu(const Array>& padding, sl::Padding* npu_padding) { - if (padding.size() != 4) { - return EthosnError(ErrStrm() << "padding tuple size=" << padding.size() - << ", padding tuple size must = 4"); - } - Array reduced_padding; - reduced_padding.push_back(padding[1][0]); - reduced_padding.push_back(padding[1][1]); - reduced_padding.push_back(padding[2][0]); - reduced_padding.push_back(padding[2][1]); - std::array dim; - if (EthosnError err = AsArray(reduced_padding, &dim)) { - return err; - } - *npu_padding = sl::Padding(dim[0], dim[1], dim[2], dim[3]); - return EthosnError(); -} - -EthosnError EthosnAPI::Tvm2Npu(const tvm::Type& type, sl::TensorInfo* npu_tinfo) { - const TensorTypeNode* ttype = type.as(); - ICHECK(ttype) << "Expected TensorTypeNode but was " << ttype->GetTypeKey(); - - sl::TensorShape shape = {1, 1, 1, 1}; - sl::DataType data_type; - EthosnError err = Tvm2Npu(ttype->shape, &shape); - err += Tvm2Npu(ttype->dtype, &data_type); - *npu_tinfo = sl::TensorInfo(shape, data_type, sl::DataFormat::NHWC, {}); - return err; -} - -// Convert an array of IntImmNodes into ValueT -// IndexT type of Array indexing variable -// ValueT type of resulting value -// N The size of the output array -template -EthosnError EthosnAPI::AsArray(const Array& arr, std::array* v) { - if (arr.size() > N) - return EthosnError(ErrStrm() << "dimensions=" << arr.size() << ", dimensions must be <= " << N); - for (size_t i = 0; i < arr.size(); i++) { - const PrimExpr& a = arr[i]; - const auto* intImm = a.as(); - if (intImm->value > std::numeric_limits::max()) { - return EthosnError(ErrStrm() << "axis size=" << intImm->value << ", axis size must be <= " - << std::numeric_limits::max()); - } - (*v)[i] = static_cast(intImm->value); - } - return EthosnError(); -} - -// Get a std::valarray from a constant represented by a NDArray. 
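// The two padding conversions above accept the spellings Relay allows (a single value,
// (height, width), or (top, left, bottom, right)) and reorder them into the Support Library's
// (top, bottom, left, right) convention. A standalone sketch of that reordering
// (plain C++; the Pad4 struct and ToTBLR name are illustrative only):
#include <stdexcept>
#include <vector>

struct Pad4 { unsigned top, bottom, left, right; };

Pad4 ToTBLR(const std::vector<unsigned>& padding) {
  switch (padding.size()) {
    case 1:  // same padding on every side
      return {padding[0], padding[0], padding[0], padding[0]};
    case 2:  // (height, width) -> top/bottom take height, left/right take width
      return {padding[0], padding[0], padding[1], padding[1]};
    case 4:  // (top, left, bottom, right) -> (top, bottom, left, right)
      return {padding[0], padding[2], padding[1], padding[3]};
    default:
      throw std::invalid_argument("padding tuple size must be 1, 2 or 4");
  }
}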
-EthosnError EthosnAPI::AsConstant(const Expr& expr, std::valarray<float>* out) {
-  if (!expr->IsInstance<ConstantNode>()) {
-    return EthosnError("expected constant data");
-  }
-  const auto* data = expr.as<ConstantNode>();
-  int64_t num_elems = 1;
-  auto shape = data->data.Shape();
-  for (size_t i = 0; i < shape.size(); i++) {
-    num_elems *= shape[i];
-  }
-  out->resize(num_elems);
-  for (int64_t i = 0; i < num_elems; i++) {
-    (*out)[i] = static_cast<float*>(data->data->data)[i];
-  }
-  return EthosnError();
-}
-
-// Get a T from a constant represented by a NDArray.
-template <typename T>
-EthosnError EthosnAPI::AsConstant(const Expr& expr, T* out) {
-  *out = {0};
-  if (!expr->IsInstance<ConstantNode>()) {
-    return EthosnError("expected constant data");
-  }
-  runtime::NDArray data = Downcast<Constant>(expr)->data;
-  *out = *static_cast<T*>(data->data);
-  return EthosnError();
-}
-
-}  // namespace ethosn
-}  // namespace contrib
-}  // namespace relay
-}  // namespace tvm
diff --git a/src/relay/backend/contrib/ethosn/ethosn_api.h b/src/relay/backend/contrib/ethosn/ethosn_api.h
deleted file mode 100644
index d640a02312ec..000000000000
--- a/src/relay/backend/contrib/ethosn/ethosn_api.h
+++ /dev/null
@@ -1,332 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-/*!
- * \file src/relay/backend/contrib/ethosn/ethosn_api.h
- * \brief The Relay -> Arm(R) Ethos(TM)-N command stream compiler.
- */ - -#ifndef TVM_RELAY_BACKEND_CONTRIB_ETHOSN_ETHOSN_API_H_ -#define TVM_RELAY_BACKEND_CONTRIB_ETHOSN_ETHOSN_API_H_ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "ethosn_support_library/Support.hpp" -#include "ethosn_support_library/SupportQueries.hpp" - -namespace tvm { -namespace relay { -namespace contrib { -namespace ethosn { - -namespace sl = ::ethosn::support_library; - -struct ConvolutionParams { - sl::ConvolutionInfo conv_info; - sl::TensorInfo input_info; - sl::TensorInfo weights_info; - sl::TensorInfo bias_info; - sl::TensorInfo output_info; - void* raw_weights = nullptr; - void* raw_bias = nullptr; - bool is_depthwise = false; -}; - -struct FullyConnectedParams { - sl::FullyConnectedInfo fc_info; - sl::TensorInfo input_info; - sl::TensorInfo weights_info; - sl::TensorInfo bias_info; - sl::TensorInfo output_info; - runtime::NDArray raw_weights; - runtime::NDArray raw_bias; -}; - -struct MaxPool2DParams { - sl::PoolingInfo pool_info = sl::PoolingInfo(0, 0, 0, 0, sl::Padding(), sl::PoolingType::MAX); - sl::TensorInfo input_info; - sl::TensorInfo output_info; -}; - -struct AvgPool2DParams { - sl::PoolingInfo pool_info = sl::PoolingInfo(0, 0, 0, 0, sl::Padding(), sl::PoolingType::AVG); - sl::TensorInfo input_info; - sl::TensorInfo output_info; -}; - -struct ReshapeParams { - sl::TensorShape new_shape{}; - sl::TensorInfo input_info; - sl::TensorInfo output_info; -}; - -struct AdditionParams { - sl::QuantizationInfo output_quantization_info; - sl::TensorInfo lhs_info; - sl::TensorInfo rhs_info; - sl::TensorInfo output_info; -}; - -struct SigmoidParams { - sl::TensorInfo input_info; - sl::TensorInfo output_info; -}; - -struct MeanParams { - sl::TensorInfo input_info; - sl::TensorInfo output_info; -}; - -struct TanhParams { - sl::TensorInfo input_info; - sl::TensorInfo output_info; -}; - -struct LeakyReLUParams { - sl::LeakyReluInfo leaky_relu_info; - sl::TensorInfo input_info; - sl::TensorInfo output_info; -}; - -struct QnnConv2dTransposeParams { - sl::ConvolutionInfo conv_info; - sl::TensorInfo input_info; - sl::TensorInfo weights_info; - sl::TensorInfo bias_info; - sl::TensorInfo output_info; - runtime::NDArray raw_weights; - runtime::NDArray raw_bias; -}; - -struct ConcatenateParams { - sl::QuantizationInfo qInfo; - sl::ConcatenationInfo concat_info = sl::ConcatenationInfo(1, qInfo); - std::vector input_infos; - sl::TensorInfo output_info; -}; - -struct SplitParams { - sl::SplitInfo split_info = sl::SplitInfo(0, {}); - sl::TensorInfo input_info; - std::vector output_infos; -}; - -struct DepthToSpaceParams { - sl::DepthToSpaceInfo depth_info = sl::DepthToSpaceInfo(0); - sl::TensorInfo input_info; - sl::TensorInfo output_info; -}; - -struct ReluParams { - sl::ReluInfo relu_info; - sl::TensorInfo input_info; - sl::TensorInfo output_info; -}; - -struct RequantizeParams { - sl::RequantizeInfo requantize_info; - sl::TensorInfo input_info; - sl::TensorInfo output_info; -}; - -struct ReinterpretQuantizationParams { - sl::ReinterpretQuantizationInfo reinterpret_quantize_info; - sl::TensorInfo input_info; - sl::TensorInfo output_info; -}; - -struct ResizeParams { - sl::ResizeInfo resize_info; - sl::TensorInfo input_info; - sl::TensorInfo output_info; -}; - -/*! - * \brief A wrapper around std::stringstream to build an EthosnError. 
- */ -class ErrStrm { - public: - template - ErrStrm& operator<<(const T& val) { // NOLINT(*) - stream_ << val; - return *this; - } - - private: - std::stringstream stream_; - friend class EthosnError; -}; - -/*! - * \brief Custom error class for storing error messages produced - * during compilation for Ethos-N. - */ -class EthosnError { - public: - /*! \brief Default constructor */ - EthosnError() {} - /*! - * \brief Construct error from an Array of Strings - * \param msgs The messages - */ - explicit EthosnError(const Array& msgs) : msgs(msgs) {} - /*! - * \brief Construct error from a String - * \param msg The message - */ - explicit EthosnError(const String& msg) { - if (msg.size()) msgs.push_back(msg); - } - /*! - * \brief Construct error from an ErrStrm - * \param err The ErrStrm - */ - explicit EthosnError(const ErrStrm& err) : EthosnError(err.stream_.str()) {} - - /*! \return Whether there are any error messages */ - explicit operator bool() const { return !msgs.empty(); } - - /*! \brief Add together two errors to give a single error with all the msgs */ - EthosnError& operator+=(const EthosnError& other) { - msgs.insert(msgs.end(), other.msgs.begin(), other.msgs.end()); - return *this; - } - - /*! \brief The error messages */ - Array msgs; -}; - -/*! - * \brief Functions to interact with Support Library's API including the - * translation of Relay ops/composite functions into Support Library - * equivalents. - */ -class EthosnAPI { - public: - /*! \brief Create a default input tensor */ - static sl::TensorInfo DefaultInputTensor(const Expr& expr); - - /*! \brief Extract the Support Library convolution params from an ethos-n.qnn_conv2d func */ - static EthosnError QnnConv2d(const Expr& expr, ConvolutionParams* params); - /*! \brief Extract the Support Library dense params from an ethos-n.qnn_fc func */ - static EthosnError QnnFullyConnected(const Expr& expr, FullyConnectedParams* params); - /*! \brief Extract the Support Library max_pool2d params from a Relay max_pool2d call */ - static EthosnError MaxPool2D(const Expr& expr, MaxPool2DParams* params); - /*! \brief Extract the Support Library avg_pool params from a Relay ethos-n.qnn_avg_pool2d func */ - static EthosnError AvgPool2D(const Expr& expr, AvgPool2DParams* params); - /*! \brief Extract the Support Library reshape params from a Relay reshape call */ - static EthosnError Reshape(const Expr& expr, ReshapeParams* params); - /*! \brief Extract the Support Library addition params from a Relay qnn.addition call */ - static EthosnError Addition(const Expr& expr, AdditionParams* params); - /*! \brief Extract the Support Library sigmoid params from a Relay an ethos-n.qnn_sigmoid func */ - static EthosnError Sigmoid(const Expr& expr, SigmoidParams* params); - /*! \brief Extract the Support Library mean params from a mean func */ - static EthosnError Mean(const Expr& expr, MeanParams* params); - /*! \brief Extract the Support Library tanh params from a Relay an ethos-n tanh func */ - static EthosnError Tanh(const Expr& expr, TanhParams* params); - /*! \brief Extract the Support Library leaky relu params from an ethos-n leaky relu Relu call. */ - static EthosnError LeakyReLU(const Expr& expr, LeakyReLUParams* params); - /*! \brief Extract the Support Library transpose params from a Relay - * ethos-n.qnn_conv2d_transpose func */ - static EthosnError QnnConv2dTranspose(const Expr& expr, QnnConv2dTransposeParams* params); - /*! 
\brief Extract the Support Library concatenate params from a Relay qnn.concatenate call */ - static EthosnError Concatenate(const Expr& expr, ConcatenateParams* params); - /*! \brief Extract the Support Library split params from a Relay split call */ - static EthosnError Split(const Expr& expr, SplitParams* params); - /*! \brief Extract the Support Library depth_to_space params from a Relay depth_to_space call */ - static EthosnError DepthToSpace(const Expr& expr, DepthToSpaceParams* params); - /*! \brief Extract the Support Library relu params from a Relay relu call */ - static EthosnError Relu(const Expr& expr, ReluParams* params); - /*! \brief Extract the Support Library requantize params from a Relay qnn.requantize call */ - static EthosnError Requantize(const Expr& expr, RequantizeParams* params); - - /*! - * \brief Extact the Support Library reinterpret quantization params from a Relay qnn.requantize - * call. - * - * \note This is used for the conversion from add and mul to a reinterpret quantization operator. - * This is effectively an identity operation, as not the same as 'requantize'. - */ - static EthosnError ReinterpretQuantize(const Expr& expr, ReinterpretQuantizationParams* params); - - /*! \brief Extract the Support Library resize params from a Relay resize call */ - static EthosnError Resize(const Expr& expr, ResizeParams* params); - - private: - /*! \brief Convert a TVM IndexExpr array to a SL tensor shape */ - static EthosnError Tvm2Npu(const Array& shape, sl::TensorShape* npu_shape); - /*! \brief Convert a TVM data type to a SL data type */ - static EthosnError Tvm2Npu(const tvm::DataType& dtype, sl::DataType* data_type); - /*! \brief Convert TVM 1D padding to SL padding */ - static EthosnError Tvm2Npu(const Array& padding, sl::Padding* npu_padding); - /*! \brief Convert TVM 1D striding to SL striding */ - static EthosnError Tvm2Npu(const Array& strides, sl::Stride* npu_stride); - /*! \brief Convert TVM data format to SL data format */ - static EthosnError Tvm2Npu(const std::string& dformat, sl::DataFormat* data_format); - /*! \brief Convert TVM size array for pooling size to x and y values */ - static EthosnError Tvm2Npu(const Array& size, uint32_t* x, uint32_t* y); - /*! \brief Convert TVM quantization info to SL quantization info */ - static EthosnError Tvm2Npu(const int32_t zero_point, const float scale, - sl::QuantizationInfo* npu_qinfo); - static EthosnError Tvm2Npu(const int32_t zero_point, const std::valarray scales, - const unsigned int axis, sl::QuantizationInfo* npu_qinfo); - /*! \brief Convert TVM 2D padding to SL padding */ - static EthosnError Tvm2Npu(const Array>& padding, sl::Padding* npu_padding); - /*! \brief Convert a TVM Integer array to a SL tensor shape */ - static EthosnError Tvm2Npu(const Array& shape, sl::TensorShape* npu_shape); - /*! \brief Convert a TVM Type to SL tensor info. */ - static EthosnError Tvm2Npu(const tvm::Type& type, sl::TensorInfo* npu_tinfo); - - /*! \brief Convert a TVM pooling call to SL pooling information */ - static EthosnError Pool2d(const Call& input, const Call& output, Array size, - Array strides, Array padding, - sl::PoolingType pooling_type, sl::PoolingInfo* pool_info, - sl::TensorInfo* input_info, sl::TensorInfo* output_info, - std::string layout); - - // Convert an array of IntImmNodes into ValueT - // IndexT type of Array indexing variable - // ValueT type of resulting value - template - static EthosnError AsArray(const Array& arr, std::array* v); - - // Get a T from a constant represented by a NDArray. 
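// The conversion routines declared above never throw; every Tvm2Npu/AsConstant call returns an
// EthosnError and results are folded together with operator+= so that all problems with an
// offloaded function can be reported at once. A stripped-down, standalone sketch of that idiom
// (std::string/std::vector stand in for TVM's String/Array; the names are illustrative only):
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

class Error {
 public:
  Error() = default;
  explicit Error(const std::string& msg) {
    if (!msg.empty()) msgs_.push_back(msg);
  }
  explicit operator bool() const { return !msgs_.empty(); }  // true if anything went wrong
  Error& operator+=(const Error& other) {                    // accumulate, never throw
    msgs_.insert(msgs_.end(), other.msgs_.begin(), other.msgs_.end());
    return *this;
  }
  const std::vector<std::string>& msgs() const { return msgs_; }

 private:
  std::vector<std::string> msgs_;
};

Error CheckBatch(int batch) {
  if (batch != 1) {
    std::ostringstream s;  // plays the role of ErrStrm above
    s << "batch size=" << batch << ", batch size must = 1";
    return Error(s.str());
  }
  return Error();
}

int main() {
  Error err = CheckBatch(2);
  err += CheckBatch(1);  // a passing check adds no messages
  if (err) {
    for (const auto& m : err.msgs()) std::cout << m << "\n";
  }
}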
- template - static EthosnError AsConstant(const Expr& expr, T* out); - static EthosnError AsConstant(const Expr& expr, std::valarray* out); -}; - -} // namespace ethosn -} // namespace contrib -} // namespace relay -} // namespace tvm - -#endif // TVM_RELAY_BACKEND_CONTRIB_ETHOSN_ETHOSN_API_H_ diff --git a/src/relay/backend/contrib/ethosn/inline_partitions.cc b/src/relay/backend/contrib/ethosn/inline_partitions.cc deleted file mode 100644 index f8cc3fc00d10..000000000000 --- a/src/relay/backend/contrib/ethosn/inline_partitions.cc +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/backend/contrib/ethosn/inline_partitions.cc - * \brief A pass to inline NPU partitions that are not considered compute - * intensive. - */ - -#include -#include - -#include "../../../transforms/compiler_function_utils.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace ethosn { - -class IsComputeIntensivePartition : MixedModeVisitor { - public: - /*! - * \brief Check if the partitioned function is compute - * intensive. If it has not multiply-accumulate operations - * it is not considered compute intensive. - * - * \param expr The partitioned function to check. - */ - bool CheckSubgraph(const Expr& expr) { - is_compute_intensive = false; - VisitExpr(expr); - return is_compute_intensive; - } - - /*! - * \brief Visit the call nodes of a partitioned function - * and check if operators or composite functions make the - * partitioned function compute intensive. - * - * \param op The call node to check. - */ - void VisitExpr_(const CallNode* op) override { - Call call = GetRef(op); - std::string op_name = ""; - if (const auto* op = call->op.as()) { - op_name = op->name; - } else if (const auto* func = call->op.as()) { - op_name = func->GetAttr(attr::kComposite, "").value(); - } - - if (op_name != "") { - if (compute_intensive_operators.find(op_name) != compute_intensive_operators.end()) { - is_compute_intensive = true; - } - } - } - - private: - /*! \brief Whether or not the partitioned function is consdiered compute intensive. */ - bool is_compute_intensive; - /*! \brief A set of operators considered compute intensive. */ - const std::unordered_set compute_intensive_operators{ - "ethos-n.qnn_conv2d", "ethos-n.qnn_conv2d_transpose", - "ethos-n.qnn_avg_pool2d", "ethos-n.qnn_sigmoid", - "ethos-n.qnn_fc", "ethos-n.qnn_mean", - "ethos-n.qnn_resize", "nn.max_pool2d", - }; -}; - -/*! - * \brief This pass checks whether functions partitioned for the NPU are considered - * non-compute intensive. If they are not, they will be unpartitioned and passed onto - * other backends to consider. 
- * - * A partitioned function is currently considered non-compute intensive if it contains - * no multiply accumulate operations. Note that this is not an optimal heuristic. - * - * Some suggestions for future exploration: - * - Making a better choice about large non-compute-intensive subgraphs - * as currently these are inlined. - * - Allowing the user to input ops that are considered compute-intensive. - * - Inline "small" compute intensive operations. - */ -tvm::transform::Pass InlineNonComputeIntensivePartitions() { - runtime::TypedPackedFunc pass_func = - [=](IRModule mod, tvm::transform::PassContext ctx) { - auto analyzer = IsComputeIntensivePartition(); - Array gvs_to_inline; - for (auto gv : mod->GetGlobalVars()) { - Function func = Downcast(mod->Lookup(gv)); - auto compiler_name = func->GetAttr(attr::kCompiler); - if (compiler_name.defined() && compiler_name == "ethos-n") { - if (!analyzer.CheckSubgraph(func->body)) { - gvs_to_inline.push_back(gv); - } - } - } - return relay::transform::InlineCompilerFunctionsBoundTo(gvs_to_inline)(mod); - }; - return tvm::transform::CreateModulePass( - pass_func, 0, "relay.backend.contrib.ethos-n.InlineNonComputeIntensivePartitions", {}); -} - -TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.InlineNonComputeIntensivePartitions") - .set_body_typed(InlineNonComputeIntensivePartitions); - -} // namespace ethosn -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/ethosu/codegen.cc b/src/relay/backend/contrib/ethosu/codegen.cc deleted file mode 100644 index 300372838416..000000000000 --- a/src/relay/backend/contrib/ethosu/codegen.cc +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file relay/backend/contrib/ethosu/codegen.cc - * - * \brief This file contains the target hooks for Arm(R) Ethos(TM)-U NPU - * Codegen. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "../../../op/contrib/ethosu/op_attrs.h" -#include "../../../op/make_op.h" -#include "utils.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace ethosu { - -using FTVMTIRToRuntime = tvm::runtime::TypedPackedFunc; - -/*! - * \brief This mutator outlines functions that are marked with a named - * "Compiler" attribute. Functions that do not match this condition remain - * unaltered. 
- */ -class OutlineCompilerFunctionsMutator : public MixedModeMutator { - public: - explicit OutlineCompilerFunctionsMutator(const IRModule& mod, const std::string& compiler_name) - : mod_(mod), compiler_name_(compiler_name) {} - - Expr VisitExpr_(const LetNode* op) final { - auto pre_visit = [this](const LetNode* op) { - Expr var = this->VisitExpr(op->var); - Expr value = this->VisitExpr(op->value); - - // Outlineable function no longer needs let binding - if (this->CanOutlineExpr(value)) { - this->memo_[var] = value; - } - }; - auto post_visit = [this](const LetNode* op) { - // Rely on the Memoizer to cache pre-visit values - Expr value = this->VisitExpr(op->value); - Expr body = this->VisitExpr(op->body); - auto expr = GetRef(op); - - // Drop the let binding - if (this->CanOutlineExpr(value)) { - this->memo_[expr] = this->VisitExpr(op->body); - } else { - Var var = Downcast(this->VisitExpr(op->var)); - if (var.same_as(op->var) && value.same_as(op->value) && body.same_as(op->body)) { - this->memo_[expr] = expr; - } else { - this->memo_[expr] = Let(var, value, body); - } - } - }; - ExpandANormalForm(op, pre_visit, post_visit); - return memo_[GetRef(op)]; - } - - Expr Rewrite_(const CallNode* pre, const Expr& post) override { - Call call = Downcast(post); - if (CanOutlineExpr(call->op)) { - Function func = Downcast(call->op); - auto gv_name = func->GetAttr("global_symbol").value_or(""); - ICHECK_NE(gv_name, "") - << "Function to be outlined must have global_symbol attribute, but didn't."; - GlobalVar gv(gv_name); - if (func->checked_type_.defined()) { - gv->checked_type_ = func->checked_type(); - } - mod_->Update(gv, func); - return Call(gv, call->args, call->attrs, call->type_args); - } - return post; - } - - private: - /*! - * \brief Check if the expr is a function and has the same - * compiler name as compiler_name_. - * - * \param expr The input expr. - * \return True if is outlineable else False. - */ - bool CanOutlineExpr(const Expr& expr) { - if (!expr->IsInstance()) { - return false; - } - Function func = Downcast(expr); - auto compiler = func->GetAttr(attr::kCompiler); - if (!compiler.defined()) { - return false; - } - if (compiler != compiler_name_) { - return false; - } - return true; - } - - /*! \brief The module that the pass will run on. */ - IRModule mod_; - /*! \brief The name of the compiler to enable outlining on external functions for. */ - std::string compiler_name_; -}; - -/*! - * \brief A pass to outline compiler specific functions. - */ -tvm::transform::Pass OutlineCompilerFunctions(const std::string& compiler_name) { - runtime::TypedPackedFunc pass_func = - [=](IRModule mod, transform::PassContext ctx) { - GlobalVar gv = mod->GetGlobalVar("main"); - Function main_func = Downcast(mod->Lookup("main")); - auto new_main_body = - OutlineCompilerFunctionsMutator(mod, compiler_name).VisitExpr(main_func->body); - if (!new_main_body.same_as(main_func->body)) { - Function new_main_func = WithFields(main_func, main_func->params, new_main_body); - mod->Update(gv, new_main_func); - } - return mod; - }; - return tvm::transform::CreateModulePass( - pass_func, 0, "relay.backend.contrib.ethos-u.OutlineCompilerFunctions", {}); -} - -TVM_REGISTER_GLOBAL("relay.ext.ethos-u.OutlineCompilerFunctions") - .set_body_typed(OutlineCompilerFunctions); - -/*! - * \brief This mutator removes identity operations that are not necessary. Specifically, an - * identity operation can be removed when it is immediately followed by an NPU compute - * operation. 
- */ -class RemoveRedundantIdentities : public MixedModeMutator { - public: - Expr Rewrite_(const CallNode* pre, const Expr& post) override { - Call call = Downcast(post); - - // don't consider rewrite if current op is an identity or concatenate. - if (!call->op->IsInstance()) { - return post; - } - const auto* op = call->op.as(); - std::string op_name = op->name; - if (op_name == "contrib.ethosu.identity" || op_name == "concatenate") { - return post; - } - - // check if we can rewrite parent identity operations to current call. - bool needs_rewrite = false; - Array new_args; - for (const auto& arg : call->args) { - Expr current_arg = arg; - - // expand tuple to get parent op if we run into one - nested tuples are not supported. - if (const auto* tuple_get_item = arg.as()) { - const auto* tuple = tuple_get_item->tuple.as(); - current_arg = tuple->fields[tuple_get_item->index]; - } - - if (const auto* parent_callnode = current_arg.as()) { - if (auto parent_op = parent_callnode->op.as()) { - Call parent_call = GetRef(parent_callnode); - if (parent_op->name == "contrib.ethosu.identity" && IdentityDoesNothing(parent_call) && - CheckIdentityBetweenTransformOperations(call, parent_call)) { - needs_rewrite = true; - new_args.push_back(parent_call->args[0]); - continue; - } - } - } - new_args.push_back(arg); - } - - if (needs_rewrite) { - Call new_call = Call(call->op, new_args, call->attrs, call->type_args); - // since we are only removing an identity, we know the type information has not changed - new_call->checked_type_ = call->checked_type_; - return new_call; - } - return post; - } - - private: - bool IdentityDoesNothing(const Call& call) { - const auto* attrs = call->attrs.as(); - bool does_not_requantize = attrs->ifm_scale == 1.0 && attrs->ifm_zero_point == 0 && - attrs->ofm_scale == 1.0 && attrs->ofm_zero_point == 0; - bool has_no_activation = attrs->activation == "NONE"; - return does_not_requantize && has_no_activation; - } - - bool CheckIdentityBetweenTransformOperations(const Call& call, const Call& identity_call) { - const auto* op = call->op.as(); - std::vector nc_ops = {"reshape", "strided_slice"}; - - if (op && (std::find(nc_ops.begin(), nc_ops.end(), op->name) != nc_ops.end())) { - // check if the parent to identity operation is also a non-compute operation, - // if it isn't we can safely remove the identity in question by returning true. - const auto* identity_arg = identity_call->args[0].as(); - if (!identity_arg) { - return true; - } - const auto* identity_arg_op = identity_arg->op.as(); - if (!identity_arg_op || - !(std::find(nc_ops.begin(), nc_ops.end(), identity_arg_op->name) != nc_ops.end())) { - return true; - } - - const auto* call_tt = call->checked_type_.as(); - const auto* identity_arg_tt = identity_arg->checked_type_.as(); - ICHECK(call_tt && identity_arg_tt) - << "InferType should be run before RemoveRedundantIdentities"; - - // we can only remove the identity operation if the second non-compute operation - // in the sequence does not reduce the dimensionality of the output to the first - // non-compute operation. Doing so could lead to data being accessed incorrectly - // by the subsequent compute operation due to the reduction in dimensionality. - size_t first_transform_op_dims = identity_arg_tt->shape.size(); - size_t second_transform_op_dims = call_tt->shape.size(); - if (second_transform_op_dims < first_transform_op_dims) { - return false; - } - } - return true; - } -}; - -/*! - * \brief A pass to remove redundant identity operations. 
- */ -tvm::transform::Pass IdentityOptimizer() { - runtime::TypedPackedFunc pass_func = - [=](IRModule mod, transform::PassContext ctx) { - for (auto gv : mod->GetGlobalVars()) { - Function func = Downcast(mod->Lookup(gv)); - auto compiler_name = func->GetAttr(attr::kCompiler); - if (compiler_name.defined() && compiler_name == "ethos-u") { - auto new_body = RemoveRedundantIdentities().VisitExpr(func->body); - if (!new_body.same_as(func->body)) { - Function new_func = WithFields(func, func->params, new_body); - mod->Update(gv, new_func); - } - } - } - return mod; - }; - return tvm::transform::CreateModulePass( - pass_func, 0, "relay.backend.contrib.ethos-u.IdentityOptimizer", {"InferType"}); -} - -TVM_REGISTER_GLOBAL("relay.ext.ethos-u.IdentityOptimizer").set_body_typed(IdentityOptimizer); - -/*! - * \brief This pass will lower NPU functions in a Relay module to scheduled TIR prim functions. - */ -tvm::transform::Pass RelayToTIR() { - runtime::TypedPackedFunc pass_func = - [=](IRModule ir_module, transform::PassContext pass_context) { - auto relay_to_tir_pf = tvm::runtime::Registry::Get("relay.ext.ethos-u.relay_to_tir"); - ICHECK(relay_to_tir_pf); - ir_module = (*relay_to_tir_pf)(ir_module); - return ir_module; - }; - return tvm::transform::CreateModulePass(pass_func, 0, "relay.contrib.ethos-u.RelayToTIR", {}); -} - -/*! - * \brief This function lowers the IRModule with PrimFunc - * with the target of the microNPU to a C-source runtime module - */ -runtime::Module TIRToRuntime(IRModule mod, Target target) { - Array compile_artifacts; - for (const auto& kv : mod->functions) { - const tir::PrimFunc& prim_func = Downcast(kv.second); - auto params = prim_func->GetAttr>("ethos-u.constants"); - ICHECK(params) << "microNPU params should be present"; - auto primfunc_to_artifact_pf = - tvm::runtime::Registry::Get("relay.ext.ethos-u.primfunc_to_artifact"); - ICHECK(primfunc_to_artifact_pf); - CompilationArtifact ca = (*primfunc_to_artifact_pf)(prim_func); - compile_artifacts.push_back(ca); - } - auto ca_to_runtime = tvm::runtime::Registry::Get("runtime.module.ethos-u.create"); - return (*ca_to_runtime)(compile_artifacts); -} - -TVM_REGISTER_TARGET_KIND("ethos-u", kDLCPU) - .set_attr("use_device_api", Bool(true)) - .set_attr(tvm::attr::kRelayToTIR, RelayToTIR()) - .set_attr("TIRToRuntime", TIRToRuntime); - -} // namespace ethosu -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/ethosu/compiler_attrs.cc b/src/relay/backend/contrib/ethosu/compiler_attrs.cc deleted file mode 100644 index a3a09cf1119b..000000000000 --- a/src/relay/backend/contrib/ethosu/compiler_attrs.cc +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "../../../op/make_op.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace ethosu { - -/*! \brief Attributes to store the compiler options for Arm(R) Ethos(TM)-U NPU. */ -struct EthosUCompilerConfigNode : public tvm::AttrsNode { - String accelerator_config; - Bool enable_cascader = Bool(false); - Bool enable_striping = Bool(false); - Bool disable_copying_constants = Bool(false); - String dev_force_block_config; - String dev_max_open_plans; - String dev_max_closed_plans; - String dev_select_proposal_idx; - Bool dev_disable_pareto_plans = Bool(false); - Bool dev_disable_pareto_proposals = Bool(false); - Bool dev_disable_block_culling = Bool(false); - Bool dev_cascader_logging = Bool(false); - - TVM_DECLARE_ATTRS(EthosUCompilerConfigNode, "ext.attrs.EthosUCompilerConfigNode") { - TVM_ATTR_FIELD(accelerator_config) - .describe( - "The class of Arm(R) Ethos(TM)-U NPU; possible values = {ethos-u55-32, ethos-u55-64, " - "ethos-u55-128, ethos-u55-256}") - .set_default("ethos-u55-256"); - TVM_ATTR_FIELD(enable_cascader) - .describe("Whether the cascader should be enabled") - .set_default(Bool(false)); - TVM_ATTR_FIELD(enable_striping) - .describe("Whether the cascader should be striping") - .set_default(Bool(false)); - TVM_ATTR_FIELD(disable_copying_constants) - .describe( - "Whether copying constants is disabled for case without the cascader. When this option " - "is " - "enabled, it is assumed that the constants should be located in SRAM (user determines " - "in " - "the linker script for section \".rodata.tvm\" that the constants are located in SRAM)") - .set_default(Bool(false)); - String dev_warning = "Option is intended for development and debugging purposes only. 
"; - TVM_ATTR_FIELD(dev_force_block_config) - .describe((dev_warning + String("Force the block config to a given value; format = " - "\"[BLK_HEIGHT]x[BLK_WIDTH]x[BLK_DEPTH]\"")) - .data()) - .set_default(""); - TVM_ATTR_FIELD(dev_max_open_plans) - .describe( - (dev_warning + String("Specify the number of open plans kept for each part group")) - .data()) - .set_default("8"); - TVM_ATTR_FIELD(dev_max_closed_plans) - .describe( - (dev_warning + String("Specify the number of closed plans kept for each part group")) - .data()) - .set_default("32"); - TVM_ATTR_FIELD(dev_select_proposal_idx) - .describe((dev_warning + String("Select proposal by index")).data()) - .set_default("-1"); - TVM_ATTR_FIELD(dev_disable_pareto_plans) - .describe((dev_warning + String("Disable pareto culling for plans")).data()) - .set_default(Bool(false)); - TVM_ATTR_FIELD(dev_disable_pareto_proposals) - .describe((dev_warning + String("Disable pareto culling for proposals")).data()) - .set_default(Bool(false)); - TVM_ATTR_FIELD(dev_disable_block_culling) - .describe((dev_warning + String("Disable culling for block configs")).data()) - .set_default(Bool(false)); - TVM_ATTR_FIELD(dev_cascader_logging) - .describe( - (dev_warning + String("Enable cascader logging, log is dumped to .json file")).data()) - .set_default(Bool(false)); - } -}; - -class EthosUCompilerConfig : public Attrs { - public: - TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(EthosUCompilerConfig, Attrs, EthosUCompilerConfigNode); -}; - -TVM_REGISTER_NODE_TYPE(EthosUCompilerConfigNode); -TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.ethos-u.options", EthosUCompilerConfig); - -auto GetCompilerAttrs() { - auto ctx = transform::PassContext::Current(); - auto cfg = ctx->GetConfig("relay.ext.ethos-u.options"); - if (!cfg.defined()) { - cfg = AttrsWithDefaultValues(); - } - return cfg; -} -TVM_REGISTER_GLOBAL("relay.ext.ethos-u.get_compiler_attrs").set_body_typed(GetCompilerAttrs); - -} // namespace ethosu -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/ethosu/preprocess.cc b/src/relay/backend/contrib/ethosu/preprocess.cc deleted file mode 100644 index d87447f863e2..000000000000 --- a/src/relay/backend/contrib/ethosu/preprocess.cc +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "../../../op/make_op.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace ethosu { - -/*! - * \brief This expression rewriter will traverse the graph to find calls - * to all external functions. 
If they have multiple inputs and/or - * multiple outputs, the following has to be done : - * 1) If multiple inputs are present, they needed to be concat before the call. - * 2) Inside the external function they need to be split again to their original inputs. - * 3) If there are multiple outputs, they need to be concat at the end of external function. - * 4) Then, the concat output again need to be split and made the original tuple output in the - * main. - */ -class ExternalFuncIOHandler : public ExprRewriter { - public: - explicit ExternalFuncIOHandler(const IRModule& module) : module_(module) {} - int count = 0; - - Function InferType(const Function& expr, const IRModule& m) { - IRModule mod(m); - mod->Update(mod->GetGlobalVar("main"), expr); - mod = transform::InferType()(mod); - return Downcast(mod->Lookup("main")); - } - - /*! - * \brief This function will take shape and compute - * the scalar size value for it to be use to create - * flat single dimensional tensors. - */ - int64_t CalcSize(const Array& shape) { - int size = 1; - for (auto dim_size : shape) { - size = size * Downcast(dim_size)->value; - } - return size; - } - - /*! - * \brief This will take a tensor and create a flattened - * tensor to be used by the concat. - */ - Expr CreateFlattenTensor(const Expr& input) { - auto ishape = Downcast>(Downcast(input->checked_type())->shape); - int flatten_size = CalcSize(ishape); - Array output_shape = {Integer(flatten_size)}; - return MakeReshape(input, output_shape); - } - - /*! - * \brief This will take flattened tensors and create - * a single concat'd tensor. - */ - Expr CreateConcatTensor(const Array& inputs) { - auto tuple = Tuple(inputs); - return MakeConcatenate(tuple, 0); - } - - /*! - * \brief This will take a flattened concat'd tensor and use the original inputs shapes - * to recreate a Tuple of the original set of tensors. - */ - Expr CreateSplitReshapedTensors(const Expr& input, const Array& original_args) { - Array> shapes; - Array flatten_tensor_sizes; - Array split_indices; - Array rets; - - int total_size = 0; - for (auto orig_arg : original_args) { - auto shape = Downcast>(Downcast(orig_arg->checked_type())->shape); - shapes.push_back(shape); - flatten_tensor_sizes.push_back(CalcSize(shape)); - if (total_size != 0) { - split_indices.push_back(total_size); - } - total_size += CalcSize(shape); - } - auto split_outs = MakeSplit(input, split_indices, 0); - for (unsigned int i = 0; i < shapes.size(); i++) { - auto split_out = TupleGetItem(split_outs, i); - split_out->checked_type_ = original_args[i]->checked_type_; - rets.push_back(MakeReshape(split_out, shapes[i])); - } - return Tuple(rets); - } - - /*! - * \brief Modify the external function to split the input as the original compute - * as required originally. Moreover, the outputs will be flattened and concat'd - * to make a single output. Finaly, the external function should only have a single input - * and a single output. 
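// The helpers above marshal several NPU inputs or outputs through a single 1-D tensor: each
// tensor is flattened, the flat tensors are concatenated, and the element offsets at which the
// concatenation must later be split back apart are the running totals of the flattened sizes,
// excluding zero and the grand total. A standalone sketch of that bookkeeping
// (FlatSize/SplitIndices are illustrative names, not part of this file):
#include <cstdint>
#include <vector>

int64_t FlatSize(const std::vector<int64_t>& shape) {
  int64_t size = 1;
  for (int64_t dim : shape) size *= dim;  // scalar element count of one tensor
  return size;
}

std::vector<int64_t> SplitIndices(const std::vector<std::vector<int64_t>>& shapes) {
  std::vector<int64_t> indices;
  int64_t total = 0;
  for (const auto& shape : shapes) {
    if (total != 0) indices.push_back(total);  // boundary between consecutive tensors
    total += FlatSize(shape);
  }
  return indices;
}
// e.g. shapes {1,4,4,8} and {1,2,2,8} flatten to 128 and 32 elements, so the single
// concatenated tensor has 160 elements and is split back at index {128}.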
- */ - Function ModifyExternalFunction(const Function& func, const GlobalVar& gv, - const DataType& dtype) { - Array inputs; - Var ifms; - if (func->params.size() > 1) { - Array> shapes; - Array flatten_tensor_sizes; - Array split_indices; - - auto func_name = gv->name_hint; - int total_size = 0; - for (auto input : func->params) { - auto shape = Downcast>(Downcast(input->checked_type())->shape); - shapes.push_back(shape); - auto flat_size = CalcSize(shape); - flatten_tensor_sizes.push_back(flat_size); - if (total_size != 0) { - split_indices.push_back(total_size); - } - total_size += flat_size; - } - Array ifms_shape = {total_size}; - ifms = Var(func_name + "_ifms", TensorType(ifms_shape, dtype)); - auto split_outs = MakeSplit(ifms, split_indices, 0); - for (unsigned int i = 0; i < shapes.size(); i++) { - auto split_out = TupleGetItem(split_outs, i); - split_out->checked_type_ = func->params[i]->checked_type(); - inputs.push_back(MakeReshape(split_out, shapes[i])); - } - } else { - CHECK_EQ(func->params.size(), 1); - inputs.push_back(func->params[0]); - ifms = func->params[0]; - } - Map bind_map; - CHECK_EQ(func->params.size(), inputs.size()); - for (size_t i = 0; i < inputs.size(); i++) { - bind_map.Set(func->params[i], inputs[i]); - } - auto core_compute_expr = Bind(func->body, bind_map); - - // Creation of wrapper inside the external function - Array params = {ifms}; - if (func->body->IsInstance()) { - auto tuple_out = func->body.as(); - Array reshaped_outputs; - for (unsigned int i = 0; i < tuple_out->fields.size(); i++) { - auto out = Downcast(core_compute_expr)->fields[i]; - out->checked_type_ = tuple_out->fields[i]->checked_type_; - reshaped_outputs.push_back(CreateFlattenTensor(out)); - } - auto concat_out = CreateConcatTensor(reshaped_outputs); - auto f = Function(params, concat_out, concat_out->checked_type_, {}, func->attrs); - return InferType(f, this->module_); - } else { - auto f = - Function(params, core_compute_expr, core_compute_expr->checked_type_, {}, func->attrs); - return InferType(f, this->module_); - } - } - - Expr Rewrite_(const CallNode* call, const Expr& post) final { - auto post_call = Downcast(post); - - if (auto optional_glb_var = post_call->op.as()) { - auto glb_var = optional_glb_var.value(); - auto func = Downcast(module_->functions[glb_var]); - - // If the number of inputs and output are 1 --> no need to do anything - if (post_call->args.size() == 1 && !func->body->IsInstance()) { - return post; - } - if (auto compiler = func->GetAttr(attr::kCompiler)) { - if (compiler == "ethos-u") { - auto ext_input = std::move(post_call->args[0]); - auto arg_dtype = Downcast(post_call->args[0]->checked_type())->dtype; - if (post_call->args.size() > 1) { - Array reshaped_inputs; - for (const auto& arg : post_call->args) { - // All arguments should be of same data type - CHECK_EQ(arg_dtype, Downcast(arg->checked_type())->dtype) - << "Currently NPU external functions require all inputs to be of same data " - "type"; - reshaped_inputs.push_back(CreateFlattenTensor(arg)); - } - ext_input = CreateConcatTensor(reshaped_inputs); - } - auto ext_func = ModifyExternalFunction(func, glb_var, arg_dtype); - Array new_args = {ext_input}; - module_->Add(glb_var, ext_func); - Expr new_call = Call(glb_var, new_args); - if (func->body->IsInstance()) { - auto orginal_tuple_out = Downcast(func->body); - new_call = CreateSplitReshapedTensors(new_call, orginal_tuple_out->fields); - } - return std::move(new_call); - } - } - } - return post; - } - - private: - IRModule module_; -}; - -IRModule 
PreprocessExternalFuncIO_(const IRModule& module) { - ExternalFuncIOHandler ex_func_io_handle(module); - auto func = Downcast(module->Lookup("main")); - auto preprocessed = PostOrderRewrite(func, &ex_func_io_handle); - module->Update(module->GetGlobalVar("main"), Downcast(preprocessed)); - return module; -} - -} // namespace ethosu -} // namespace contrib - -/*! - * \brief This is a pre-processing pass for all NPU external functions. - * Currently, the NPU runtime module expects a single input and a single output. - * Therefore, this pass will concat the inputs pre-call, split again inside ext. func, - * concat the output inside ext. func and re-split again after the call. - */ - -namespace transform { -Pass PreprocessExternalFuncIO() { - runtime::TypedPackedFunc pre_processed_ext_func = - [=](IRModule m, PassContext pc) { - auto _m = contrib::ethosu::PreprocessExternalFuncIO_(m); - return _m; - }; - auto preprocess_pass = - CreateModulePass(pre_processed_ext_func, 0, "PreprocessExternalFuncIO", {}); - return Sequential({preprocess_pass, InferType()}); -} - -TVM_REGISTER_GLOBAL("relay.ext.ethos-u.PreprocessExternalFuncIO") - .set_body_typed(transform::PreprocessExternalFuncIO); - -} // namespace transform -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/ethosu/source_module.cc b/src/relay/backend/contrib/ethosu/source_module.cc deleted file mode 100644 index 938ce2b42c80..000000000000 --- a/src/relay/backend/contrib/ethosu/source_module.cc +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file source_module.cc - * \brief Source code module for the host to invoke the NPU - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "../../../../runtime/file_utils.h" -#include "utils.h" - -namespace tvm { -namespace runtime { - -using CompilationArtifact = relay::contrib::ethosu::CompilationArtifact; - -// The runtime.Module that contains the host-side c code -// required for invoking the NPU with the command stream -class EthosUModuleNode : public ModuleNode { - public: - /*! - * \brief The microNPU runtime module. - * - * \param compilation_artifacts - * This is an array of CompilationArtifacts that is produced via - * lowering each PrimFunc to command stream. Here, those artifacts - * will be used to create the c-source. 
- */ - explicit EthosUModuleNode(Array compilation_artifacts) - : compilation_artifacts_(compilation_artifacts) { - c_source += "#include \n"; - c_source += "#include \n"; - c_source += "#include \n"; - c_source += "#include \n\n"; - for (const CompilationArtifact& compilation_artifact : compilation_artifacts) { - c_source += GenerateSource(compilation_artifact); - c_source += "\n\n"; - } - } - - /*! - * \brief Save the module to file. - * - * \param file_name The file to be saved to. - * \param format The format of the file. - */ - void SaveToFile(const String& file_name, const String& format) final { - std::string fmt = GetFileFormat(file_name, format); - ICHECK_EQ(fmt, "c") << "Can only save to format=" - << "c"; - std::ofstream out(file_name); - out << c_source; - out.close(); - } - - String GetSource(const String& format) final { return c_source; } - - String GetFormat() override { return "c"; } - - Array GetArtifacts() { return compilation_artifacts_; } - - /*! - * \brief Get a PackedFunc from the module. - * - * \param name The name of the function. - * \param sptr_to_self The ObjectPtr that points to this module node. - * - * \return The function pointer when it is found, otherwise, PackedFunc(nullptr). - */ - PackedFunc GetFunction(const String& name, const ObjectPtr& sptr_to_self) final { - if (name == "get_func_names") { - return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - Array func_names; - for (const CompilationArtifact& ca : compilation_artifacts_) { - func_names.push_back(ca->function_name); - } - *rv = func_names; - }); - } - return PackedFunc(); - } - - const char* type_key() const final { return "c"; } - - static Module Create(Array compilation_artifacts) { - auto n = make_object(compilation_artifacts); - return Module(n); - } - - /*! \brief Get the property of the runtime module .*/ - int GetPropertyMask() const override { return ModulePropertyMask::kDSOExportable; } - - bool ImplementsFunction(const String& name, bool query_imports) final { - return std::find_if(compilation_artifacts_.begin(), compilation_artifacts_.end(), - [&name](const CompilationArtifact& artifact) { - return artifact->function_name == name; - }) != compilation_artifacts_.end(); - } - - private: - std::string c_source; - Array compilation_artifacts_; - Map pool_var_names_; - int indent_{0}; - constexpr static int kMaxBaseAddresses_ = 6; - - /*! - * \brief Convert the raw string of hex values into a hex string - * - * \param raw the raw string of hex values - * - * \return string formatted as a hex string - */ - std::string GetHexString(const std::string& raw) { - std::stringstream ss; - for (size_t i = 0; i < raw.size() / 2; ++i) { - ss << "\\x" << raw.substr(i * 2, 2); - } - return ss.str(); - } - - /*! - * \brief Emit code that updates the base_addrs array with the base address of the given array - * - * \param index array index for base_addrs and base_addrs_size - * \param name of the array containing relevant data - * - * \return string of code that updates the base_addrs array with the base address of the given - * array - */ - std::string SetBaseAddress(int index, std::string name, int size) { - std::stringstream ss; - ss << " base_addrs[" << index << "] = (uintptr_t)(" << name << ");\n"; - ss << " base_addrs_size[" << index << "] = " << size << ";\n"; - return ss.str(); - } - - /*! - * \brief Enter a new scope. - */ - void EnterScope() { indent_ += 2; } - - /*! - * \brief Exit a scope. 
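GetHexString above is what lets this removed module embed binary artifacts (the command stream and encoded constants, stored as two hex characters per byte) directly into the generated C source as string literals. A minimal standalone version of that conversion follows; the input fragment is made up:

```cpp
#include <iostream>
#include <sstream>
#include <string>

// Re-emit a string of hex digit pairs as C-string "\xAB" escapes.
std::string ToHexEscapes(const std::string& raw) {
  std::stringstream ss;
  for (size_t i = 0; i + 1 < raw.size(); i += 2) {
    ss << "\\x" << raw.substr(i, 2);
  }
  return ss.str();
}

int main() {
  // A made-up fragment of a command stream in hex form.
  std::cout << ToHexEscapes("0100c0de") << '\n';  // prints \x01\x00\xc0\xde
  return 0;
}
```

Escaping every byte as \xAB keeps the generated file plain ASCII regardless of the binary content it carries.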
- */ - void ExitScope() { - ICHECK_GE(indent_, 2U) << "Wrong ident found."; - indent_ -= 2; - } - - /*! \brief Print indents using spaces. */ - void PrintIndents(std::stringstream& ss) { - for (int i = 0; i < indent_; i++) { - ss << ' '; - } - } - - /*! - * \brief Creates a runtime function signature - */ - void PrintRuntimeFunctionSignature(std::stringstream& ss, - const relay::contrib::ethosu::CompilationArtifact& artifact, - std::string func_name) { - ss << "TVM_DLL int32_t " << func_name; - ss << "("; - std::unordered_map param_idx_to_base_address; - for (const relay::contrib::ethosu::BaseAddress& base_address : artifact->base_addresses) { - if (base_address->primfunc_param_idx.defined()) { - param_idx_to_base_address[base_address->primfunc_param_idx.IntValue()] = base_address; - } - } - for (unsigned int i = 0; i < param_idx_to_base_address.size(); i++) { - relay::contrib::ethosu::BaseAddress base_address = param_idx_to_base_address[i]; - ss << "void* " << base_address->name << ","; - } - ss << "void* resource_handle) {\n"; - } - - /*! - * \brief Creates a cplusplus guard prefix for extern "C" printing - */ - void PrintExternCPrefix(std::stringstream& ss) { - PrintIndents(ss); - ss << "#ifdef __cplusplus\n"; - ss << "extern \"C\" {\n"; - ss << "#endif\n"; - } - - /*! - * \brief Creates a cplusplus guard postfix for extern "C" printing - */ - void PrintExternCPostfix(std::stringstream& ss) { - PrintIndents(ss); - ss << "#ifdef __cplusplus\n"; - ss << "}\n"; - ss << "#endif\n"; - } - - /*! - * \brief Emit code that offloads a subgraph to the NPU - * - * \return string of code that offloads a subgraph to the NPU - */ - std::string GenerateSource(relay::contrib::ethosu::CompilationArtifact compilation_artifact) { - std::string func_no_dashes = compilation_artifact->function_name; - std::replace(func_no_dashes.begin(), func_no_dashes.end(), '-', '_'); - std::stringstream ss; - - size_t weights_size = (compilation_artifact->encoded_constants.size() / 2); - ss << "// Update linker script to place .rodata.tvm in memory that can be accessed by the " - "NPU\n"; - if (weights_size > 0) { - ss << "__attribute__((section(\".rodata.tvm\"), aligned(16))) static int8_t " - << func_no_dashes << "_weights[" << weights_size << "] = \""; - ss << GetHexString(compilation_artifact->encoded_constants); - ss << "\";\n"; - } else { - ss << "static int8_t* " << func_no_dashes << "_weights = NULL;\n"; - } - ss << "__attribute__((section(\".rodata.tvm\"), aligned(16))) static int8_t " << func_no_dashes - << "_cms_data_data[" << compilation_artifact->command_stream.size() / 2 << "] = \""; - ss << GetHexString(compilation_artifact->command_stream); - ss << "\";\n"; - ss << "\n"; - - PrintExternCPrefix(ss); - PrintRuntimeFunctionSignature(ss, compilation_artifact, func_no_dashes); - ss << " void* cms_data = (void*)(" << func_no_dashes << "_cms_data_data);\n"; - ss << " const size_t cms_data_size = sizeof(" << func_no_dashes << "_cms_data_data);\n"; - ss << " size_t base_addrs_size[" << kMaxBaseAddresses_ << "] = {0};\n"; - ss << " uint64_t base_addrs[" << kMaxBaseAddresses_ << "] = {0};\n"; - ss << "\n"; - - ss << SetBaseAddress(0, func_no_dashes + "_weights", weights_size); - for (const relay::contrib::ethosu::BaseAddress& base_address : - compilation_artifact->base_addresses) { - if (base_address->is_runtime_allocation) { - ss << " int8_t* " << base_address->name - << " = (int8_t*) TVMBackendAllocWorkspace(kDLCPU, 0, (uint64_t)" << base_address->size - << ", 0, 16);\n"; - } - ss << 
SetBaseAddress(base_address->region->value, base_address->name.c_str(), - base_address->size->value); - } - ss << "\n"; - - ss << " int32_t result = TVMEthosULaunch(resource_handle, cms_data, cms_data_size, " - "base_addrs, base_addrs_size, " - << kMaxBaseAddresses_ << ");\n"; - - for (const relay::contrib::ethosu::BaseAddress& base_address : - compilation_artifact->base_addresses) { - if (base_address->is_runtime_allocation) { - ss << " TVMBackendFreeWorkspace(kDLCPU, 0, " << base_address->name << ");\n"; - } - } - ss << " return result;\n"; - ss << "}\n"; - ss << "\n"; - PrintExternCPostfix(ss); - ss << "\n"; - return ss.str(); - } -}; - -class EthosUModule : public Module { - public: - EthosUModule() {} - explicit EthosUModule(ObjectPtr n) : Module(n) {} - /*! \return internal container */ - inline EthosUModuleNode* operator->(); - /*! \return internal container */ - inline const EthosUModuleNode* operator->() const; -}; - -inline EthosUModuleNode* EthosUModule::operator->() { - return static_cast(get_mutable()); -} - -TVM_REGISTER_GLOBAL("runtime.module.ethos-u.create") - .set_body_typed([](Array compilation_artifacts) { - return EthosUModuleNode::Create(compilation_artifacts); - }); - -TVM_REGISTER_GLOBAL("runtime.module.ethos-u.get_artifacts").set_body_typed([](EthosUModule mod) { - return mod->GetArtifacts(); -}); - -} // namespace runtime -} // namespace tvm diff --git a/src/relay/backend/contrib/ethosu/utils.cc b/src/relay/backend/contrib/ethosu/utils.cc deleted file mode 100644 index 01bd4d10324d..000000000000 --- a/src/relay/backend/contrib/ethosu/utils.cc +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \file relay/backend/contrib/ethosu/utils.cc - * \brief Utilities for microNPU codegen - */ - -#include "utils.h" - -#include -#include -#include -#include - -#include - -namespace tvm { -namespace relay { -namespace contrib { -namespace ethosu { - -BaseAddress::BaseAddress(String name, Integer primfunc_param_idx, Integer region, Integer size, - Bool is_runtime_allocation) { - auto base_address_node = make_object(); - base_address_node->name = name; - base_address_node->primfunc_param_idx = primfunc_param_idx; - base_address_node->region = region; - base_address_node->size = size; - base_address_node->is_runtime_allocation = is_runtime_allocation; - data_ = std::move(base_address_node); -} - -TVM_REGISTER_NODE_TYPE(BaseAddressNode); -TVM_REGISTER_GLOBAL("relay.ext.ethos-u.BaseAddress") - .set_body_typed([](String name, Integer primfunc_param_idx, Integer region, Integer size, - Bool is_runtime_allocation) { - if (is_runtime_allocation.defined()) { - return BaseAddress(name, primfunc_param_idx, region, size, is_runtime_allocation); - } else { - return BaseAddress(name, primfunc_param_idx, region, size); - } - }); - -CompilationArtifact::CompilationArtifact(String function_name, String command_stream, - String encoded_constants, - Array base_addresses) { - auto compilation_artifact_node = make_object(); - compilation_artifact_node->function_name = function_name; - compilation_artifact_node->command_stream = command_stream; - compilation_artifact_node->encoded_constants = encoded_constants; - compilation_artifact_node->base_addresses = base_addresses; - data_ = std::move(compilation_artifact_node); -} - -TVM_REGISTER_NODE_TYPE(CompilationArtifactNode); -TVM_REGISTER_GLOBAL("relay.ext.ethos-u.CompilationArtifact") - .set_body_typed([](String function_name, String command_stream, String encoded_constants, - Array base_addresses) { - return CompilationArtifact(function_name, command_stream, encoded_constants, base_addresses); - }); - -TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable) - .set_dispatch([](const ObjectRef& ref, ReprPrinter* p) { - auto* node = static_cast(ref.get()); - p->stream << "CompilationArtifactNode(\n" - << "function_name=" << node->function_name - << ",\n command_stream=" << node->command_stream - << ",\n encoded_constants=" << node->encoded_constants - << ",\n base_addresses=" << node->base_addresses << ")"; - }); - -} // namespace ethosu -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/ethosu/utils.h b/src/relay/backend/contrib/ethosu/utils.h deleted file mode 100644 index 5c61271d3425..000000000000 --- a/src/relay/backend/contrib/ethosu/utils.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \file relay/backend/contrib/ethosu/utils.h - * \brief Utilities for microNPU codegen - */ - -#ifndef TVM_RELAY_BACKEND_CONTRIB_ETHOSU_UTILS_H_ -#define TVM_RELAY_BACKEND_CONTRIB_ETHOSU_UTILS_H_ - -#include -#include -#include - -namespace tvm { -namespace relay { -namespace contrib { -namespace ethosu { - -/*! - * \brief Base addresses are input pointers to - * the driver that get accessed by the command stream - * using offsets to read/write data. - */ -struct BaseAddressNode : public Object { - /*! \brief The identifier, usually it the param name of the PrimFunc that gets lowered */ - String name; - /*! \brief The index in the params array of the PrimFunc. This is needed to keep aligned - * between the PrimFunc arguments ordering and argument ordering of generated code */ - Integer primfunc_param_idx; - /*! \brief The region used by the command stream. This needs to match with base address - * index passed into the driver */ - Integer region; - /*! \brief The size of the buffer accessible by this base address */ - Integer size; - /*! \brief This is a runtime allocation that needs to be done in the function */ - Bool is_runtime_allocation{Bool(false)}; - - void VisitAttrs(tvm::AttrVisitor* v) { - v->Visit("name", &name); - v->Visit("primfunc_param_idx", &primfunc_param_idx); - v->Visit("region", ®ion); - v->Visit("size", &size); - v->Visit("is_runtime_allocation", &is_runtime_allocation); - } - - bool SEqualReduce(const BaseAddressNode* other, SEqualReducer equal) const { - return equal(name, other->name) && equal(primfunc_param_idx, other->primfunc_param_idx) && - equal(region, other->region) && equal(size, other->size) && - equal(is_runtime_allocation, other->is_runtime_allocation); - } - - void SHashReduce(SHashReducer hash_reduce) const { - hash_reduce(name); - hash_reduce(primfunc_param_idx); - hash_reduce(region); - hash_reduce(size); - hash_reduce(is_runtime_allocation); - } - - static constexpr const char* _type_key = "relay.ext.ethos-u.BaseAddress"; - TVM_DECLARE_FINAL_OBJECT_INFO(BaseAddressNode, Object); -}; - -class BaseAddress : public ObjectRef { - public: - TVM_DLL BaseAddress(String name, Integer primfunc_param_idx, Integer region, Integer size, - Bool is_runtime_allocation = Bool(false)); - TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(BaseAddress, ObjectRef, BaseAddressNode); -}; - -/*! - * \brief Captures all the binary artifactes required to create - * the C-source runtime module - */ -struct CompilationArtifactNode : public Object { - /*! \brief The function name for this artifact belongs to */ - String function_name; - /*! \brief The binary command stream (CS) in hex format */ - String command_stream; - /*! \brief The encoded biases and weights in hex format */ - String encoded_constants; - /*! 
\brief The information regarding the base addresses */ - Array base_addresses; - - void VisitAttrs(tvm::AttrVisitor* v) { - v->Visit("function_name", &function_name); - v->Visit("command_stream", &command_stream); - v->Visit("encoded_constants", &encoded_constants); - v->Visit("base_addresses", &base_addresses); - } - - bool SEqualReduce(const CompilationArtifactNode* other, SEqualReducer equal) const { - return equal(function_name, other->function_name) && - equal(command_stream, other->command_stream) && - equal(encoded_constants, other->encoded_constants) && - equal(base_addresses, other->base_addresses); - } - - void SHashReduce(SHashReducer hash_reduce) const { - hash_reduce(function_name); - hash_reduce(command_stream); - hash_reduce(encoded_constants); - hash_reduce(base_addresses); - } - - static constexpr const char* _type_key = "relay.ext.ethos-u.CompilationArtifact"; - TVM_DECLARE_FINAL_OBJECT_INFO(CompilationArtifactNode, Object); -}; - -class CompilationArtifact : public ObjectRef { - public: - TVM_DLL CompilationArtifact(String function_name, String command_stream, String encoded_constants, - Array base_addresses); - TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(CompilationArtifact, ObjectRef, CompilationArtifactNode); -}; - -} // namespace ethosu -} // namespace contrib -} // namespace relay -} // namespace tvm - -#endif // TVM_RELAY_BACKEND_CONTRIB_ETHOSU_UTILS_H_ diff --git a/src/relay/op/contrib/ethosu/binary_elementwise.cc b/src/relay/op/contrib/ethosu/binary_elementwise.cc deleted file mode 100644 index 327f7cb33035..000000000000 --- a/src/relay/op/contrib/ethosu/binary_elementwise.cc +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/op/contrib/ethosu/binary_elementwise.cc - * \brief Binary elementwise operators definitions for the Arm(R) Ethos(TM)-U NPU. 
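The BaseAddress records defined in utils.h above are what the removed source_module.cc turns into the launch wrapper's argument tables: each record's region selects a slot in the fixed-size base_addrs / base_addrs_size arrays handed to TVMEthosULaunch (six slots, kMaxBaseAddresses_). A standalone sketch of that slot filling; buffer names and sizes are made up:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// One entry per buffer the command stream addresses, as in the removed BaseAddressNode.
struct BaseAddressInfo {
  int region;       // slot in the driver's base-address table
  const void* ptr;  // buffer start (weights, scratch, I/O, ...)
  size_t size;      // bytes accessible through this base address
};

int main() {
  constexpr int kMaxBaseAddresses = 6;  // fixed table size used by the generated wrapper
  uintptr_t base_addrs[kMaxBaseAddresses] = {0};
  size_t base_addrs_size[kMaxBaseAddresses] = {0};

  // Made-up buffers standing in for weights, scratch and I/O.
  static uint8_t weights[128], scratch[256], io[64];
  std::vector<BaseAddressInfo> bases = {
      {0, weights, sizeof(weights)}, {1, scratch, sizeof(scratch)}, {2, io, sizeof(io)}};

  // Mirrors SetBaseAddress: base_addrs[region] = pointer, base_addrs_size[region] = size.
  for (const BaseAddressInfo& b : bases) {
    base_addrs[b.region] = reinterpret_cast<uintptr_t>(b.ptr);
    base_addrs_size[b.region] = b.size;
  }

  for (int i = 0; i < kMaxBaseAddresses; ++i) {
    std::cout << "slot " << i << ": size " << base_addrs_size[i] << '\n';
  }
  return 0;
}
```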
- */ -#include - -#include "common.h" -#include "op_attrs.h" - -namespace tvm { -namespace relay { -namespace op { -namespace contrib { -namespace ethosu { - -bool EthosuBinaryElementwiseRel(const Array& types, int num_inputs, const Attrs& attrs, - const TypeReporter& reporter) { - const int ifm_index = 0; - const int ifm2_index = 1; - const int result_index = 3; - ICHECK_EQ(types.size(), result_index + 1); - - const auto* ifm = types[ifm_index].as(); - const auto* ifm2 = types[ifm2_index].as(); - if (ifm == nullptr) return false; - if (ifm2 == nullptr) return false; - - const auto* param = attrs.as(); - ICHECK(param != nullptr) << "EthosuBinaryElementwiseAttrs cannot be nullptr."; - - const String operator_name = "ethosu_binary_elementwise"; - const String operator_type = param->operator_type; - const DataType ifm_dtype = ifm->dtype; - const DataType ifm2_dtype = ifm2->dtype; - const DataType ofm_dtype = DataTypeFromString(param->ofm_dtype); - - CheckDataTypeMatch(reporter, ifm_dtype, ifm2_dtype, operator_name, "ifm", "ifm2", operator_type); - - if (operator_type == "ADD" || operator_type == "SUB" || operator_type == "MUL") { - auto allowed_types = {DataType::Int(8), DataType::UInt(8), DataType::Int(16), - DataType::Int(32)}; - CheckDataType(reporter, ifm_dtype, allowed_types, operator_name, "ifm", operator_type); - CheckDataType(reporter, ofm_dtype, allowed_types, operator_name, "ofm", operator_type); - } else if (operator_type == "MIN" || operator_type == "MAX") { - auto allowed_types = {DataType::Int(8), DataType::UInt(8)}; - CheckDataType(reporter, ifm_dtype, allowed_types, operator_name, "ifm", operator_type); - CheckDataTypeMatch(reporter, ifm_dtype, ofm_dtype, operator_name, "ifm", "ofm", operator_type); - } else if (operator_type == "SHR") { - CheckDataType(reporter, ifm_dtype, {DataType::Int(32)}, operator_name, "ifm", operator_type); - CheckDataType(reporter, ofm_dtype, {DataType::UInt(8), DataType::Int(8), DataType::Int(32)}, - operator_name, "ofm", operator_type); - } else if (operator_type == "SHL") { - CheckDataType(reporter, ifm_dtype, {DataType::Int(32)}, operator_name, "ifm", operator_type); - CheckDataType(reporter, ofm_dtype, {DataType::Int(32)}, operator_name, "ofm", operator_type); - } else { - reporter->GetDiagCtx().EmitFatal( - Diagnostic::Error(reporter->GetSpan()) - << "Invalid operator: expected " << operator_name << " 'ADD' or 'SUB' or 'MUL' or " - << "'MIN' or 'MAX' or 'SHR' or 'SHL' for operator_type but was " << param->operator_type); - return false; - } - - // Assign ofm type - auto ofm_shape = EthosuInferElementwiseOutputShape(ifm->shape, param->ifm_layout, - param->ofm_layout, param->ifm_channels); - reporter->Assign(types[result_index], TensorType(ofm_shape, ofm_dtype)); - return true; -} - -Expr MakeEthosuBinaryElementwise(Expr ifm, Expr ifm2, Expr lut, String operator_type, - double ifm_scale, int ifm_zero_point, double ifm2_scale, - int ifm2_zero_point, double ofm_scale, int ofm_zero_point, - IndexExpr ifm_channels, IndexExpr ifm2_channels, - bool reversed_operands, String activation, int clip_min, - int clip_max, String rounding_mode, String ifm_layout, - String ifm2_layout, String ofm_layout, String ofm_dtype, - bool use_rescale, int rescale_scale, int rescale_shift) { - auto attrs = make_object(); - - attrs->operator_type = std::move(operator_type); - attrs->ifm_scale = ifm_scale; - attrs->ifm_zero_point = ifm_zero_point; - attrs->ifm2_scale = ifm2_scale; - attrs->ifm2_zero_point = ifm2_zero_point; - attrs->ofm_scale = ofm_scale; - 
attrs->ofm_zero_point = ofm_zero_point; - attrs->ifm_channels = std::move(ifm_channels); - attrs->ifm2_channels = std::move(ifm2_channels); - attrs->reversed_operands = reversed_operands; - attrs->activation = std::move(activation); - attrs->clip_min = clip_min; - attrs->clip_max = clip_max; - attrs->rounding_mode = std::move(rounding_mode); - attrs->ifm_layout = std::move(ifm_layout); - attrs->ifm2_layout = std::move(ifm2_layout); - attrs->ofm_layout = std::move(ofm_layout); - attrs->ofm_dtype = std::move(ofm_dtype); - attrs->use_rescale = use_rescale; - attrs->rescale_scale = rescale_scale; - attrs->rescale_shift = rescale_shift; - - static const Op& op = Op::Get("contrib.ethosu.binary_elementwise"); - return Call(op, {ifm, ifm2, lut}, Attrs(attrs), {}); -} - -TVM_REGISTER_GLOBAL("relay.op._make.ethosu_binary_elementwise") - .set_body_typed(MakeEthosuBinaryElementwise); - -RELAY_REGISTER_OP("contrib.ethosu.binary_elementwise") - .describe(R"code(Arm(R) Ethos(TM)-U NPU quantized binary elementwise operator. - -This Relay operator corresponds to the hardware-implemented quantized -binary elementwise operation found on Ethos(TM)-U NPU. It accepts either NHWC -or NHCWB16 format for the inputs data (input feature maps, or IFMs). - -Reference: https://developer.arm.com/documentation/102420/0200/ - -- **ifm**: NHWC - (1, ifm_height, ifm_width, ifm_channels) - NHCWB16 - (1, ifm_height, ifm_channels // 16, ifm_width, 16) -- **ifm2**: NHWC - (1, ifm_height, ifm_width, ifm_channels) - NHCWB16 - (1, ifm_height, ifm_channels // 16, ifm_width, 16) -- **ofm**: (1, ofm_height, ofm_width, ifm_channels) - -)code" TVM_ADD_FILELINE) - .set_attrs_type() - .set_num_inputs(3) - .add_argument("ifm", "Tensor", "The Input Feature Map tensor (IFM).") - .add_argument("ifm2", "Tensor", "The Input Feature Map tensor 2 (IFM2).") - .add_argument("lut", "Tensor", "The look-up table of values to use if activation = 'LUT'") - .set_support_level(11) - .add_type_rel("EthosuBinaryElementwise", EthosuBinaryElementwiseRel); - -} // namespace ethosu -} // namespace contrib -} // namespace op -} // namespace relay -} // namespace tvm diff --git a/src/relay/op/contrib/ethosu/common.cc b/src/relay/op/contrib/ethosu/common.cc deleted file mode 100644 index 5e957957bc1e..000000000000 --- a/src/relay/op/contrib/ethosu/common.cc +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/op/contrib/ethosu/common.cc - * \brief A set of utilities and common functionality for Arm(R) Ethos(TM)-U NPU QNN ops. 
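For reference, the per-operator dtype rules that EthosuBinaryElementwiseRel above enforces one branch at a time can be summarised as a small predicate. This is a standalone sketch, not the removed implementation, and it omits the separate check that ifm2 must match ifm:

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Returns true if the (ifm, ofm) dtype pair is accepted for the given operator_type.
bool DtypesAllowed(const std::string& op, const std::string& ifm, const std::string& ofm) {
  auto in = [](const std::string& v, std::vector<std::string> set) {
    return std::find(set.begin(), set.end(), v) != set.end();
  };
  if (op == "ADD" || op == "SUB" || op == "MUL") {
    std::vector<std::string> types = {"int8", "uint8", "int16", "int32"};
    return in(ifm, types) && in(ofm, types);  // any pairing of these types
  }
  if (op == "MIN" || op == "MAX") {
    return in(ifm, {"int8", "uint8"}) && ofm == ifm;  // ifm and ofm must match
  }
  if (op == "SHR") return ifm == "int32" && in(ofm, {"int8", "uint8", "int32"});
  if (op == "SHL") return ifm == "int32" && ofm == "int32";
  return false;  // unknown operator_type
}

int main() {
  std::cout << DtypesAllowed("MUL", "int8", "int32") << '\n';  // 1: mixed pairing allowed
  std::cout << DtypesAllowed("MAX", "int8", "uint8") << '\n';  // 0: MIN/MAX must match
  std::cout << DtypesAllowed("SHL", "int32", "int8") << '\n';  // 0: SHL is int32 -> int32 only
  return 0;
}
```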
- */ - -#include "common.h" - -#include - -#include "../../op_common.h" - -namespace tvm { -namespace relay { -namespace op { -namespace contrib { -namespace ethosu { - -Array EthosuInferElementwiseOutputShape(Array ifm_shape, String ifm_layout, - String ofm_layout, IndexExpr ofm_channels) { - // In the case of NHCWB16, convert the ifm shape to NHW (C not required for this function) - if (ifm_layout == "NHCWB16") { - ifm_shape = {ifm_shape[0], ifm_shape[1], ifm_shape[3]}; - } - Array oshape({ifm_shape[0], ifm_shape[1], ifm_shape[2], ofm_channels}); - - // If the ofm is NHCWB16, convert the layout - if (ofm_layout == "NHCWB16") { - int channel_bricks = 1 + (oshape[3].as()->value - 1) / 16; - oshape = {oshape[0], oshape[1], channel_bricks, oshape[2], 16}; - } - - return oshape; -} - -Array EthosuInferKernelOutput(Array ifm_shape, String ifm_layout, - String ofm_layout, Array kernel_shape, - IndexExpr ofm_channels, Array dilation, - Array strides, Array padding) { - // In the case of NHCWB16, convert the ifm shape to NHW (C not required for this function) - if (ifm_layout == "NHCWB16") { - ifm_shape = {ifm_shape[0], ifm_shape[1], ifm_shape[3]}; - } - Array output_shape({ifm_shape[0], 0, 0, ofm_channels}); - - IndexExpr dilated_ksize_y = 1 + (kernel_shape[0] - 1) * dilation[0]; - IndexExpr dilated_ksize_x = 1 + (kernel_shape[1] - 1) * dilation[1]; - IndexExpr pad_h, pad_w; - GetPaddingHeightWidth(padding, &pad_h, &pad_w); - output_shape.Set(1, indexdiv(ifm_shape[1] + pad_h - dilated_ksize_y, strides[0]) + 1); - output_shape.Set(2, indexdiv(ifm_shape[2] + pad_w - dilated_ksize_x, strides[1]) + 1); - - // If the ofm is NHCWB16, convert the layout - if (ofm_layout == "NHCWB16") { - int channel_bricks = 1 + (output_shape[3].as()->value - 1) / 16; - output_shape = {output_shape[0], output_shape[1], channel_bricks, output_shape[2], 16}; - } - - return output_shape; -} - -Array EthosuInferUpscaledInput(Array ifm_shape, String ifm_layout) { - if (ifm_layout == "NHCWB16") { - ifm_shape = {ifm_shape[0], ifm_shape[1], ifm_shape[3], ifm_shape[2] * 16}; - } - - const int scale_factor = 2; - Array new_ifm_shape = {ifm_shape[0], ifm_shape[1] * scale_factor, - ifm_shape[2] * scale_factor, ifm_shape[3]}; - - if (ifm_layout == "NHCWB16") { - int channel_bricks = 1 + (new_ifm_shape[3].as()->value - 1) / 16; - new_ifm_shape = {new_ifm_shape[0], new_ifm_shape[1], channel_bricks, new_ifm_shape[2], 16}; - } - - return new_ifm_shape; -} - -DataType DataTypeFromString(const String& dtype) { - DLDataType dl_dtype = tvm::runtime::String2DLDataType(dtype); - return DataType(dl_dtype); -} - -void CheckDataType(const TypeReporter& reporter, const DataType& data_type, - const std::initializer_list& allowed_data_types, - const String& operator_name, const String& tensor_name, - const String& operator_type) { - for (const auto& i : allowed_data_types) { - if (data_type == i) { - return; - } - } - - std::ostringstream message; - message << "Invalid operator: expected " << operator_name << " "; - if (operator_type != "") { - message << operator_type << " "; - } - message << "to have type in {"; - for (auto it = allowed_data_types.begin(); it != allowed_data_types.end(); ++it) { - message << *it; - if (std::next(it) != allowed_data_types.end()) { - message << ", "; - } - } - message << "}"; - message << " for " << tensor_name << " but was " << data_type << "."; - - reporter->GetDiagCtx().EmitFatal(Diagnostic::Error(reporter->GetSpan()) << message.str()); -} - -void CheckUpscaleMethod(const TypeReporter& reporter, const String& 
upscale_method, - const std::initializer_list& allowed_upscale_methods, - const String& operator_name, const String& operator_type) { - for (const auto& i : allowed_upscale_methods) { - if (upscale_method == i) { - return; - } - } - - std::ostringstream message; - message << "Invalid operator: expected " << operator_name << " "; - if (operator_type != "") { - message << operator_type << " "; - } - message << "to have upscale method in {"; - for (auto it = allowed_upscale_methods.begin(); it != allowed_upscale_methods.end(); ++it) { - message << *it; - if (std::next(it) != allowed_upscale_methods.end()) { - message << ", "; - } - } - message << "}"; - message << " but was " << upscale_method << "."; - - reporter->GetDiagCtx().EmitFatal(Diagnostic::Error(reporter->GetSpan()) << message.str()); -} - -void CheckDataTypeMatch(const TypeReporter& reporter, const DataType& data_type, - const DataType& data_type2, const String& operator_name, - const String& tensor_name, const String& tensor_name2, - const String& operator_type) { - if (data_type == data_type2) { - return; - } - - std::ostringstream message; - message << "Invalid operator: expected " << operator_name << " "; - if (operator_type != "") { - message << operator_type << " "; - } - message << "data types for " << tensor_name << " and " << tensor_name2 << " to match, but was " - << data_type << " and " << data_type2; - - reporter->GetDiagCtx().EmitFatal(Diagnostic::Error(reporter->GetSpan()) << message.str()); -} - -} // namespace ethosu -} // namespace contrib -} // namespace op -} // namespace relay -} // namespace tvm diff --git a/src/relay/op/contrib/ethosu/common.h b/src/relay/op/contrib/ethosu/common.h deleted file mode 100644 index a399a2e53aa4..000000000000 --- a/src/relay/op/contrib/ethosu/common.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/op/contrib/ethosu/common.h - * \brief Functions for all Arm(R) Ethos(TM)-U NPU operators to use. - */ - -#ifndef TVM_RELAY_OP_CONTRIB_ETHOSU_COMMON_H_ -#define TVM_RELAY_OP_CONTRIB_ETHOSU_COMMON_H_ - -#include - -namespace tvm { -namespace relay { -namespace op { -namespace contrib { -namespace ethosu { - -/*! \brief Infer the output tensor shape for binary elementwise operators. - * \param ifm_shape The shape of Input Feature Map. - * \param ifm_layout The layout of the IFM (NHWC or NHCWB16). - * \param ofm_layout The layout of the OFM (NHWC or NHCWB16). - * \param ofm_channels The number of Output Feature Map channels. - * \return The shape of the output tensor. - */ -Array EthosuInferElementwiseOutputShape(Array ifm_shape, String ifm_layout, - String ofm_layout, IndexExpr ofm_channels); - -/*! \brief Infer the output tensor shape for convolution and pooling operators. 
- * \param ifm_shape The shape of Input Feature Map. - * \param ifm_layout The layout of the IFM (NHWC or NHCWB16). - * \param ofm_layout The layout of the OFM (NHWC or NHCWB16). - * \param kernel_shape Kernel shape in format (height, width). - * \param ofm_channels The number of Output Feature Map channels. - * \param dilation The 2-dimensional dilation as (dilation_height, dilation_width). - * \param strides The 2 dimensional strides as (stride_height, stride_width). - * \param padding The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right). - * \return The shape of the output tensor. - */ -Array EthosuInferKernelOutput(Array ifm_shape, String ifm_layout, - String ofm_layout, Array kernel_shape, - IndexExpr ofm_channels, Array dilation, - Array strides, Array padding); - -/*! \brief Infer the Output Feature Map shape for operations that use upscaling. - * \param ifm_shape The shape of the Input Feature Map. - * \param ifm_layout The layout of the Input Feature Map. - */ -Array EthosuInferUpscaledInput(Array ifm_shape, String ifm_layout); - -/*! \brief Get data type from string representation. - * \param dtype Data type in lower case format followed by number of bits e.g. "int8". - */ -DataType DataTypeFromString(const String& dtype); - -/*! \brief Check the data type for a given input matches one given in allowed_data_types. Raise a - * type inference error if not. - * \param reporter The infer type reporter. - * \param data_type The data type to check. - * \param allowed_data_types An initializer list of allowed data types. - * \param operator_name The name of the operator to report. - * \param tensor_name The name of the tensor to report e.g. "ifm", "ofm". - * \param operator_type The type of the operator to report e.g. "ADD" for binary_elementwise. - */ -void CheckDataType(const TypeReporter& reporter, const DataType& data_type, - const std::initializer_list& allowed_data_types, - const String& operator_name, const String& tensor_name, - const String& operator_type = ""); - -/*! \brief Check the upscale method matches one given in allowed_upscale_methods. Raise a type - * inference error if not. - * \param reporter The infer type reporter. - * \param upscale_method The upscale method string to check. - * \param allowed_upscale_methods An initializer list of allowed upscale methods. - * \param operator_name The name of the operator to report. - * \param operator_type The type of the operator to report e.g. "ADD" for binary_elementwise. - */ -void CheckUpscaleMethod(const TypeReporter& reporter, const String& upscale_method, - const std::initializer_list& allowed_upscale_methods, - const String& operator_name, const String& operator_type = ""); - -/*! \brief Check the data type matches that of the second data type provided. Raise a type inference - * error if not. - * \param reporter The infer type reporter. - * \param data_type The data type to check. - * \param data_type2 The second data type to check. - * \param operator_name The name of the operator to report. - * \param tensor_name The name of the tensor to report e.g. "ifm", "ofm". - * \param tensor_name2 The name of the second tensor to report e.g. "ifm2". - * \param operator_type The type of the operator to report e.g. "ADD" for binary_elementwise. 
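The two shape computations these helpers document are short enough to sketch standalone: the output spatial size follows the usual dilated-kernel formula, and NHCWB16 groups channels into bricks of 16. Plain C++ with made-up sizes; pad_total stands for the summed top/bottom (or left/right) padding that GetPaddingHeightWidth extracts in the removed common.cc:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Output spatial size as in the removed EthosuInferKernelOutput:
// floor((in + pad_total - dilated_kernel) / stride) + 1.
int64_t OutDim(int64_t in, int64_t kernel, int64_t dilation, int64_t stride, int64_t pad_total) {
  int64_t dilated_kernel = 1 + (kernel - 1) * dilation;
  return (in + pad_total - dilated_kernel) / stride + 1;
}

// NHWC -> NHCWB16: channels are grouped into bricks of 16.
std::vector<int64_t> ToNHCWB16(const std::vector<int64_t>& nhwc) {
  int64_t bricks = 1 + (nhwc[3] - 1) / 16;
  return {nhwc[0], nhwc[1], bricks, nhwc[2], 16};
}

int main() {
  // Made-up example: 56x56x24 IFM, 3x3 kernel, stride 2, padding (1, 1) per axis.
  int64_t oh = OutDim(56, 3, /*dilation=*/1, /*stride=*/2, /*pad_total=*/2);
  int64_t ow = OutDim(56, 3, 1, 2, 2);
  std::vector<int64_t> ofm_brick = ToNHCWB16({1, oh, ow, 24});

  std::cout << "NHWC ofm: 1x" << oh << "x" << ow << "x24\n";  // 1x28x28x24
  std::cout << "NHCWB16 ofm: 1x" << ofm_brick[1] << "x" << ofm_brick[2] << "x"
            << ofm_brick[3] << "x16\n";                        // 1x28x2x28x16
  return 0;
}
```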
- */ -void CheckDataTypeMatch(const TypeReporter& reporter, const DataType& data_type, - const DataType& data_type2, const String& operator_name, - const String& tensor_name, const String& tensor_name2, - const String& operator_type = ""); - -} // namespace ethosu -} // namespace contrib -} // namespace op -} // namespace relay -} // namespace tvm - -#endif // TVM_RELAY_OP_CONTRIB_ETHOSU_COMMON_H_ diff --git a/src/relay/op/contrib/ethosu/convolution.cc b/src/relay/op/contrib/ethosu/convolution.cc deleted file mode 100644 index 96e15709f3d2..000000000000 --- a/src/relay/op/contrib/ethosu/convolution.cc +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/op/contrib/ethosu/convolution.cc - * \brief Operator definitions for the Arm(R) Ethos(TM)-U NPU convolution ops. - */ -#include "../../nn/convolution.h" - -#include -#include -#include -#include -#include - -#include "../../../qnn/utils.h" -#include "common.h" -#include "op_attrs.h" - -namespace tvm { -namespace relay { -namespace op { -namespace contrib { -namespace ethosu { - -bool EthosuConv2DRel(const Array& types, int num_inputs, const Attrs& attrs, - const TypeReporter& reporter) { - CHECK_EQ(types.size(), 5); - const auto* ifm = types[0].as(); - const auto* weight = types[1].as(); - const auto* scale_bias = types[2].as(); - if (ifm == nullptr || weight == nullptr) return false; - const auto* param = attrs.as(); - CHECK(param != nullptr) << "EthosuConv2DAttrs cannot be nullptr."; - const String operator_name = "ethosu_conv2d"; - - CheckDataType(reporter, ifm->dtype, {DataType::UInt(8), DataType::Int(8)}, operator_name, "ifm"); - CheckDataType(reporter, weight->dtype, {DataType::UInt(8), DataType::Int(8)}, operator_name, - "weight"); - CheckDataType(reporter, scale_bias->dtype, {DataType::UInt(8)}, operator_name, "scale bias"); - - CheckUpscaleMethod(reporter, param->upscale, {"NONE", "ZEROS", "NEAREST"}, operator_name); - - // The scale_bias should be provided as a tensor of size {ofm_channels, 10} - reporter->Assign(types[2], TensorType({weight->shape[0], 10}, DataType::UInt(8))); - - // Assign weight type {ofm_channels, kernel_height, kernel_width, ifm_channels} - reporter->Assign(types[1], TensorType({param->ofm_channels, param->kernel_shape[0], - param->kernel_shape[1], weight->shape[3]}, - weight->dtype)); - - Array ifm_shape = ifm->shape; - if (param->upscale != "NONE") { - ifm_shape = EthosuInferUpscaledInput(ifm_shape, param->ifm_layout); - } - - // Assign ofm type - auto ofm_shape = - EthosuInferKernelOutput(ifm_shape, param->ifm_layout, param->ofm_layout, param->kernel_shape, - param->ofm_channels, param->dilation, param->strides, param->padding); - - reporter->Assign(types[4], TensorType(ofm_shape, ifm->dtype)); - 
return true; -} - -Expr MakeEthosuConv2D(Expr ifm, Expr weight, Expr scale_bias, Expr lut, double ifm_scale, - int ifm_zero_point, int weight_zero_point, double ofm_scale, - int ofm_zero_point, Array kernel_shape, IndexExpr ofm_channels, - Array strides, Array padding, Array dilation, - String activation, int clip_min, int clip_max, String rounding_mode, - String upscale, String ifm_layout, String ofm_layout) { - auto attrs = make_object(); - attrs->ifm_scale = ifm_scale; - attrs->ifm_zero_point = ifm_zero_point; - attrs->weight_zero_point = weight_zero_point; - attrs->ofm_scale = ofm_scale; - attrs->ofm_zero_point = ofm_zero_point; - attrs->kernel_shape = std::move(kernel_shape); - attrs->ofm_channels = std::move(ofm_channels); - attrs->strides = std::move(strides); - attrs->padding = std::move(padding); - attrs->dilation = std::move(dilation); - attrs->activation = std::move(activation); - attrs->clip_min = clip_min; - attrs->clip_max = clip_max; - attrs->rounding_mode = std::move(rounding_mode); - attrs->upscale = std::move(upscale); - attrs->ifm_layout = std::move(ifm_layout); - attrs->ofm_layout = std::move(ofm_layout); - static const Op& op = Op::Get("contrib.ethosu.conv2d"); - return Call(op, {ifm, weight, scale_bias, lut}, Attrs(attrs), {}); -} - -TVM_REGISTER_GLOBAL("relay.op._make.ethosu_conv2d").set_body_typed(MakeEthosuConv2D); - -RELAY_REGISTER_OP("contrib.ethosu.conv2d") - .describe(R"code(Arm(R) Ethos(TM)-U NPU 2D quantized convolution operator. - -This Relay operator corresponds to the hardware-implemented quantized -convolution operation found on Ethos(TM)-U NPU. It accepts either NHWC -or NHCWB16 format for the input data (Input Feature Map, or IFM) and -OHWI format for the kernel weights. - -Reference: https://developer.arm.com/documentation/102420/0200/ - -Note that the per-channel weight scale and bias tensor must be packed together into -a combined tensor of uint80s. This is represented in TVM by a (channels, 10) tensor -of type uint8. For more detail, refer to the Technical Reference Manual linked above. - -- **ifm**: NHWC - (1, ifm_height, ifm_width, ifm_channels) - NHCWB16 - (1, ifm_height, ifm_channels // 16, ifm_width, 16) -- **weight**: (ofm_channels, kernel_shape[0], kernel_shape[1], ifm_channels) -- **scale_bias**: (ofm_channels, 10) -- **ofm**: (1, ofm_height, ofm_width, ofm_channels) - -)code" TVM_ADD_FILELINE) - .set_attrs_type() - .set_num_inputs(4) - .add_argument("ifm", "Tensor", "The Input Feature Map tensor (IFM).") - .add_argument("weight", "Tensor", "The weight tensor.") - .add_argument("scale_bias", "Tensor", "The packed per-channel weight scale and bias tensor.") - .add_argument("lut", "Tensor", "The look-up table of values to use if activation = 'LUT'.") - .set_support_level(11) - .add_type_rel("EthosuConv2D", EthosuConv2DRel); - -} // namespace ethosu -} // namespace contrib -} // namespace op -} // namespace relay -} // namespace tvm diff --git a/src/relay/op/contrib/ethosu/depthwise.cc b/src/relay/op/contrib/ethosu/depthwise.cc deleted file mode 100644 index f33141d31e74..000000000000 --- a/src/relay/op/contrib/ethosu/depthwise.cc +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/op/contrib/ethosu/depthwise.cc - * \brief Depthwise convolution 2D operator definition for the Arm(R) Ethos(TM)-U NPU - */ -#include -#include -#include -#include -#include - -#include "../../../qnn/utils.h" -#include "../../nn/convolution.h" -#include "common.h" -#include "op_attrs.h" - -namespace tvm { -namespace relay { -namespace op { -namespace contrib { -namespace ethosu { - -bool EthosuDepthwiseConv2DRel(const Array& types, int num_inputs, const Attrs& attrs, - const TypeReporter& reporter) { - ICHECK_EQ(types.size(), 5); - const auto* ifm = types[0].as(); - const auto* weight = types[1].as(); - const auto* scale_bias = types[2].as(); - if (ifm == nullptr || weight == nullptr) return false; - - const auto* param = attrs.as(); - ICHECK(param != nullptr) << "EthosuDepthwiseConv2DAttrs cannot be nullptr."; - - const String operator_name = "ethosu_depthwise_conv2d"; - - CheckDataType(reporter, ifm->dtype, {DataType::UInt(8), DataType::Int(8)}, operator_name, "ifm"); - CheckDataType(reporter, weight->dtype, {DataType::UInt(8), DataType::Int(8)}, operator_name, - "weight"); - CheckDataType(reporter, scale_bias->dtype, {DataType::UInt(8)}, operator_name, "scale bias"); - - DataType ofm_dtype = DataTypeFromString(param->ofm_dtype); - auto ofm_dtypes = {DataType::UInt(8), DataType::Int(8), DataType::Int(16), DataType::Int(32)}; - CheckDataType(reporter, ofm_dtype, ofm_dtypes, operator_name, "ofm"); - - // Collect the ifm, weight and ofm tensors for using in the inference function - Array tensor_types = {types[0], types[1], types[4]}; - - // Assign weight type {ofm_channels, kernel_height, kernel_width, 1} - reporter->Assign(types[1], TensorType({param->ofm_channels, param->kernel_shape[0], - param->kernel_shape[1], weight->shape[3]}, - weight->dtype)); - - // Assign ofm type - auto ofm_shape = - EthosuInferKernelOutput(ifm->shape, param->ifm_layout, param->ofm_layout, param->kernel_shape, - param->ofm_channels, param->dilation, param->strides, param->padding); - - reporter->Assign(types[4], TensorType(ofm_shape, ofm_dtype)); - - return true; -} - -Expr MakeEthosuDepthwiseConv2D(Expr ifm, Expr weight, Expr scale_bias, Expr lut, double ifm_scale, - int ifm_zero_point, int weight_zero_point, double ofm_scale, - int ofm_zero_point, Array kernel_shape, - IndexExpr ofm_channels, Array strides, - Array padding, Array dilation, - String activation, int clip_min, int clip_max, String rounding_mode, - String upscale, String ifm_layout, String ofm_layout, - String ofm_dtype) { - auto attrs = make_object(); - attrs->ifm_scale = ifm_scale; - attrs->ifm_zero_point = ifm_zero_point; - attrs->weight_zero_point = weight_zero_point; - attrs->ofm_scale = ofm_scale; - attrs->ofm_zero_point = ofm_zero_point; - attrs->kernel_shape = std::move(kernel_shape); - attrs->ofm_channels = std::move(ofm_channels); - attrs->strides = std::move(strides); - attrs->padding = std::move(padding); - attrs->dilation = std::move(dilation); - attrs->activation = std::move(activation); - attrs->clip_min = clip_min; - attrs->clip_max = clip_max; - attrs->rounding_mode = 
std::move(rounding_mode); - attrs->upscale = std::move(upscale); - attrs->ifm_layout = std::move(ifm_layout); - attrs->ofm_layout = std::move(ofm_layout); - attrs->ofm_dtype = std::move(ofm_dtype); - static const Op& op = Op::Get("contrib.ethosu.depthwise_conv2d"); - return Call(op, {ifm, weight, scale_bias, lut}, Attrs(attrs), {}); -} - -TVM_REGISTER_GLOBAL("relay.op._make.ethosu_depthwise_conv2d") - .set_body_typed(MakeEthosuDepthwiseConv2D); - -RELAY_REGISTER_OP("contrib.ethosu.depthwise_conv2d") - .describe(R"code(Arm(R) Ethos(TM)-U NPU 2D quantized depthwise operator. - -This Relay operator corresponds to the hardware-implemented quantized -depthwise operation found on Ethos(TM)-U NPU. It accepts either NHWC or NHCWB16 format -for the input data (input feature map, or IFM) and OHWI format for the kernel weights. - -- **ifm**: NHWC - (1, ifm_height, ifm_width, ifm_channels) - NHCWB16 - (1, ifm_height, ifm_channels // 16, ifm_width, 16) -- **weight**: (ofm_channels, kernel_shape[0], kernel_shape[1], 1 (depth multiplier)) -- **scale_bias**: (ofm_channels, 10) -- **ofm**: (1, ofm_height, ofm_width, ofm_channels) - -)code" TVM_ADD_FILELINE) - .set_attrs_type() - .set_num_inputs(4) - .add_argument("ifm", "Tensor", "The Input Feature Map tensor (IFM).") - .add_argument("weight", "Tensor", "The weight tensor.") - .add_argument("scale_bias", "Tensor", "The packed per-channel weight scale and bias tensor.") - .add_argument("lut", "Tensor", "The look-up table of values to use if activation = 'LUT'") - .set_support_level(11) - .add_type_rel("EthosuDepthwiseConv2D", EthosuDepthwiseConv2DRel); - -} // namespace ethosu -} // namespace contrib -} // namespace op -} // namespace relay -} // namespace tvm diff --git a/src/relay/op/contrib/ethosu/identity.cc b/src/relay/op/contrib/ethosu/identity.cc deleted file mode 100644 index 9ec6c6f42ce0..000000000000 --- a/src/relay/op/contrib/ethosu/identity.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/op/contrib/ethosu/identity.cc - * \brief Property def of the Arm(R) Ethos(TM)-U NPU identity op. 
- */ -#include - -#include "common.h" -#include "op_attrs.h" - -namespace tvm { -namespace relay { -namespace op { -namespace contrib { -namespace ethosu { - -bool EthosuIdentityRel(const Array& types, int num_inputs, const Attrs& attrs, - const TypeReporter& reporter) { - const int ifm_index = 0; - const int result_index = 2; - ICHECK_EQ(types.size(), result_index + 1); - - const auto* ifm = types[ifm_index].as(); - if (ifm == nullptr) return false; - - const auto* param = attrs.as(); - ICHECK(param != nullptr) << "EthosuIdentityAttrs cannot be nullptr."; - - const String operator_name = "ethosu_identity"; - - CheckDataType(reporter, ifm->dtype, {DataType::UInt(8), DataType::Int(8), DataType::Int(16)}, - operator_name, "ifm"); - - if (ifm->shape.size() > 4) { - reporter->GetDiagCtx().EmitFatal( - Diagnostic::Error(reporter->GetSpan()) - << "Invalid operator: Input Feature Map should be at most 4 dimensional, but was " - << ifm->shape); - return false; - } - - // Assign ofm type - auto ofm_shape = ifm->shape; - reporter->Assign(types[result_index], TensorType(ofm_shape, ifm->dtype)); - return true; -} - -Expr MakeEthosuIdentity(Expr ifm, Expr lut, double ifm_scale, int ifm_zero_point, double ofm_scale, - int ofm_zero_point, String activation, String rounding_mode) { - auto attrs = make_object(); - attrs->ifm_scale = ifm_scale; - attrs->ifm_zero_point = ifm_zero_point; - attrs->ofm_scale = ofm_scale; - attrs->ofm_zero_point = ofm_zero_point; - attrs->activation = std::move(activation); - attrs->rounding_mode = std::move(rounding_mode); - static const Op& op = Op::Get("contrib.ethosu.identity"); - return Call(op, {ifm, lut}, Attrs(attrs), {}); -} - -TVM_REGISTER_GLOBAL("relay.op._make.ethosu_identity").set_body_typed(MakeEthosuIdentity); - -RELAY_REGISTER_OP("contrib.ethosu.identity") - .describe(R"code(Arm(R) Ethos(TM)-U NPU identity operator. - -This Relay operator performs the identity pooling operation on the NPU with a capability -to requantize the data. It accepts input tensors of 4 dimensions or less. - -)code" TVM_ADD_FILELINE) - .set_attrs_type() - .set_num_inputs(2) - .add_argument("ifm", "Tensor", "The Input Feature Map tensor (IFM).") - .add_argument("lut", "Tensor", "The look-up table values to use if activation = 'LUT'.") - .set_support_level(11) - .add_type_rel("EthosuIdentity", EthosuIdentityRel); - -} // namespace ethosu -} // namespace contrib -} // namespace op -} // namespace relay -} // namespace tvm diff --git a/src/relay/op/contrib/ethosu/op_attrs.h b/src/relay/op/contrib/ethosu/op_attrs.h deleted file mode 100644 index 868d9d6ad42f..000000000000 --- a/src/relay/op/contrib/ethosu/op_attrs.h +++ /dev/null @@ -1,505 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \file src/relay/op/contrib/ethosu/op_attrs.h - * \brief Attributes for the Arm(R) Ethos(TM)-U NPU operators. - */ - -#ifndef TVM_RELAY_OP_CONTRIB_ETHOSU_OP_ATTRS_H_ -#define TVM_RELAY_OP_CONTRIB_ETHOSU_OP_ATTRS_H_ - -#include - -namespace tvm { -namespace relay { -namespace op { -namespace contrib { -namespace ethosu { - -/*! \brief Attributes used by the Ethos(TM)-U NPU binary elementwise operators */ -struct EthosuBinaryElementwiseAttrs : public tvm::AttrsNode { - String operator_type; - double ifm_scale; - int ifm_zero_point; - double ifm2_scale; - int ifm2_zero_point; - double ofm_scale; - int ofm_zero_point; - IndexExpr ifm_channels; - IndexExpr ifm2_channels; - bool reversed_operands; - String activation; - int clip_min; - int clip_max; - String rounding_mode; - String ifm_layout; - String ifm2_layout; - String ofm_layout; - String ofm_dtype; - bool use_rescale; - int rescale_scale; - int rescale_shift; - - TVM_DECLARE_ATTRS(EthosuBinaryElementwiseAttrs, "relay.attrs.EthosuBinaryElementwiseAttrs") { - TVM_ATTR_FIELD(operator_type) - .describe( - "The type of the binary elementwise operator." - "'ADD'" - "'SUB'" - "'MUL'" - "'MIN'" - "'MAX'" - "'SHR'" - "'SHL'"); - TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm_zero_point) - .describe("The quantization zero point for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm2_scale) - .describe("The quantization scale for the Input Feature Map tensor 2."); - TVM_ATTR_FIELD(ifm2_zero_point) - .describe("The quantization zero point for the Input Feature Map tensor 2."); - TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_zero_point) - .describe("The quantization zero point for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ifm_channels).describe("The number of the Input Feature Map channels."); - TVM_ATTR_FIELD(ifm2_channels).describe("The number of the Input Feature Map 2 channels."); - TVM_ATTR_FIELD(reversed_operands) - .describe("True if IFM2 is the first operand and IFM is the second operand.") - .set_default(false); - TVM_ATTR_FIELD(activation) - .describe( - "The activation function to use. " - "'NONE' - no activation function. " - "'CLIP' - clip the output between clip_min and clip_max. " - "'TANH' - tanh activation function. " - "'SIGMOID' - sigmoid activation function. " - "'LUT' - use a look-up table to perform the activation function." - "Available activations for activation type:" - "{int8, uint8}: 'NONE', 'CLIP', 'TANH', 'SIGMOID', 'LUT'" - "{int32}: 'NONE'") - .set_default("NONE"); - TVM_ATTR_FIELD(clip_min) - .describe("The minimum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(clip_max) - .describe("The maximum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(rounding_mode) - .describe( - "The rounding mode to apply to the Output Feature Map tensor. " - "'TFL' - Tensorflow Lite rounding scheme. " - "'TRUNCATE' - Truncate towards zero." - "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") - .set_default("TFL"); - TVM_ATTR_FIELD(ifm_layout) - .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - TVM_ATTR_FIELD(ifm2_layout) - .describe("The layout of the Input Feature Map tensor 2. Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - TVM_ATTR_FIELD(ofm_layout) - .describe("The layout of the Output Feature Map tensor. 
Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - TVM_ATTR_FIELD(ofm_dtype).describe( - "The Output Feature Map tensor type." - "MUL, ADD, SUB {IFM}->{OFM}:" - " {uint8, int8 int32} -> {uint8, int8, int32}, any pairing" - "MAX, MIN:" - " IFM and OFM must be of the same type, one of:" - " {int8, uint8}" - "SHR {IFM}->{OFM}:" - " {int32}->{int8, uint8, int32}, any pairing" - "SHL:" - " {int32}->{int32} only"); - TVM_ATTR_FIELD(use_rescale).describe("Use explicit scaling if True.").set_default(false); - TVM_ATTR_FIELD(rescale_scale) - .describe( - "Scale value for rescale. " - "For 32-bit operations scale is not applied but shift is.") - .set_default(0); - TVM_ATTR_FIELD(rescale_shift).describe("Shift value for rescale.").set_default(0); - } -}; - -TVM_REGISTER_NODE_TYPE(EthosuBinaryElementwiseAttrs); - -/*! \brief Attributes used by the Ethos(TM)-U NPU convolution operator */ -struct EthosuConv2DAttrs : public tvm::AttrsNode { - double ifm_scale; - int ifm_zero_point; - int weight_zero_point; - double ofm_scale; - int ofm_zero_point; - Array kernel_shape; - IndexExpr ofm_channels; - Array strides; - Array padding; - Array dilation; - String activation; - int clip_min; - int clip_max; - String rounding_mode; - String upscale; - String ifm_layout; - String ofm_layout; - - TVM_DECLARE_ATTRS(EthosuConv2DAttrs, "relay.attrs.EthosuConv2DAttrs") { - TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm_zero_point) - .describe("The quantization zero point for the Input Feature Map tensor."); - TVM_ATTR_FIELD(weight_zero_point) - .describe("The quantization zero point for the weight tensor."); - TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_zero_point) - .describe("The quantization zero point for the Output Feature Map tensor."); - TVM_ATTR_FIELD(kernel_shape) - .describe("The 2 dimensional kernel shape as (kernel_height, kernel_width).") - .set_default(NullValue>()); - TVM_ATTR_FIELD(ofm_channels) - .describe("The number of the Output Feature Map channels.") - .set_default(NullValue()); - TVM_ATTR_FIELD(strides) - .set_default(Array({1, 1})) - .describe("The 2 dimensional strides as (stride_height, stride_width)."); - TVM_ATTR_FIELD(padding) - .set_default(Array({0, 0, 0, 0})) - .describe("The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right)."); - TVM_ATTR_FIELD(dilation) - .set_default(Array({1, 1})) - .describe("The 2 dimensional dilation as (dilation_height, dilation_width)."); - TVM_ATTR_FIELD(activation) - .describe( - "The activation function to use. " - "'NONE' - no activation function. " - "'CLIP' - clip the output between clip_min and clip_max. " - "'TANH' - tanh activation function. " - "'SIGMOID' - sigmoid activation function. " - "'LUT' - use a look-up table to perform the activation function.") - .set_default("NONE"); - TVM_ATTR_FIELD(clip_min) - .describe("The minimum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(clip_max) - .describe("The maximum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(rounding_mode) - .describe( - "The rounding mode to apply to the Output Feature Map tensor. " - "'TFL' - Tensorflow Lite rounding scheme. " - "'TRUNCATE' - Truncate towards zero." 
- "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") - .set_default("TFL"); - TVM_ATTR_FIELD(upscale) - .describe( - "The 2x2 upscaling mode to apply to the Input Feature Map tensor. " - "'NONE' - no upscaling. " - "'NEAREST' - upscale using nearest neighbour. " - "'ZEROS' - upscale using zeros.") - .set_default("NONE"); - TVM_ATTR_FIELD(ifm_layout) - .set_default("NHWC") - .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'."); - TVM_ATTR_FIELD(ofm_layout) - .set_default("NHWC") - .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'."); - } -}; - -TVM_REGISTER_NODE_TYPE(EthosuConv2DAttrs); - -/*! \brief Attributes used by the Ethos(TM)-U NPU depthwise operator */ -struct EthosuDepthwiseConv2DAttrs : public tvm::AttrsNode { - double ifm_scale; - int ifm_zero_point; - int weight_zero_point; - double ofm_scale; - int ofm_zero_point; - Array kernel_shape; - IndexExpr ofm_channels; - Array strides; - Array padding; - Array dilation; - String activation; - int clip_min; - int clip_max; - String rounding_mode; - String upscale; - String ifm_layout; - String ofm_layout; - String ofm_dtype; - - TVM_DECLARE_ATTRS(EthosuDepthwiseConv2DAttrs, "relay.attrs.EthosuDepthwiseConv2DAttrs") { - TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm_zero_point) - .describe("The quantization zero point for the Output Feature Map tensor."); - TVM_ATTR_FIELD(weight_zero_point) - .describe("The quantization zero point for the weight tensor."); - TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_zero_point) - .describe("The quantization zero point for the Output Feature Map tensor."); - TVM_ATTR_FIELD(kernel_shape) - .describe("The 2 dimensional kernel shape as (kernel_height, kernel_width).") - .set_default(NullValue>()); - TVM_ATTR_FIELD(ofm_channels) - .describe("The number of OFM channels.") - .set_default(NullValue()); - TVM_ATTR_FIELD(strides) - .describe("The 2 dimensional strides as (stride_height, stride_width).") - .set_default(Array({1, 1})); - TVM_ATTR_FIELD(padding) - .describe("The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right)") - .set_default(Array({0, 0, 0, 0})); - TVM_ATTR_FIELD(dilation) - .describe("The 2 dimensional dilation as (dilation_height, dilation_width).") - .set_default(Array({1, 1})); - TVM_ATTR_FIELD(activation) - .describe( - "Description: The activation function to use." - "'NONE' - no activation function." - "'CLIP' - clip the output between clip_min and clip_max." - "'TANH - tanh activation function." - "'SIGMOID' - sigmoid activation function." - "'LUT' - use a look-up table to perform the activation function.") - .set_default("NONE"); - TVM_ATTR_FIELD(clip_min) - .describe("The minimum clipping value if activation = CLIP.") - .set_default(0); - TVM_ATTR_FIELD(clip_max) - .describe("The maximum clipping value if activation = CLIP.") - .set_default(0); - TVM_ATTR_FIELD(rounding_mode) - .describe( - "The rounding mode to apply to the Output Feature Map tensor. " - "'TFL' - Tensorflow Lite rounding scheme. " - "'TRUNCATE' - Truncate towards zero." - "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") - .set_default("TFL"); - TVM_ATTR_FIELD(upscale) - .describe( - "The 2x2 upscaling mode to apply to the Input Feature Map tensor. " - "'NONE' - no upscaling. " - "'NEAREST' - upscale using nearest neighbour. 
" - "'ZEROS' - upscale using zeros.") - .set_default("NONE"); - TVM_ATTR_FIELD(ifm_layout) - .set_default("NHWC") - .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'."); - TVM_ATTR_FIELD(ofm_layout) - .set_default("NHWC") - .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'."); - TVM_ATTR_FIELD(ofm_dtype) - .describe("The Output Feature Map tensor data type. Can be 'int8', 'uint8' or 'int16'.") - .set_default("int8"); - } -}; - -TVM_REGISTER_NODE_TYPE(EthosuDepthwiseConv2DAttrs); - -/*! \brief Attributes used by the NPU identity operator */ -struct EthosuIdentityAttrs : public tvm::AttrsNode { - double ifm_scale; - int ifm_zero_point; - double ofm_scale; - int ofm_zero_point; - String activation; - String rounding_mode; - - TVM_DECLARE_ATTRS(EthosuIdentityAttrs, "relay.attrs.EthosuIdentityAttrs") { - TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm_zero_point) - .describe("The quantization zero point for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_zero_point) - .describe("The quantization zero point for the Output Feature Map tensor."); - TVM_ATTR_FIELD(activation) - .describe( - "The activation function to use. " - "'NONE' - no activation function. " - "'TANH' - tanh activation function. " - "'SIGMOID' - sigmoid activation function. " - "'LUT' - use a look-up table to perform the activation function.") - .set_default("NONE"); - TVM_ATTR_FIELD(rounding_mode) - .describe( - "The rounding mode to apply to the Output Feature Map tensor. " - "'TFL' - Tensorflow Lite rounding scheme. " - "'TRUNCATE' - Truncate towards zero." - "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") - .set_default("TFL"); - } -}; - -TVM_REGISTER_NODE_TYPE(EthosuIdentityAttrs); - -/*! \brief Attributes used by the Ethos(TM)-U NPU pooling operator */ -struct EthosuPoolingAttrs : public tvm::AttrsNode { - String pooling_type; - double ifm_scale; - int ifm_zero_point; - double ofm_scale; - int ofm_zero_point; - Array pool_shape; - IndexExpr ofm_channels; - String ofm_dtype; - Array strides; - Array padding; - String activation; - int clip_min; - int clip_max; - String rounding_mode; - String upscale; - String ifm_layout; - String ofm_layout; - - TVM_DECLARE_ATTRS(EthosuPoolingAttrs, "relay.attrs.EthosuPoolingAttrs") { - TVM_ATTR_FIELD(pooling_type) - .describe( - "The type of the pooling. 'AVG' - average pool, 'MAX' - max pool, " - "'SUM' - reduce sum pool."); - TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm_zero_point) - .describe("The quantization zero point for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_zero_point) - .describe("The quantization zero point for the Output Feature Map tensor."); - TVM_ATTR_FIELD(pool_shape) - .describe("The 2 dimensional pool shape as (pool_shape_height, pool_shape_width).") - .set_default(NullValue>()); - TVM_ATTR_FIELD(ofm_channels) - .describe(" The number of the Output Feature Map channels.") - .set_default(NullValue()); - TVM_ATTR_FIELD(ofm_dtype).describe( - "The Output Feature Map tensor data type. " - "'AVG' or 'MAX' pooling - can be 'int8', 'uint8', or 'int16'. 
" - "'SUM' pooling - can be 'int32'."); - TVM_ATTR_FIELD(strides) - .set_default(Array({1, 1})) - .describe("The 2 dimensional strides as (stride_height, stride_width)."); - TVM_ATTR_FIELD(padding) - .describe("The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right).") - .set_default(Array({0, 0, 0, 0})); - TVM_ATTR_FIELD(activation) - .describe( - "The activation function to use. " - "'NONE' - no activation function. " - "'CLIP' - clip the output between clip_min and clip_max. " - "'TANH' - tanh activation function. " - "'SIGMOID' - sigmoid activation function. " - "'LUT' - use a look-up table to perform the activation function.") - .set_default("NONE"); - TVM_ATTR_FIELD(clip_min) - .describe("The minimum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(clip_max) - .describe("The maximum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(rounding_mode) - .describe( - "The rounding mode to apply to the Output Feature Map tensor. " - "'TFL' - Tensorflow Lite rounding scheme. " - "'TRUNCATE' - Truncate towards zero." - "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") - .set_default("TFL"); - TVM_ATTR_FIELD(upscale) - .describe( - "The 2x2 upscaling mode to apply to the Input Feature Map tensor. " - "'NONE' - no upscaling. " - "'NEAREST' - upscale using nearest neighbour. " - "'ZEROS' - upscale using zeros.") - .set_default("NONE"); - TVM_ATTR_FIELD(ifm_layout) - .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - TVM_ATTR_FIELD(ofm_layout) - .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - } -}; - -TVM_REGISTER_NODE_TYPE(EthosuPoolingAttrs); - -/*! \brief Attributes used by the NPU unary elementwise operator */ -struct EthosuUnaryElementwiseAttrs : public tvm::AttrsNode { - String operator_type; - double ifm_scale; - int ifm_zero_point; - double ofm_scale; - int ofm_zero_point; - IndexExpr ofm_channels; - String activation; - int clip_min; - int clip_max; - String rounding_mode; - String ifm_layout; - String ofm_layout; - - TVM_DECLARE_ATTRS(EthosuUnaryElementwiseAttrs, "relay.attrs.EthosuUnaryElementwiseAttrs") { - TVM_ATTR_FIELD(operator_type) - .describe( - "The type of the unary elementwise operator." - "'ABS'" - "'CLZ'"); - TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ifm_zero_point) - .describe("The quantization zero point for the Input Feature Map tensor."); - TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_zero_point) - .describe("The quantization zero point for the Output Feature Map tensor."); - TVM_ATTR_FIELD(ofm_channels).describe("The number of OFM channels."); - TVM_ATTR_FIELD(activation) - .describe( - "The activation function to use. " - "'NONE' - no activation function. " - "'CLIP' - clip the output between clip_min and clip_max. " - "'TANH' - tanh activation function. " - "'SIGMOID' - sigmoid activation function. 
" - "'LUT' - use a look-up table to perform the activation function.") - .set_default("NONE"); - TVM_ATTR_FIELD(clip_min) - .describe("The minimum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(clip_max) - .describe("The maximum clipping value if activation = 'CLIP'.") - .set_default(0); - TVM_ATTR_FIELD(rounding_mode) - .describe( - "The rounding mode to apply to the Output Feature Map tensor. " - "'TFL' - Tensorflow Lite rounding scheme. " - "'TRUNCATE' - Truncate towards zero." - "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") - .set_default("TFL"); - TVM_ATTR_FIELD(ifm_layout) - .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - TVM_ATTR_FIELD(ofm_layout) - .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'.") - .set_default("NHWC"); - } -}; - -TVM_REGISTER_NODE_TYPE(EthosuUnaryElementwiseAttrs); - -} // namespace ethosu -} // namespace contrib -} // namespace op -} // namespace relay -} // namespace tvm - -#endif // TVM_RELAY_OP_CONTRIB_ETHOSU_OP_ATTRS_H_ diff --git a/src/relay/op/contrib/ethosu/pooling.cc b/src/relay/op/contrib/ethosu/pooling.cc deleted file mode 100644 index 92e704f667ed..000000000000 --- a/src/relay/op/contrib/ethosu/pooling.cc +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/op/contrib/ethosu/pooling.cc - * \brief Pooling operators definitions for the Arm(R) Ethos(TM)-U NPU. 
- */ -#include - -#include "common.h" -#include "op_attrs.h" - -namespace tvm { -namespace relay { -namespace op { -namespace contrib { -namespace ethosu { - -bool EthosuPoolingRel(const Array& types, int num_inputs, const Attrs& attrs, - const TypeReporter& reporter) { - int ifm_index = 0; - int result_index = 2; - ICHECK_EQ(types.size(), result_index + 1); - - const auto* ifm = types[ifm_index].as(); - if (ifm == nullptr) return false; - - const auto* param = attrs.as(); - ICHECK(param != nullptr) << "EthosuPoolingAttrs cannot be nullptr."; - - const String operator_name = "ethosu_pooling"; - - if (param->pooling_type != "AVG" && param->pooling_type != "MAX" && - param->pooling_type != "SUM") { - reporter->GetDiagCtx().EmitFatal(Diagnostic::Error(reporter->GetSpan()) - << "Invalid operator: expected " << operator_name - << " type 'AVG', 'MAX', or 'SUM' but was " - << param->pooling_type); - return false; - } - - std::initializer_list max_avg_pooling_ifm_dtypes = {DataType::UInt(8), DataType::Int(8), - DataType::Int(16)}; - std::initializer_list sum_pooling_ifm_dtypes = {DataType::UInt(8), DataType::Int(8), - DataType::Int(16), DataType::Int(32)}; - - std::initializer_list& allowed_ifm_dtypes = max_avg_pooling_ifm_dtypes; - if (param->pooling_type == "SUM") { - allowed_ifm_dtypes = sum_pooling_ifm_dtypes; - } - - CheckDataType(reporter, ifm->dtype, allowed_ifm_dtypes, operator_name, "ifm", - param->pooling_type); - - DataType ofm_dtype = DataTypeFromString(param->ofm_dtype); - - std::initializer_list max_avg_pooling_ofm_dtypes = {DataType::Int(8), DataType::UInt(8), - DataType::Int(16)}; - if (param->pooling_type == "AVG" || param->pooling_type == "MAX") { - CheckDataType(reporter, ofm_dtype, max_avg_pooling_ofm_dtypes, operator_name, "ofm", - param->pooling_type); - CheckDataTypeMatch(reporter, ofm_dtype, ifm->dtype, operator_name, "ifm", "ofm", - param->pooling_type); - } else { - CheckDataType(reporter, ofm_dtype, {DataType::Int(32)}, operator_name, "ofm", - param->pooling_type); - } - - CheckUpscaleMethod(reporter, param->upscale, {"NONE", "ZEROS", "NEAREST"}, operator_name); - - Array ifm_shape = ifm->shape; - if (param->upscale != "NONE") { - ifm_shape = EthosuInferUpscaledInput(ifm_shape, param->ifm_layout); - } - - // Assign ofm shape - auto ofm_shape = EthosuInferKernelOutput( - ifm_shape, param->ifm_layout, param->ofm_layout, param->pool_shape, param->ofm_channels, - Array({1, 1}), param->strides, param->padding); - - reporter->Assign(types[result_index], TensorType(ofm_shape, ofm_dtype)); - return true; -} - -Expr MakeEthosuPooling(Expr ifm, Expr lut, String pooling_type, double ifm_scale, - int ifm_zero_point, double ofm_scale, int ofm_zero_point, - Array pool_shape, IndexExpr ofm_channels, String ofm_dtype, - Array strides, Array padding, String activation, - int clip_min, int clip_max, String rounding_mode, String upscale, - String ifm_layout, String ofm_layout) { - auto attrs = make_object(); - attrs->pooling_type = std::move(pooling_type); - attrs->ifm_scale = ifm_scale; - attrs->ifm_zero_point = ifm_zero_point; - attrs->ofm_scale = ofm_scale; - attrs->ofm_zero_point = ofm_zero_point; - attrs->pool_shape = std::move(pool_shape); - attrs->ofm_channels = std::move(ofm_channels); - attrs->ofm_dtype = std::move(ofm_dtype); - attrs->strides = std::move(strides); - attrs->padding = std::move(padding); - attrs->activation = std::move(activation); - attrs->clip_min = clip_min; - attrs->clip_max = clip_max; - attrs->rounding_mode = std::move(rounding_mode); - attrs->upscale = 
std::move(upscale); - attrs->ifm_layout = std::move(ifm_layout); - attrs->ofm_layout = std::move(ofm_layout); - static const Op& op = Op::Get("contrib.ethosu.pooling"); - return Call(op, {ifm, lut}, Attrs(attrs), {}); -} - -TVM_REGISTER_GLOBAL("relay.op._make.ethosu_pooling").set_body_typed(MakeEthosuPooling); - -RELAY_REGISTER_OP("contrib.ethosu.pooling") - .describe(R"code(Arm(R) Ethos(TM)-U NPU 2D quantized pooling operator. - -This Relay operator corresponds to the hardware-implemented quantized -pooling operation found on Ethos(TM)-U NPU. It accepts either NHWC -or NHCWB16 format for the input data (input feature map, or IFM). - -Reference: https://developer.arm.com/documentation/102420/0200/ - -- **ifm**: NHWC - (1, ifm_height, ifm_width, ifm_channels) - NHCWB16 - (1, ifm_height, ifm_channels // 16, ifm_width, 16) -- **ofm**: (1, ofm_height, ofm_width, ofm_channels) - -)code" TVM_ADD_FILELINE) - .set_attrs_type() - .set_num_inputs(2) - .add_argument("ifm", "Tensor", "The Input Feature Map tensor (IFM).") - .add_argument("lut", "Tensor", "The look-up table of values to use if activation = 'LUT'") - .set_support_level(11) - .add_type_rel("EthosuPooling", EthosuPoolingRel); - -} // namespace ethosu -} // namespace contrib -} // namespace op -} // namespace relay -} // namespace tvm diff --git a/src/relay/op/contrib/ethosu/unary_elementwise.cc b/src/relay/op/contrib/ethosu/unary_elementwise.cc deleted file mode 100644 index 62a8a6a85ab3..000000000000 --- a/src/relay/op/contrib/ethosu/unary_elementwise.cc +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/op/contrib/ethosu/unary_elementwise.cc - * \brief Property def of the Arm(R) Ethos(TM)-U unary elementwise ops. 
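Each removed operator is exposed to the language bindings through a TVM_REGISTER_GLOBAL entry such as "relay.op._make.ethosu_pooling" above. As an illustration of what that registration provides (and therefore of what this change removes), a small sketch that looks the constructor up through the global registry, assuming the program links against libtvm:

#include <tvm/runtime/logging.h>
#include <tvm/runtime/registry.h>

int main() {
  // On a build that still contains the Ethos(TM)-U ops this returns the packed
  // function registered above; after this removal the lookup yields nullptr.
  const tvm::runtime::PackedFunc* make_pooling =
      tvm::runtime::Registry::Get("relay.op._make.ethosu_pooling");
  LOG(INFO) << "relay.op._make.ethosu_pooling "
            << (make_pooling != nullptr ? "is" : "is no longer") << " registered";
  return 0;
}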
- */ -#include - -#include "common.h" -#include "op_attrs.h" - -namespace tvm { -namespace relay { -namespace op { -namespace contrib { -namespace ethosu { - -bool EthosuUnaryElementwiseRel(const Array& types, int num_inputs, const Attrs& attrs, - const TypeReporter& reporter) { - const int ifm_index = 0; - const int result_index = 2; - ICHECK_EQ(types.size(), result_index + 1); - - const auto* ifm = types[ifm_index].as(); - if (ifm == nullptr) return false; - - const auto* param = attrs.as(); - CHECK(param != nullptr) << "EthosuUnaryElementwiseAttrs cannot be nullptr."; - - const String operator_name = "ethosu_unary_elementwise"; - const String operator_type = param->operator_type; - if (operator_type != "ABS" && operator_type != "CLZ") { - reporter->GetDiagCtx().EmitFatal(Diagnostic::Error(reporter->GetSpan()) - << "Invalid operator: expected << " << operator_name - << " 'ABS' or 'CLZ' for operator_type but was" - << operator_type); - return false; - } - - const DataType ifm_dtype = ifm->dtype; - if (operator_type == "CLZ") { - CheckDataType(reporter, ifm_dtype, {DataType::Int(32)}, operator_name, "ifm", operator_type); - } else { - CheckDataType(reporter, ifm_dtype, {DataType::UInt(8), DataType::Int(8)}, operator_name, "ifm", - operator_type); - } - - // Assign ofm type - auto ofm_shape = EthosuInferElementwiseOutputShape(ifm->shape, param->ifm_layout, - param->ofm_layout, param->ofm_channels); - reporter->Assign(types[result_index], TensorType(ofm_shape, ifm_dtype)); - return true; -} - -Expr MakeEthosuUnaryElementwise(Expr ifm, Expr lut, String operator_type, double ifm_scale, - int ifm_zero_point, double ofm_scale, int ofm_zero_point, - IndexExpr ofm_channels, String activation, int clip_min, - int clip_max, String rounding_mode, String ifm_layout, - String ofm_layout) { - auto attrs = make_object(); - - attrs->operator_type = std::move(operator_type); - attrs->ifm_scale = ifm_scale; - attrs->ifm_zero_point = ifm_zero_point; - attrs->ofm_scale = ofm_scale; - attrs->ofm_zero_point = ofm_zero_point; - attrs->ofm_channels = std::move(ofm_channels); - attrs->activation = std::move(activation); - attrs->clip_min = clip_min; - attrs->clip_max = clip_max; - attrs->rounding_mode = std::move(rounding_mode); - attrs->ifm_layout = std::move(ifm_layout); - attrs->ofm_layout = std::move(ofm_layout); - - static const Op& op = Op::Get("contrib.ethosu.unary_elementwise"); - return Call(op, {ifm, lut}, Attrs(attrs), {}); -} - -TVM_REGISTER_GLOBAL("relay.op._make.ethosu_unary_elementwise") - .set_body_typed(MakeEthosuUnaryElementwise); - -RELAY_REGISTER_OP("contrib.ethosu.unary_elementwise") - .describe(R"code(Quantized unary elementwise operator for Arm(R) Ethos(TM)-U NPUs. - -This Relay operator corresponds to the hardware-implemented quantized -unary elementwise operation found on NPUs. It accepts either NHWC -or NHCWB16 format for the inputs data (input feature maps, or IFMs). 
- -Reference: https://developer.arm.com/documentation/102420/0200/ - -- **ifm**: NHWC - (1, ifm_height, ifm_width, ifm_channels) - NHCWB16 - (1, ifm_height, ifm_channels // 16, ifm_width, 16) -- **ofm**: (1, ofm_height, ofm_width, ofm_channels) - -)code" TVM_ADD_FILELINE) - .set_attrs_type() - .set_num_inputs(2) - .add_argument("ifm", "Tensor", "The Input Feature Map tensor (IFM).") - .add_argument("lut", "Tensor", "The look-up table values to use if activation = 'LUT'") - .set_support_level(11) - .add_type_rel("EthosuUnaryElementwise", EthosuUnaryElementwiseRel); - -} // namespace ethosu -} // namespace contrib -} // namespace op -} // namespace relay -} // namespace tvm diff --git a/src/relay/transforms/compiler_function_utils.h b/src/relay/transforms/compiler_function_utils.h index f3499faec262..a6cf6c9e7a8f 100644 --- a/src/relay/transforms/compiler_function_utils.h +++ b/src/relay/transforms/compiler_function_utils.h @@ -34,8 +34,6 @@ * is run to respect that assumption. So this pass is mostly just to undo that Pass after modules * have passed through the 'codegen' keyhole. * - * See also OutlineCompilerFunctionsMutator in src/relay/backend/contrib/ethosu/codegen.cc. - * * - (The \p OutlineCompilerFunctions pass is a more general version of the above which can use * a custom cache to both allocate "global_symbol" names and ensure two structurally equal * functions are assigned the same name, and thus lowered only once. This is used by Collage diff --git a/src/runtime/contrib/ethosn/ethosn_device.cc b/src/runtime/contrib/ethosn/ethosn_device.cc deleted file mode 100644 index fa44ba856de2..000000000000 --- a/src/runtime/contrib/ethosn/ethosn_device.cc +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file ethosn_device.cc - * \brief Arm(R) Ethos(TM)-N NPU device integration. - */ - -#include "ethosn_device.h" - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "ethosn_driver_library/Buffer.hpp" -#include "ethosn_runtime.h" -#include "ethosn_support_library/Support.hpp" - -#if defined ETHOSN_HW - -#include "ethosn_driver_library/Inference.hpp" -#include "ethosn_driver_library/Network.hpp" -#include "ethosn_driver_library/ProcMemAllocator.hpp" - -namespace tvm { -namespace runtime { -namespace ethosn { - -namespace dl = ::ethosn::driver_library; - -InferenceWaitStatus WaitForInference(dl::Inference* inference, int timeout) { - // Wait for inference to complete - int fd = inference->GetFileDescriptor(); - struct pollfd fds; - memset(&fds, 0, sizeof(fds)); - fds.fd = fd; - fds.events = POLLIN; // Wait for any available input. 
- - const int ms_per_seconds = 1000; - int poll_result = poll(&fds, 1, timeout * ms_per_seconds); - int poll_error_code = errno; - - if (poll_result < 0) { - return InferenceWaitStatus(InferenceWaitErrorCode::kError, - "Error while waiting for the inference to complete (" + - std::string(strerror(poll_error_code)) + ")"); - } else if (poll_result == 0) { - return InferenceWaitStatus(InferenceWaitErrorCode::kTimeout, - "Timed out while waiting for the inference to complete."); - } - - // poll_result > 0 - dl::InferenceResult npu_result; - if (read(fd, &npu_result, sizeof(npu_result)) != static_cast(sizeof(npu_result))) { - return InferenceWaitStatus( - InferenceWaitErrorCode::kError, - "Failed to read inference result status (" + std::string(strerror(poll_error_code)) + ")"); - } - - if (npu_result != dl::InferenceResult::Completed) { - return InferenceWaitStatus( - InferenceWaitErrorCode::kError, - "Inference failed with status " + std::to_string(static_cast(npu_result))); - } - - return InferenceWaitStatus(InferenceWaitErrorCode::kSuccess); -} - -void CreateBuffers(dl::ProcMemAllocator* proc_mem_alloc, - std::vector>* fm, - const std::vector& tensors, const std::vector& tensor_sizes, - bool input) { - for (size_t i = 0; i < tensors.size(); i++) { - auto* data = static_cast(tensors[i]->data); - if (input) { - (*fm)[i] = std::make_shared( - proc_mem_alloc->CreateBuffer(data, tensor_sizes[i], dl::DataFormat::NHWC)); - } else { - (*fm)[i] = std::make_shared( - proc_mem_alloc->CreateBuffer(tensor_sizes[i], dl::DataFormat::NHWC)); - } - } -} - -bool Inference(tvm::runtime::TVMArgs args, dl::ProcMemAllocator* proc_mem_alloc, dl::Network* npu, - const std::vector& input_order, const std::vector& output_order, - const std::vector& input_sizes, - const std::vector& output_sizes) { - // Unpack parameters - size_t n_inputs = input_order.size(); - size_t n_outputs = output_order.size(); - std::vector inputs(n_inputs); - for (size_t i = 0; i < n_inputs; i++) { - inputs[i] = args[input_order[i]]; - } - std::vector outputs(n_outputs); - size_t output_offset = n_inputs; - for (size_t i = 0; i < n_outputs; i++) { - outputs[i] = args[output_order[i] + output_offset]; - } - - // Set up input buffers - std::vector> ifm(n_inputs); - CreateBuffers(proc_mem_alloc, &ifm, inputs, input_sizes, true); - - // Set up output buffers - std::vector> ofm(n_outputs); - CreateBuffers(proc_mem_alloc, &ofm, outputs, output_sizes, false); - - // Raw pointers for the inference - dl::Buffer* ifm_raw[n_inputs]; - for (size_t i = 0; i < n_inputs; i++) { - ifm_raw[i] = ifm[i].get(); - } - dl::Buffer* ofm_raw[n_outputs]; - for (size_t i = 0; i < n_outputs; i++) { - ofm_raw[i] = ofm[i].get(); - } - - // Execute the inference. 
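WaitForInference above blocks on the inference file descriptor with poll(2) and then reads the result word back from it. A self-contained sketch of that fd-wait pattern, using stand-in types rather than the Ethos-N driver-library ones (treating a result word of 0 as success is an assumption made only for this illustration):

#include <poll.h>
#include <unistd.h>

#include <cerrno>
#include <cstdint>
#include <cstring>
#include <string>

enum class WaitResult { kSuccess, kTimeout, kError };

// Illustrative stand-in for the wait logic above: poll `fd` for up to
// `timeout_s` seconds, then read a 32-bit status word from it.
WaitResult WaitOnFd(int fd, int timeout_s, std::string* error) {
  pollfd fds{};
  fds.fd = fd;
  fds.events = POLLIN;
  const int rc = poll(&fds, 1, timeout_s * 1000);
  if (rc < 0) {
    *error = std::string("poll failed: ") + std::strerror(errno);
    return WaitResult::kError;
  }
  if (rc == 0) {
    *error = "timed out waiting for the inference to complete";
    return WaitResult::kTimeout;
  }
  uint32_t status = 0;
  if (read(fd, &status, sizeof(status)) != static_cast<ssize_t>(sizeof(status))) {
    *error = "failed to read the inference result status";
    return WaitResult::kError;
  }
  return status == 0 ? WaitResult::kSuccess : WaitResult::kError;
}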
- std::unique_ptr inference( - npu->ScheduleInference(ifm_raw, n_inputs, ofm_raw, n_outputs)); - InferenceWaitStatus result = WaitForInference(inference.get(), 60); - - if (result.GetErrorCode() != InferenceWaitErrorCode::kSuccess) { - LOG(FATAL) << "An error has occured waiting for the inference of a sub-graph on the NPU: " - << result.GetErrorDescription(); - } - - for (size_t i = 0; i < n_outputs; i++) { - DLTensor* tensor = outputs[i]; - dl::Buffer* source_buffer = ofm_raw[i]; - uint8_t* dest_buffer = static_cast(tensor->data); - size_t size = source_buffer->GetSize(); - uint8_t* source_buffer_data = source_buffer->Map(); - std::copy(source_buffer_data, source_buffer_data + size, dest_buffer); - source_buffer->Unmap(); - } - - return true; -} -} // namespace ethosn -} // namespace runtime -} // namespace tvm - -#else -/* If USE_ETHOSN_HW=OFF, we mock the inference call with a known-good output. - * That output can be set by using relay.ethos-n.test.infra.inference_result - * which will set the values the mocked inference will return the next time - * it's called. - */ - -#include - -namespace tvm { -namespace runtime { -namespace ethosn { - -namespace sl = ::ethosn::support_library; - -std::vector test_outputs; - -TVM_REGISTER_GLOBAL("relay.ethos-n.test.infra.inference_result") - .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - test_outputs.clear(); - for (int argc = 0; argc < args.size(); argc++) { - const DLTensor* tensor = args[argc]; - auto shape = std::vector(tensor->shape, tensor->shape + tensor->ndim); - test_outputs.emplace_back( - tvm::runtime::NDArray::Empty(shape, tensor->dtype, tensor->device)); - test_outputs[test_outputs.size() - 1].CopyFrom(tensor); - } - }); - -// Allow the ethos-n support code to be tested without a device -bool Inference(tvm::runtime::TVMArgs args, dl::ProcMemAllocator* /*proc_mem_alloc*/, - dl::Network* /* npu */, const std::vector& input_order, - const std::vector& output_order, const std::vector& input_sizes, - const std::vector& output_sizes) { - std::vector outputs; - for (int argc = input_order.size(); argc < args.size(); argc++) { - outputs.push_back(args[argc]); - } - bool rc = false; - if (test_outputs.size() == outputs.size()) { - for (auto i = 0u; i < outputs.size(); i++) { - test_outputs[i].CopyTo(outputs[i]); - } - rc = true; - } - // Clear after first usage; on-exit destructor of NDArray fails - test_outputs.clear(); - return rc; -} - -} // namespace ethosn -} // namespace runtime -} // namespace tvm - -#endif diff --git a/src/runtime/contrib/ethosn/ethosn_device.h b/src/runtime/contrib/ethosn/ethosn_device.h deleted file mode 100644 index 862a3762f05c..000000000000 --- a/src/runtime/contrib/ethosn/ethosn_device.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file ethosn_device.h - * \brief Arm(R) Ethos(TM)-N NPU device integration. - */ -#ifndef TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_DEVICE_H_ -#define TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_DEVICE_H_ - -#include - -#include - -#include "ethosn_runtime.h" - -namespace tvm { -namespace runtime { -namespace ethosn { - -namespace dl = ::ethosn::driver_library; - -using tvm::runtime::TVMArgs; - -bool Inference(tvm::runtime::TVMArgs args, dl::ProcMemAllocator* proc_mem_alloc, dl::Network* npu, - const std::vector& input_order, const std::vector& output_order, - const std::vector& input_sizes, const std::vector& output_sizes); -} // namespace ethosn -} // namespace runtime -} // namespace tvm - -#endif // TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_DEVICE_H_ diff --git a/src/runtime/contrib/ethosn/ethosn_runtime.cc b/src/runtime/contrib/ethosn/ethosn_runtime.cc deleted file mode 100644 index 710888242f94..000000000000 --- a/src/runtime/contrib/ethosn/ethosn_runtime.cc +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file ethosn_runtime.cc - * \brief Execution handling of Arm(R) Ethos(TM)-N command streams. 
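When TVM is built without ETHOSN_HW, the Inference entry point above is mocked: a test primes the expected outputs through the "relay.ethos-n.test.infra.inference_result" global and the next mocked inference copies them into the subgraph outputs. A hedged sketch of priming that hook from C++ (the shape, dtype and wrapper function are illustrative only):

#include <tvm/runtime/logging.h>
#include <tvm/runtime/ndarray.h>
#include <tvm/runtime/registry.h>

void PrimeMockInferenceResult() {
  // A small int8 NHWC tensor standing in for the known-good output.
  tvm::runtime::NDArray expected = tvm::runtime::NDArray::Empty(
      {1, 2, 2, 4}, DLDataType{kDLInt, 8, 1}, DLDevice{kDLCPU, 0});
  const tvm::runtime::PackedFunc* set_result =
      tvm::runtime::Registry::Get("relay.ethos-n.test.infra.inference_result");
  ICHECK(set_result != nullptr) << "hook only exists when the Ethos-N runtime is built in";
  // The next mocked Inference call returns a copy of `expected`.
  (*set_result)(expected);
}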
- */ - -#include "ethosn_runtime.h" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "../../file_utils.h" -#include "ethosn_device.h" -#include "ethosn_driver_library/Inference.hpp" -#include "ethosn_driver_library/Network.hpp" - -namespace tvm { -namespace runtime { -namespace ethosn { - -namespace dl = ::ethosn::driver_library; - -EthosnModule::EthosnModule(std::vector* cmms) { - for (auto& it : *cmms) { - network_map_[it.name].name = it.name; - if (it.compiled_cmm != nullptr) { - network_map_[it.name].compiled_cmm = std::move(it.compiled_cmm); - } - if (it.proc_mem_alloc != nullptr) { - network_map_[it.name].proc_mem_alloc = std::move(it.proc_mem_alloc); - } - if (it.runtime_cmm != nullptr) { - network_map_[it.name].runtime_cmm = std::move(it.runtime_cmm); - } - network_map_[it.name].inputs = it.inputs; - network_map_[it.name].outputs = it.outputs; - network_map_[it.name].input_sizes = it.input_sizes; - network_map_[it.name].output_sizes = it.output_sizes; - } -} - -PackedFunc EthosnModule::GetFunction(const String& name, const ObjectPtr& sptr_to_self) { - if (network_map_.find(name) != network_map_.end()) { - return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) { - *rv = Inference(args, network_map_[name].proc_mem_alloc.get(), - network_map_[name].runtime_cmm.get(), network_map_[name].inputs, - network_map_[name].outputs, network_map_[name].input_sizes, - network_map_[name].output_sizes); - }); - } else { - return PackedFunc(); - } -} - -void EthosnModule::SaveToBinary(dmlc::Stream* stream) { - stream->Write(network_map_.size()); - for (const auto& it : network_map_) { - stream->Write(it.first); - std::stringstream ss; - ICHECK(it.second.compiled_cmm != nullptr); - it.second.compiled_cmm->Serialize(ss); - stream->Write(ss.str()); - stream->Write(it.second.inputs.size()); - stream->Write(&it.second.inputs[0], sizeof(uint32_t) * it.second.inputs.size()); - stream->Write(&it.second.input_sizes[0], sizeof(uint32_t) * it.second.input_sizes.size()); - stream->Write(it.second.outputs.size()); - stream->Write(&it.second.outputs[0], sizeof(uint32_t) * it.second.outputs.size()); - stream->Write(&it.second.output_sizes[0], sizeof(uint32_t) * it.second.output_sizes.size()); - } -} - -Module EthosnModule::LoadFromBinary(void* strm) { - auto stream = static_cast(strm); - size_t func_count; - // Read the number of functions - stream->Read(&func_count); - std::vector cmms; - cmms.resize(func_count); - for (unsigned int i = 0; i < func_count; i++) { - OrderedCompiledNetwork& compiled = cmms[i]; - std::string ext_symbol; - std::string cmm; - uint64_t input_size; - uint64_t output_size; - // Read the symbol name - stream->Read(&compiled.name); - // Read the serialized command stream - stream->Read(&cmm); - std::istringstream cmm_strm(cmm); -#if defined ETHOSN_HW - // If hardware unavaiable use the mock inference functionality. If hardware is - // avaiable, deserialize the compiled graph. 
- compiled.proc_mem_alloc = std::make_unique(); - compiled.runtime_cmm = std::make_unique( - compiled.proc_mem_alloc->CreateNetwork(cmm.c_str(), cmm.size())); -#endif - // Read the number of inputs - stream->Read(&input_size); - auto size = static_cast(input_size); - compiled.inputs.resize(size); - // Read the order of inputs - stream->Read(&compiled.inputs[0], sizeof(uint32_t) * size); - compiled.input_sizes.resize(size); - stream->Read(&compiled.input_sizes[0], sizeof(uint32_t) * size); - // Read the number of outputs - stream->Read(&output_size); - size = static_cast(output_size); - compiled.outputs.resize(size); - // Read the order of outputs - stream->Read(&compiled.outputs[0], sizeof(uint32_t) * size); - compiled.output_sizes.resize(size); - stream->Read(&compiled.output_sizes[0], sizeof(uint32_t) * size); - } - auto n = make_object(&cmms); - return Module(n); -} - -void EthosnModule::SaveToFile(const String& path, const String& format) { - std::string data; - dmlc::MemoryStringStream writer(&data); - dmlc::SeekStream* strm = &writer; - SaveToBinary(strm); - SaveBinaryToFile(path, data); -} - -TVM_REGISTER_GLOBAL("runtime.module.loadbinary_ethos-n") - .set_body([](TVMArgs args, TVMRetValue* rv) { *rv = EthosnModule::LoadFromBinary(args[0]); }); -} // namespace ethosn -} // namespace runtime -} // namespace tvm diff --git a/src/runtime/contrib/ethosn/ethosn_runtime.h b/src/runtime/contrib/ethosn/ethosn_runtime.h deleted file mode 100644 index 2971990a5b26..000000000000 --- a/src/runtime/contrib/ethosn/ethosn_runtime.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file ethosn_runtime.h - * \brief Execution handling of Ethos-N command streams. - */ -#ifndef TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_RUNTIME_H_ -#define TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_RUNTIME_H_ - -#include - -#include -#include -#include -#include -#include -#include - -#include "ethosn_driver_library/Network.hpp" -#include "ethosn_driver_library/ProcMemAllocator.hpp" -#include "ethosn_support_library/Support.hpp" - -namespace tvm { -namespace runtime { -namespace ethosn { - -namespace sl = ::ethosn::support_library; -namespace dl = ::ethosn::driver_library; - -struct OrderedCompiledNetwork { - std::unique_ptr compiled_cmm; - std::unique_ptr runtime_cmm; - std::unique_ptr proc_mem_alloc; - std::string name; - std::vector inputs; - std::vector outputs; - std::vector input_sizes; - std::vector output_sizes; -}; - -class EthosnModule : public ModuleNode { - public: - /*! - * \brief The Ethos-N runtime module. - * \param cmms A vector of compiled networks with input/output orders. - */ - explicit EthosnModule(std::vector* cmms); - - /*! - * \brief Get a PackedFunc from the Ethos-N module. 
- * \param name The name of the function. - * \param sptr_to_self The ObjectPtr that points to this module node. - * \return The function pointer when it is found, otherwise, PackedFunc(nullptr). - */ - PackedFunc GetFunction(const String& name, const ObjectPtr& sptr_to_self) final; - /*! - * \brief Save a compiled network to a binary stream, which can then be - * serialized to disk. - * \param stream The stream to save the binary. - * \note See EthosnModule::LoadFromBinary for the serialization format. - */ - void SaveToBinary(dmlc::Stream* stream) final; - /*! - * \brief Load a compiled network from stream. - * \param strm The binary stream to load. - * \return The created Ethos-N module. - * \note The serialization format is: - * - * size_t : number of functions - * [ - * std::string : name of function (symbol) - * std::string : serialized command stream - * size_t : number of inputs - * std::vector : order of inputs - * std::vector : buffer sizes for inputs - * size_t : number of outputs - * std::vector : order of outputs - * std::vector : buffer sizes for outputs - * ] * number of functions - */ - static Module LoadFromBinary(void* strm); - /*! - * \brief Save a module to a specified path. - * \param path Where to save the serialized module. - */ - void SaveToFile(const String& path, const String& format) override; - - const char* type_key() const override { return "ethos-n"; } - - /*! \brief Get the property of the runtime module .*/ - int GetPropertyMask() const final { - return ModulePropertyMask::kBinarySerializable | ModulePropertyMask::kRunnable; - }; - - private: - /*! \brief A map between ext_symbols (function names) and ordered compiled networks. */ - std::map network_map_; -}; - -/*! - * \brief Error codes for evaluating the result of inference on the NPU. - */ -enum class InferenceWaitErrorCode { kSuccess = 0, kTimeout = 1, kError = 2 }; - -/*! - * \brief A helper class holding the status of inference on the NPU and - * associated error message(s) if any occurred. 
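The LoadFromBinary note above pins down the on-disk layout of a serialized Ethos-N module. Purely to make that field order concrete (mirroring the Write calls in SaveToBinary rather than defining anything new), one function entry could be emitted as follows; the helper name is made up:

#include <dmlc/io.h>

#include <cstdint>
#include <string>
#include <vector>

// Illustration of the per-function layout documented above.
void WriteOneEntry(dmlc::Stream* stream, const std::string& symbol,
                   const std::string& command_stream,
                   const std::vector<uint32_t>& input_order,
                   const std::vector<uint32_t>& input_sizes,
                   const std::vector<uint32_t>& output_order,
                   const std::vector<uint32_t>& output_sizes) {
  stream->Write(symbol);               // name of function (symbol)
  stream->Write(command_stream);       // serialized command stream
  stream->Write(input_order.size());   // number of inputs
  stream->Write(input_order.data(), sizeof(uint32_t) * input_order.size());
  stream->Write(input_sizes.data(), sizeof(uint32_t) * input_sizes.size());
  stream->Write(output_order.size());  // number of outputs
  stream->Write(output_order.data(), sizeof(uint32_t) * output_order.size());
  stream->Write(output_sizes.data(), sizeof(uint32_t) * output_sizes.size());
}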
- * - * Similar to the implementation of 'WaitStatus' in the driver stack: - * https://github.com/ARM-software/ethos-n-driver-stack/blob/22.08/armnn-ethos-n-backend/workloads/EthosNPreCompiledWorkload.cpp#L48 - */ -class InferenceWaitStatus { - public: - InferenceWaitStatus() : error_code_(InferenceWaitErrorCode::kSuccess), error_description_("") {} - - explicit InferenceWaitStatus(InferenceWaitErrorCode errorCode, std::string errorDescription = "") - : error_code_(errorCode), error_description_(errorDescription) {} - - InferenceWaitStatus(const InferenceWaitStatus&) = default; - InferenceWaitStatus(InferenceWaitStatus&&) = default; - InferenceWaitStatus& operator=(const InferenceWaitStatus&) = default; - InferenceWaitStatus& operator=(InferenceWaitStatus&&) = default; - - explicit operator bool() const { return error_code_ == InferenceWaitErrorCode::kSuccess; } - InferenceWaitErrorCode GetErrorCode() const { return error_code_; } - std::string GetErrorDescription() const { return error_description_; } - - private: - InferenceWaitErrorCode error_code_; - std::string error_description_; -}; - -} // namespace ethosn -} // namespace runtime -} // namespace tvm -#endif // TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_RUNTIME_H_ diff --git a/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.c b/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.c deleted file mode 100644 index 2f8f7ec7c1dc..000000000000 --- a/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "tvm_ethosu_runtime.h" - -#include - -int32_t TVMEthosULaunch(tvm_device_ethos_u_t* context, void* cms_data, size_t cms_data_size, - uint64_t* base_addrs, size_t* base_addrs_size, int num_tensors) { - struct ethosu_driver* driver = (struct ethosu_driver*)context; - int32_t result = - ethosu_invoke(driver, cms_data, cms_data_size, base_addrs, base_addrs_size, num_tensors); - - // Map errors in invoke to TVM errors - if (result != 0) { - return -1; - } - return 0; -} - -int32_t TVMDeviceEthosUActivate(tvm_device_ethos_u_t* context) { return 0; } -int32_t TVMDeviceEthosUOpen(tvm_device_ethos_u_t* context) { return 0; } -int32_t TVMDeviceEthosUClose(tvm_device_ethos_u_t* context) { return 0; } -int32_t TVMDeviceEthosUDeactivate(tvm_device_ethos_u_t* context) { return 0; } diff --git a/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.h b/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.h deleted file mode 100644 index 31d17557aa84..000000000000 --- a/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef TVM_RUNTIME_CONTRIB_ETHOSU_BARE_METAL_TVM_ETHOSU_RUNTIME_H_ -#define TVM_RUNTIME_CONTRIB_ETHOSU_BARE_METAL_TVM_ETHOSU_RUNTIME_H_ - -#include -#include -#include - -typedef void tvm_device_ethos_u_t; - -int32_t TVMEthosULaunch(tvm_device_ethos_u_t* resource_handle, void* cms_data, size_t cms_data_size, - uint64_t* base_addrs, size_t* base_addrs_size, int num_tensors); - -int32_t TVMDeviceEthosUActivate(tvm_device_ethos_u_t* context); -int32_t TVMDeviceEthosUOpen(tvm_device_ethos_u_t* context); -int32_t TVMDeviceEthosUClose(tvm_device_ethos_u_t* context); -int32_t TVMDeviceEthosUDeactivate(tvm_device_ethos_u_t* context); - -#endif // TVM_RUNTIME_CONTRIB_ETHOSU_BARE_METAL_TVM_ETHOSU_RUNTIME_H_ diff --git a/src/runtime/crt/.gitignore b/src/runtime/crt/.gitignore deleted file mode 100644 index 796b96d1c402..000000000000 --- a/src/runtime/crt/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/build diff --git a/src/runtime/crt/CMakeLists.txt b/src/runtime/crt/CMakeLists.txt deleted file mode 100644 index ea170bca1c86..000000000000 --- a/src/runtime/crt/CMakeLists.txt +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# SPDX-License-Identifier: Apache-2.0 - -cmake_minimum_required(VERSION 3.18) -project(standalone_crt_libs C CXX) - -if(NOT DEFINED CRT_CONFIG_PATH) - message(FATAL_ERROR "Must supply path to crt_config.h: CRT_CONFIG_PATH=...") -endif() - -set(CRT_LIB_BASE ${CMAKE_CURRENT_SOURCE_DIR}/src/runtime/crt) -if(NOT EXISTS "${CRT_LIB_BASE}") - message(FATAL_ERROR - "This CMakeList does not build inside TVM source tree. 
-Build the standalone_crt target, and re-invoke CMakeList.txt in build/standalone_crt.") -endif() - -if (NOT DEFINED CRT_LIBS) - set(CRT_LIBS microtvm_rpc_server - microtvm_rpc_common - aot_executor_module - aot_executor - graph_executor_module - graph_executor - common - memory - ) -endif() - -foreach(crt_lib_name ${CRT_LIBS}) - add_library(${crt_lib_name}) - file(GLOB_RECURSE crt_lib_srcs ${CRT_LIB_BASE}/${crt_lib_name}/*.c ${CRT_LIB_BASE}/${crt_lib_name}/*.cc) - target_sources(${crt_lib_name} PRIVATE ${crt_lib_srcs}) - target_include_directories(${crt_lib_name} PRIVATE include ${CRT_CONFIG_PATH}) -endforeach(crt_lib_name ${CRT_LIBS}) diff --git a/src/runtime/crt/aot_executor/aot_executor.c b/src/runtime/crt/aot_executor/aot_executor.c deleted file mode 100644 index 8ab4160306ff..000000000000 --- a/src/runtime/crt/aot_executor/aot_executor.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// LINT_C_FILE - -/*! - * \file aot_executor.c - * \brief implement AoT executor in C - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static void DumpMetadata(const TVMMetadata* md) { - LOG_DEBUG("%s:\n", __FUNCTION__); - LOG_DEBUG("\tmod_name=%s\n", md->mod_name); - LOG_DEBUG("\tversion=%" PRId64 "\n", md->version); - LOG_DEBUG("\tnum_inputs=%" PRId64 "\n", md->num_inputs); - LOG_DEBUG("\tnum_outputs=%" PRId64 "\n", md->num_outputs); - LOG_DEBUG("\tnum_workspace_pools=%" PRId64 "\n", md->num_workspace_pools); - LOG_DEBUG("\tnum_constant_pools=%" PRId64 "\n", md->num_constant_pools); - - int i; - - for (i = 0; i < md->num_inputs; ++i) { - LOG_DEBUG("\tinput[%d]: %s\n", i, md->inputs[i].name); - } - - for (i = 0; i < md->num_outputs; ++i) { - LOG_DEBUG("\toutput[%d]: %s\n", i, md->outputs[i].name); - } - - for (i = 0; i < md->num_workspace_pools; ++i) { - LOG_DEBUG("\tworkspace_pools[%d]: %s\n", i, md->workspace_pools[i].name); - } - - for (i = 0; i < md->num_constant_pools; ++i) { - LOG_DEBUG("\tconstant_pools[%d]: %s\n", i, md->constant_pools[i].name_hint); - } -} - -int TVMAotExecutor_GetNumInputs(TVMAotExecutor* executor) { return executor->metadata->num_inputs; } - -int TVMAotExecutor_GetNumOutputs(TVMAotExecutor* executor) { - return executor->metadata->num_outputs; -} - -int TVMAotExecutor_GetInputIndex(TVMAotExecutor* executor, const char* name) { - int i; - int rv = -1; - - const TVMMetadata* md = executor->metadata; - for (i = 0; i < md->num_inputs; ++i) { - if (!strcmp(md->inputs[i].name, name)) { - rv = i; - break; - } - } - CHECK_GE(rv, 0, "cannot find '%s' among input.", name); - return rv; -} - -int TVMAotExecutor_GetInputName(TVMAotExecutor* executor, int index, const char** name) { - const TVMMetadata* md = executor->metadata; - *name = 
md->inputs[index].name; - return 0; -} - -int TVMAotExecutor_Run(TVMAotExecutor* executor) { - const char* tvm_main_suffix = "_run"; - char tvm_main_name[TVM_CRT_MAX_STRLEN_FUNCTION_NAME]; - - { - const size_t max_strlen = TVM_CRT_MAX_STRLEN_FUNCTION_NAME; - size_t len = strnlen(executor->metadata->mod_name, max_strlen); - len += strnlen(tvm_main_suffix, max_strlen); - - CHECK_LT(len, max_strlen, "tvm_main name too long %zu\n", len); - } - - // create main function name string, e.g. "tvmgen_default___tvm_main__" - snprintf(tvm_main_name, sizeof(tvm_main_name), "%s%s", executor->metadata->mod_name, - tvm_main_suffix); - - TVMPackedFunc tvm_main; - TVMArgs temp_args; - - CHECK_LE(executor->num_args, TVM_CRT_MAX_ARGS, "too many args %" PRId64 "\n", executor->num_args); - - int i; - for (i = 0; i < executor->num_args; ++i) { - temp_args.values[i].v_handle = &executor->args[i].dl_tensor; - temp_args.tcodes[i] = kTVMDLTensorHandle; - } - temp_args.values_count = executor->num_args; - - int status = - TVMPackedFunc_InitModuleFunc(&tvm_main, executor->module_handle, tvm_main_name, &temp_args); - - if (status != 0) { - return status; - } - - CHECK_EQ(tvm_main.Call(&tvm_main), 0, "call to %s failed", tvm_main_name); - - return 0; -} - -int TVMAotExecutor_Init(TVMAotExecutor* executor, TVMModuleHandle module_handle, - const DLDevice device, const char* module_name) { - executor->module_handle = module_handle; - executor->device = device; - - // get a pointer to the PackedFunc get_c_metadata() which gives us access to the top-level - // metadata structure - TVMPackedFunc get_c_metadata; - TVMArgs temp_args; - temp_args.values_count = 0; - - const char* tvmgen_prefix = "tvmgen_"; - const char* get_c_metdata_suffix = "_get_c_metadata"; - char get_c_metdata_name[TVM_CRT_MAX_STRLEN_FUNCTION_NAME]; - - { - size_t max_strlen = TVM_CRT_MAX_STRLEN_FUNCTION_NAME; - size_t len = strnlen(tvmgen_prefix, max_strlen); - len += strnlen(module_name, max_strlen); - len += strnlen(get_c_metdata_suffix, max_strlen); - - CHECK_LT(len, max_strlen, "get_c_metadata name too long %zu\n", len); - } - - // create get_c_metadata() function name string, e.g. 
"tvmgen_default_get_c_metadata()" - snprintf(get_c_metdata_name, sizeof(get_c_metdata_name), "%s%s%s", tvmgen_prefix, module_name, - get_c_metdata_suffix); - - int status = TVMPackedFunc_InitModuleFunc(&get_c_metadata, executor->module_handle, - get_c_metdata_name, &temp_args); - if (status != 0) { - return status; - } - - CHECK_EQ(get_c_metadata.Call(&get_c_metadata), 0, "get_c_metadata"); - - // save the returned pointer to the top-level metadata - executor->metadata = (TVMMetadata*)get_c_metadata.ret_value.values[0].v_handle; - - const TVMMetadata* md = executor->metadata; - - DumpMetadata(md); - - executor->num_args = md->num_inputs + md->num_outputs + md->num_workspace_pools; - - tvm_crt_error_t err = TVMPlatformMemoryAllocate(executor->num_args * sizeof(*executor->args), - executor->device, (void**)(&executor->args)); - if (err != kTvmErrorNoError) { - return -1; - } - - int i; - int arg_idx = 0; - for (i = 0; i < md->num_inputs; ++i) { - LOG_DEBUG("input allocate[%d]: %s\n", i, md->inputs[i].name); - - TVMNDArray* array = &executor->args[arg_idx++]; - - status = TVMNDArray_Empty(md->inputs[i].num_shape, md->inputs[i].shape, md->inputs[i].dtype, - executor->device, array); - if (status != 0) { - return status; - } - - TVMNDArray_IncrementReference(array); - } - - for (i = 0; i < md->num_outputs; ++i) { - LOG_DEBUG("output allocate[%d]: %s\n", i, md->outputs[i].name); - - TVMNDArray* array = &executor->args[arg_idx++]; - - status = TVMNDArray_Empty(md->outputs[i].num_shape, md->outputs[i].shape, md->outputs[i].dtype, - executor->device, array); - if (status != 0) { - return status; - } - - TVMNDArray_IncrementReference(array); - } - - return status; -} - -int TVMAotExecutor_Create(TVMModuleHandle module_handle, const DLDevice device, - TVMAotExecutor** executor, const char* module_name) { - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(**executor), device, (void**)executor); - if (err != kTvmErrorNoError) { - return -1; - } - - memset(*executor, 0, sizeof(**executor)); - - return TVMAotExecutor_Init(*executor, module_handle, device, module_name); -} - -int TVMAotExecutor_Release(TVMAotExecutor* executor, const DLDevice device) { - int status; - - if (executor->num_args > 0) { - // free TVMNDArray data memory for each argument - int i; - for (i = 0; i < executor->num_args; ++i) { - status = TVMNDArray_Release(&executor->args[i]); - if (status != 0) { - return status; - } - } - - // free TVMNDArray argument list - status = TVMPlatformMemoryFree(executor->args, executor->device); - if (status != 0) { - return status; - } - } - - status = TVMPlatformMemoryFree(executor, device); - if (status != 0) { - return status; - } - - return 0; -} diff --git a/src/runtime/crt/aot_executor_module/aot_executor_module.c b/src/runtime/crt/aot_executor_module/aot_executor_module.c deleted file mode 100644 index a5c8105144f7..000000000000 --- a/src/runtime/crt/aot_executor_module/aot_executor_module.c +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// LINT_C_FILE - -/*! - * \file aot_executor_module.c - * \brief wrap aot_executor into a TVMModule for use with RPC. - */ - -#include -#include -#include -#include -#include - -typedef struct { - TVMModule mod; - TVMAotExecutor* executor; -} AotExecutorModule; - -static AotExecutorModule aot_executor; - -int32_t TVMAotExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { - if (aot_executor.executor != NULL) { - return kTvmErrorExecutorModuleAlreadyCreated; - } - - if (nargs != 3) { - return kTvmErrorFunctionCallNumArguments; - } - - if (tcodes[0] != kTVMModuleHandle || tcodes[1] != kDLDevice || tcodes[2] != kTVMStr) { - return kTvmErrorFunctionCallWrongArgType; - } - - DLDevice dev = args[1].v_device; - - if (dev.device_type != kDLCPU) { - return kTvmErrorExecutorModuleBadContext; - } - - TVMAotExecutor_Create(args[0].v_handle, dev, &aot_executor.executor, args[2].v_str); - - TVMModuleHandle out_mod; - int status = TVMModCreateFromCModule(&aot_executor.mod, &out_mod); - if (status != 0) { - ret_tcodes[0] = kTVMNullptr; - TVMAotExecutor_Release(aot_executor.executor, dev); - return status; - } - - ret_values[0].v_handle = out_mod; - ret_tcodes[0] = kTVMModuleHandle; - return kTvmErrorNoError; -} - -int32_t TVMAotExecutorModule_NotImplemented(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { - return kTvmErrorFunctionCallNotImplemented; -} - -int32_t TVMAotExecutorModule_GetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { - int64_t index; - - if (tcodes[0] == kTVMArgInt) { - if (args[0].v_int64 > TVMAotExecutor_GetNumInputs(aot_executor.executor)) { - return kTvmErrorFunctionCallInvalidArg; - } - - index = args[0].v_int64; - } else { - index = TVMAotExecutor_GetInputIndex(aot_executor.executor, args[0].v_str); - - if (index < 0) { - return kTvmErrorExecutorModuleNoSuchInput; - } - } - - TVMNDArray* array = &aot_executor.executor->args[index]; - - TVMNDArray_IncrementReference(array); - - ret_values[0].v_handle = (void*)(&array->dl_tensor); - ret_tcodes[0] = kTVMNDArrayHandle; - - return 0; -} - -int32_t TVMAotExecutorModule_GetOutput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { - if (nargs != 1) { - return kTvmErrorFunctionCallNumArguments; - } - - if (args[0].v_int64 > TVMAotExecutor_GetNumOutputs(aot_executor.executor)) { - return kTvmErrorFunctionCallInvalidArg; - } - - // index past the input entries - int64_t index = args[0].v_int64 + TVMAotExecutor_GetNumInputs(aot_executor.executor); - - TVMNDArray* array = &aot_executor.executor->args[index]; - - TVMNDArray_IncrementReference(array); - - ret_values[0].v_handle = (void*)(&array->dl_tensor); - ret_tcodes[0] = kTVMNDArrayHandle; - - return 0; -} - -int32_t TVMAotExecutorModule_GetInputIndex(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { - if (nargs != 1) { - return kTvmErrorFunctionCallNumArguments; 
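  // [Editor's note, not original source] Illustrative sketch of the calling convention shared by
  // every entry point in this module: each is a TVMBackendPackedCFunc that takes positional
  // TVMValue arguments plus a parallel array of type codes, and writes its result into
  // ret_values[0] / ret_tcodes[0]. A caller resolving get_input_index by name might look roughly
  // like this (the local variable names and the input name are assumptions for illustration):
  //
  //   TVMValue call_args[1], call_ret;
  //   int call_tcodes[1] = {kTVMStr}, call_ret_tcode;
  //   call_args[0].v_str = "serving_default_input:0";   // hypothetical input name
  //   TVMAotExecutorModule_GetInputIndex(call_args, call_tcodes, 1, &call_ret, &call_ret_tcode,
  //                                      /*resource_handle=*/NULL);
  //   // on success: call_ret.v_int64 holds the index and call_ret_tcode == kTVMArgInt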
- } - - int index = TVMAotExecutor_GetInputIndex(aot_executor.executor, args[0].v_str); - - if (index < 0) { - return kTvmErrorExecutorModuleNoSuchInput; - } - - ret_values[0].v_int64 = index; - ret_tcodes[0] = kTVMArgInt; - return 0; -} - -int32_t TVMAotExecutorModule_GetInputName(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { - if (nargs != 1) { - return kTvmErrorFunctionCallNumArguments; - } - - char* name; - int ret = TVMAotExecutor_GetInputName(aot_executor.executor, args[0].v_int64, &name); - if (ret < 0) { - return kTvmErrorExecutorModuleNoSuchInput; - } - - ret_values[0].v_str = name; - ret_tcodes[0] = kTVMStr; - return 0; -} - -int32_t TVMAotExecutorModule_GetNumInputs(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { - if (nargs != 0) { - return kTvmErrorFunctionCallNumArguments; - } - - ret_values[0].v_int64 = TVMAotExecutor_GetNumInputs(aot_executor.executor); - ret_tcodes[0] = kTVMArgInt; - return 0; -} - -int32_t TVMAotExecutorModule_GetNumOutputs(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { - if (nargs != 0) { - return kTvmErrorFunctionCallNumArguments; - } - - ret_values[0].v_int64 = TVMAotExecutor_GetNumOutputs(aot_executor.executor); - ret_tcodes[0] = kTVMArgInt; - return 0; -} - -int32_t TVMAotExecutorModule_Run(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { - if (nargs != 0) { - return kTvmErrorFunctionCallNumArguments; - } - - return TVMAotExecutor_Run(aot_executor.executor); -} - -static const TVMBackendPackedCFunc aot_executor_registry_funcs[] = { - &TVMAotExecutorModule_GetInput, // get_input - &TVMAotExecutorModule_GetInputIndex, // get_input_index - &TVMAotExecutorModule_NotImplemented, // get_input_info (do not implement) - &TVMAotExecutorModule_GetNumInputs, // get_num_inputs - &TVMAotExecutorModule_GetNumOutputs, // get_num_outputs - &TVMAotExecutorModule_GetOutput, // get_output - &TVMAotExecutorModule_NotImplemented, // load_params (do not implement) - &TVMAotExecutorModule_Run, // run - &TVMAotExecutorModule_NotImplemented, // set_input (implemented via python wrapper) - &TVMAotExecutorModule_NotImplemented, // share_params (do not implement) - &TVMAotExecutorModule_GetInputName, // get_input_name -}; - -static const TVMFuncRegistry aot_executor_registry = { - "\x0b\0get_input\0" - "get_input_index\0" - "get_input_info\0" - "get_num_inputs\0" - "get_num_outputs\0" - "get_output\0" - "load_params\0" - "run\0" - "set_input\0" - "share_params\0" - "get_input_name\0", - aot_executor_registry_funcs}; - -tvm_crt_error_t TVMAotExecutorModule_Register() { - aot_executor.mod.registry = &aot_executor_registry; - aot_executor.executor = NULL; - - return TVMFuncRegisterGlobal("tvm.aot_executor.create", &TVMAotExecutorModule_Create, 0); -} diff --git a/src/runtime/crt/common/crt_backend_api.c b/src/runtime/crt/common/crt_backend_api.c deleted file mode 100644 index 56bbbedc1d64..000000000000 --- a/src/runtime/crt/common/crt_backend_api.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// LINT_C_FILE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "crt_config.h" - -void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t nbytes, int dtype_code_hint, - int dtype_bits_hint) { - tvm_crt_error_t err = kTvmErrorNoError; - void* ptr = 0; - DLDevice dev = {device_type, device_id}; - assert(nbytes > 0); - err = TVMPlatformMemoryAllocate(nbytes, dev, &ptr); - CHECK_EQ(err, kTvmErrorNoError, - "TVMBackendAllocWorkspace(%d, %d, %" PRIu64 ", %d, %d) -> %" PRId32, device_type, - device_id, nbytes, dtype_code_hint, dtype_bits_hint, err); - return ptr; -} - -int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) { - tvm_crt_error_t err = kTvmErrorNoError; - DLDevice dev = {device_type, device_id}; - err = TVMPlatformMemoryFree(ptr, dev); - return err; -} - -int TVMBackendParallelLaunch(FTVMParallelLambda flambda, void* cdata, int num_task) { - TVMParallelGroupEnv env; - env.num_task = 1; - flambda(0, &env, cdata); - return 0; -} - -int TVMBackendRegisterSystemLibSymbol(const char* name, void* ptr) { - return TVMFuncRegisterGlobal(name, ptr, 0); -} diff --git a/src/runtime/crt/common/crt_runtime_api.c b/src/runtime/crt/common/crt_runtime_api.c deleted file mode 100644 index 2df37205b89c..000000000000 --- a/src/runtime/crt/common/crt_runtime_api.c +++ /dev/null @@ -1,660 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// LINT_C_FILE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(_WIN32) || defined(WIN32) -#include -#elif __unix__ -#include -#endif - -// Handle internal errors - -static char g_last_error[1024]; - -void TVMAPISetLastError(const char* msg) { - strncpy(g_last_error, msg, sizeof(g_last_error) - 1); - g_last_error[sizeof(g_last_error) - 1] = 0; -} - -__attribute__((format(printf, 1, 2))) int TVMAPIErrorf(const char* msg, ...) 
{ - va_list args; - int to_return; - - va_start(args, msg); - to_return = vsnprintf(g_last_error, sizeof(g_last_error), msg, args); - va_end(args); - - return to_return; -} - -const char* TVMGetLastError(void) { return g_last_error; } - -// Manipulate NDArray on target device - -int TVMArrayAlloc(const tvm_index_t* shape, int ndim, int dtype_code, int dtype_bits, - int dtype_lanes, int device_type, int device_id, TVMArrayHandle* out) { - DLDataType dtype; - dtype.code = dtype_code; - dtype.bits = dtype_bits; - dtype.lanes = dtype_lanes; - DLDevice dev; - dev.device_type = (DLDeviceType)device_type; - dev.device_id = device_id; - TVMNDArray arr; - int status = TVMNDArray_Empty(ndim, shape, dtype, dev, &arr); - if (status != 0) { - return status; - } - **out = arr.dl_tensor; - return 0; -} - -int TVMArrayFree(TVMArrayHandle handle) { - TVMNDArray* arr = (TVMNDArray*)handle; - - return TVMNDArray_Release(arr); -} - -int TVMDeviceAllocDataSpace(DLDevice dev, size_t nbytes, size_t alignment, DLDataType type_hint, - void** out_data) { - if (alignment != 1) { - nbytes = (nbytes + alignment - 1) / alignment * alignment; - } - return TVMPlatformMemoryAllocate(nbytes, dev, out_data); -} - -int TVMDeviceAllocDataSpaceWithScope(DLDevice dev, int ndim, const int64_t* shape, DLDataType dtype, - const char* mem_scope, void** out_data) { - size_t nbytes = 1; - for (int i = 0; i < ndim; ++i) { - nbytes *= shape[i]; - } - nbytes *= (dtype.bits * dtype.lanes + 7) / 8; - - int kAllocAlignment = 64; - size_t align = (dtype.bits / 8) * dtype.lanes; - if (align < kAllocAlignment) align = kAllocAlignment; - return TVMDeviceAllocDataSpace(dev, nbytes, align, dtype, out_data); -} - -int TVMDeviceFreeDataSpace(DLDevice dev, void* ptr) { return TVMPlatformMemoryFree(ptr, dev); } - -TVM_ATTRIBUTE_UNUSED static bool IsContiguous(const DLTensor* arr) { - if (arr->strides == NULL) return true; - int64_t expected_stride = 1; - for (int32_t i = arr->ndim; i != 0; --i) { - int32_t k = i - 1; - if (arr->strides[k] != expected_stride) return false; - expected_stride *= arr->shape[k]; - } - return true; -} - -int TVMDeviceCopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) { - assert(IsContiguous(from) && IsContiguous(to)); - size_t size = 1; - for (int i = 0; i < from->ndim; ++i) { - size *= from->shape[i]; - } - size *= (from->dtype.bits * from->dtype.lanes + 7) / 8; - memcpy(((uint8_t*)to->data) + to->byte_offset, ((uint8_t*)from->data) + from->byte_offset, size); - return 0; -} - -int TVMStreamCreate(int device_type, int device_id, TVMStreamHandle* out) { - out = NULL; - return 0; -} - -int TVMObjectFree(TVMObjectHandle obj) { return 0; } - -int TVMStreamFree(int device_type, int device_id, TVMStreamHandle stream) { return 0; } - -int TVMSetStream(int device_type, int device_id, TVMStreamHandle stream) { return 0; } - -int TVMSynchronize(int device_type, int device_id, TVMStreamHandle stream) { return 0; } - -static TVMMutableFuncRegistry global_func_registry; - -int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) { - return TVMMutableFuncRegistry_Set(&global_func_registry, name, f, override != 0); -} - -static const TVMModule* registered_modules[TVM_CRT_MAX_REGISTERED_MODULES]; - -/*! \brief Passed as `module_index` to EncodeFunctionHandle. */ -static const tvm_module_index_t kGlobalFuncModuleIndex = TVM_CRT_MAX_REGISTERED_MODULES; - -/*! \brief Special module handle for return values from RPCTimeEvaluator. 
*/ -static const tvm_module_index_t kTimeEvaluatorModuleIndex = 0x7fff; - -static int DecodeModuleHandle(TVMModuleHandle handle, tvm_module_index_t* out_module_index) { - tvm_module_index_t module_index; - - module_index = ((tvm_module_index_t)((uintptr_t)handle)) & ~0x8000; - if (module_index > TVM_CRT_MAX_REGISTERED_MODULES || registered_modules[module_index] == NULL) { - TVMAPIErrorf("invalid module handle: %08x", module_index); - return -1; - } - - *out_module_index = module_index; - return 0; -} - -static TVMModuleHandle EncodeModuleHandle(tvm_module_index_t module_index) { - return (TVMModuleHandle)((uintptr_t)(module_index | 0x8000)); -} - -int TVMModCreateFromCModule(const TVMModule* mod, TVMModuleHandle* out_handle) { - tvm_module_index_t idx; - - for (idx = 0; idx < TVM_CRT_MAX_REGISTERED_MODULES; idx++) { - if (registered_modules[idx] == NULL) { - registered_modules[idx] = mod; - *out_handle = EncodeModuleHandle(idx); - return 0; - } - } - - return -1; -} - -static const TVMModuleHandle kTVMModuleHandleUninitialized = (TVMModuleHandle)(~0UL); - -static TVMModuleHandle system_lib_handle; - -int TVMModFree(TVMModuleHandle mod) { - /* Never free system_lib_handler */ - if (mod == system_lib_handle && system_lib_handle != kTVMModuleHandleUninitialized) { - return 0; - } - - tvm_module_index_t module_index; - if (DecodeModuleHandle(mod, &module_index) != 0) { - return -1; - } - - registered_modules[module_index] = NULL; - return 0; -} - -static int SystemLibraryCreate(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_val, - int* ret_type_codes) { - const TVMModule* system_lib; - - if (system_lib_handle == kTVMModuleHandleUninitialized) { - system_lib = TVMSystemLibEntryPoint(); - if (TVMModCreateFromCModule(system_lib, &system_lib_handle) != 0) { - TVMAPIErrorf("error registering system lib"); - return -1; - } - } - - ret_val[0].v_handle = system_lib_handle; - ret_type_codes[0] = kTVMModuleHandle; - return 0; -} - -static TVMFunctionHandle EncodeFunctionHandle(tvm_module_index_t module_index, - tvm_function_index_t function_index) { - return (TVMFunctionHandle)(( - ((uintptr_t)(module_index | 0x8000) << (sizeof(tvm_function_index_t) * 8)) | - (function_index | 0x8000))); -} - -static int DecodeFunctionHandle(TVMFunctionHandle handle, tvm_module_index_t* module_index, - tvm_function_index_t* function_index) { - tvm_module_index_t unvalidated_module_index; - unvalidated_module_index = - (tvm_module_index_t)(((uintptr_t)handle) >> (sizeof(tvm_function_index_t) * 8)); - unvalidated_module_index &= ~0x8000; - - if (unvalidated_module_index != kTimeEvaluatorModuleIndex) { - if (unvalidated_module_index > kGlobalFuncModuleIndex) { - TVMAPIErrorf("invalid module handle: index=%08x", unvalidated_module_index); - return -1; - } else if (unvalidated_module_index < kGlobalFuncModuleIndex && - registered_modules[unvalidated_module_index] == NULL) { - TVMAPIErrorf("unregistered module: index=%08x", unvalidated_module_index); - return -1; - } - } - - *function_index = ((uint32_t)((uintptr_t)handle)) & ~0x8000; - *module_index = unvalidated_module_index; - return 0; -} - -int TVMByteArrayFree(TVMByteArray* arr) { - DLDevice dev = {kDLCPU, 0}; - int to_return = TVMPlatformMemoryFree((void*)arr->data, dev); - if (to_return != 0) { - return to_return; - } - - return TVMPlatformMemoryFree((void*)arr, dev); -} - -tvm_crt_error_t RunTimeEvaluator(tvm_function_index_t function_index, TVMValue* args, - int* type_codes, int num_args, TVMValue* ret_val, - int* ret_type_code); - -int 
TVMFuncCall(TVMFunctionHandle func_handle, TVMValue* arg_values, int* type_codes, int num_args, - TVMValue* ret_val, int* ret_type_code) { - tvm_module_index_t module_index; - tvm_function_index_t function_index; - void* resource_handle; - const TVMFuncRegistry* registry; - TVMBackendPackedCFunc func; - - if (DecodeFunctionHandle(func_handle, &module_index, &function_index) != 0) { - return -1; - } - - if (module_index == kTimeEvaluatorModuleIndex) { - return RunTimeEvaluator(function_index, arg_values, type_codes, num_args, ret_val, - ret_type_code); - } else if (module_index == kGlobalFuncModuleIndex) { - resource_handle = NULL; - registry = &global_func_registry.registry; - } else { - resource_handle = (void*)registered_modules[module_index]->registry; - registry = registered_modules[module_index]->registry; - } - - if (TVMFuncRegistry_GetByIndex(registry, function_index, &func) != 0) { - TVMAPIErrorf("invalid function index: %04" PRIx16, function_index); - return -1; - } - - ret_type_code[0] = kTVMNullptr; - ret_val[0].v_handle = NULL; - return func(arg_values, type_codes, num_args, ret_val, ret_type_code, resource_handle); -} - -static tvm_crt_error_t FindFunctionOrSetAPIError(tvm_module_index_t module_index, - const TVMFuncRegistry* registry, const char* name, - TVMFunctionHandle* out) { - tvm_function_index_t function_index; - tvm_crt_error_t err = TVMFuncRegistry_Lookup(registry, name, &function_index); - if (err != kTvmErrorNoError) { - return err; - } - - *out = EncodeFunctionHandle(module_index, function_index); - return kTvmErrorNoError; -} - -int TVMFuncGetGlobal(const char* name, TVMFunctionHandle* out) { - tvm_crt_error_t to_return = - FindFunctionOrSetAPIError(kGlobalFuncModuleIndex, &global_func_registry.registry, name, out); - // For compatibility with the C++ runtime equivalent, in src/runtime/registry.cc. 
- if (to_return == kTvmErrorFunctionNameNotFound) { - *out = NULL; - to_return = kTvmErrorNoError; - } - return to_return; -} - -int TVMModGetFunction(TVMModuleHandle mod, const char* func_name, int query_imports, - TVMFunctionHandle* out) { - tvm_module_index_t module_index; - if (DecodeModuleHandle(mod, &module_index) != 0) { - return -1; - } - - return FindFunctionOrSetAPIError(module_index, registered_modules[module_index]->registry, - func_name, out); -} - -int ModuleGetFunction(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_value, - int* ret_type_codes) { - TVMModuleHandle mod; - const char* name; - int to_return; - int query_imports; - - ret_value[0].v_handle = NULL; - ret_type_codes[0] = kTVMNullptr; - if (num_args != 3) { - TVMAPISetLastError("ModuleGetFunction expects exactly 3 arguments"); - return kTvmErrorFunctionCallNumArguments; - } - if (type_codes[0] != kTVMModuleHandle) { - TVMAPISetLastError("ModuleGetFunction expects first argument to be a Module"); - return kTvmErrorFunctionCallWrongArgType; - } - if (type_codes[1] != kTVMStr) { - TVMAPISetLastError("ModuleGetFunction expects second argument to be a string"); - return kTvmErrorFunctionCallWrongArgType; - } - - if (type_codes[2] == kDLInt || type_codes[2] == kTVMArgBool) { - query_imports = args[2].v_int64 != 0; - } else { - TVMAPISetLastError("ModuleGetFunction expects third argument to be an integer"); - return kTvmErrorFunctionCallWrongArgType; - } - - mod = (TVMModuleHandle)args[0].v_handle; - name = args[1].v_str; - to_return = TVMModGetFunction(mod, name, query_imports, &ret_value->v_handle); - - if (to_return == 0) { - ret_type_codes[0] = kTVMPackedFuncHandle; - } else { - ret_value->v_handle = NULL; - } - - // NOTE: For compatibility with C++ runtime API, return no error (but NULL function) when the - // function lookup failed. - if (to_return == kTvmErrorFunctionNameNotFound) { - to_return = kTvmErrorNoError; - } - return to_return; -} - -typedef struct TVMCReturnValue { - TVMValue* ret_val; - int* ret_type_code; -} TVMCReturnValue; - -int TVMCFuncSetReturn(TVMRetValueHandle ret, TVMValue* value, int* type_code, int num_ret) { - TVMCReturnValue* ret_val; - int idx; - - ret_val = (TVMCReturnValue*)ret; - for (idx = 0; idx < num_ret; idx++) { - ret_val->ret_val[idx] = value[idx]; - ret_val->ret_type_code[idx] = type_code[idx]; - } - - return 0; -} - -int TVMFuncFree(TVMFunctionHandle func) { - // A no-op, since we don't actually allocate anything in GetFunction. - return 0; -} - -int RPCTimeEvaluator(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_val, - int* ret_type_code); - -// Sends CRT max packet size. -int RPCGetCRTMaxPacketSize(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_value, - int* ret_type_codes) { - // 11 bytes is for microtvm overhead: - // packet start(2), length(4), session header(3), crc(2) - ret_value[0].v_int64 = TVM_CRT_MAX_PACKET_SIZE_BYTES - 11; - ret_type_codes[0] = kTVMArgInt; - return 0; -} - -// Fill the tensor in args[0] with random data using TVMPlatformGenerateRandom. 
-static int RandomFill(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_val, - int* ret_type_code) { - if (num_args != 1) { - return kTvmErrorFunctionCallNumArguments; - } - - if (type_codes[0] != kTVMDLTensorHandle) { - return kTvmErrorFunctionCallWrongArgType; - } - - DLTensor* tensor = (DLTensor*)args[0].v_handle; - TVMNDArray arr = {*tensor, 0}; - return TVMNDArray_RandomFill(&arr); -} - -tvm_crt_error_t TVMInitializeRuntime() { - int idx = 0; - tvm_crt_error_t error = kTvmErrorNoError; - - DLDevice dev = {kDLCPU, 0}; - - void* registry_backing_memory; - error = TVMPlatformMemoryAllocate(TVM_CRT_GLOBAL_FUNC_REGISTRY_SIZE_BYTES, dev, - ®istry_backing_memory); - if (error != kTvmErrorNoError) { - return error; - } - - system_lib_handle = kTVMModuleHandleUninitialized; - - error = TVMMutableFuncRegistry_Create(&global_func_registry, registry_backing_memory, - TVM_CRT_GLOBAL_FUNC_REGISTRY_SIZE_BYTES); - for (idx = 0; idx < TVM_CRT_MAX_REGISTERED_MODULES; idx++) { - registered_modules[idx] = NULL; - } - - if (error == kTvmErrorNoError) { - error = TVMFuncRegisterGlobal("runtime.SystemLib", &SystemLibraryCreate, 0); - } - - if (error == kTvmErrorNoError) { - error = TVMFuncRegisterGlobal("tvm.rpc.server.ModuleGetFunction", &ModuleGetFunction, 0); - } - - if (error == kTvmErrorNoError) { - error = TVMFuncRegisterGlobal("runtime.RPCTimeEvaluator", &RPCTimeEvaluator, 0); - } - - if (error == kTvmErrorNoError) { - error = TVMFuncRegisterGlobal("tvm.rpc.server.GetCRTMaxPacketSize", &RPCGetCRTMaxPacketSize, 0); - } - - if (error == kTvmErrorNoError) { - error = TVMFuncRegisterGlobal("tvm.contrib.random.random_fill", &RandomFill, 0); - } - - if (error != kTvmErrorNoError) { - TVMPlatformMemoryFree(registry_backing_memory, dev); - } - - return error; -} - -typedef struct { - uint16_t function_index; - TVMFunctionHandle func_to_time; - DLDevice device; - int number; - int repeat; - int min_repeat_ms; - int limit_zero_time_iterations; - int cooldown_interval_ms; - int repeats_to_cooldown; -} time_evaluator_state_t; - -static time_evaluator_state_t g_time_evaluator_state; - -int RPCTimeEvaluator(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_val, - int* ret_type_code) { - ret_val[0].v_handle = NULL; - ret_type_code[0] = kTVMNullptr; - if (num_args < 12) { - TVMAPIErrorf("not enough args"); - return kTvmErrorFunctionCallNumArguments; - } - if (type_codes[0] != kTVMModuleHandle || type_codes[1] != kTVMStr || - type_codes[2] != kTVMArgInt || type_codes[3] != kTVMArgInt || type_codes[4] != kTVMArgInt || - type_codes[5] != kTVMArgInt || type_codes[6] != kTVMArgInt || type_codes[7] != kTVMArgInt || - type_codes[8] != kTVMArgInt || type_codes[9] != kTVMArgInt || type_codes[10] != kTVMArgInt || - type_codes[11] != kTVMStr) { - TVMAPIErrorf("one or more invalid arg types"); - return kTvmErrorFunctionCallWrongArgType; - } - - TVMModuleHandle mod = (TVMModuleHandle)args[0].v_handle; - const char* name = args[1].v_str; - g_time_evaluator_state.device.device_type = args[2].v_int64; - g_time_evaluator_state.device.device_id = args[3].v_int64; - g_time_evaluator_state.number = args[4].v_int64; - g_time_evaluator_state.repeat = args[5].v_int64; - g_time_evaluator_state.min_repeat_ms = args[6].v_int64; - g_time_evaluator_state.limit_zero_time_iterations = args[7].v_int64; - g_time_evaluator_state.cooldown_interval_ms = args[8].v_int64; - g_time_evaluator_state.repeats_to_cooldown = args[9].v_int64; - - int ret_code = - TVMModGetFunction(mod, name, /* query_imports */ 0, 
&g_time_evaluator_state.func_to_time); - if (ret_code != 0) { - return ret_code; - } - - g_time_evaluator_state.function_index++; - ret_val[0].v_handle = - EncodeFunctionHandle(kTimeEvaluatorModuleIndex, g_time_evaluator_state.function_index); - ret_type_code[0] = kTVMPackedFuncHandle; - return kTvmErrorNoError; -} - -tvm_crt_error_t RunTimeEvaluator(tvm_function_index_t function_index, TVMValue* args, - int* type_codes, int num_args, TVMValue* ret_val, - int* ret_type_code) { - if (function_index != g_time_evaluator_state.function_index) { - return kTvmErrorTimeEvaluatorBadHandle; - } - - // TODO(areusch): should *really* rethink needing to return doubles - DLDevice result_byte_dev = {kDLCPU, 0}; - TVMByteArray* result_byte_arr = NULL; - tvm_crt_error_t err = - TVMPlatformMemoryAllocate(sizeof(TVMByteArray), result_byte_dev, (void*)&result_byte_arr); - if (err != kTvmErrorNoError) { - goto release_and_return; - } - result_byte_arr->data = NULL; - size_t data_size = sizeof(double) * g_time_evaluator_state.repeat; - err = TVMPlatformMemoryAllocate(data_size, result_byte_dev, (void**)&result_byte_arr->data); - if (err != kTvmErrorNoError) { - goto release_and_return; - } - result_byte_arr->size = data_size; - - // skip first time call, to activate lazy compilation components. - err = TVMFuncCall(g_time_evaluator_state.func_to_time, args, type_codes, num_args, ret_val, - ret_type_code); - if (err != kTvmErrorNoError) { - goto release_and_return; - } - - double min_repeat_seconds = ((double)g_time_evaluator_state.min_repeat_ms) / 1000; - double* iter = (double*)result_byte_arr->data; - for (int i = 0; i < g_time_evaluator_state.repeat; i++) { - double curr_res_seconds = 0.0; - int absolute_zero_times = 0; - // do-while structure ensures we run even when `min_repeat_ms` isn't set (i.e., is 0). - do { - if (curr_res_seconds > 0.0) { - double a = (min_repeat_seconds / (curr_res_seconds / g_time_evaluator_state.number) + 1); - const double golden_ratio = 1.618; - double b = g_time_evaluator_state.number * golden_ratio; - g_time_evaluator_state.number = (int64_t)(a > b ? 
a : b); - } - err = TVMPlatformBeforeMeasurement(); - if (err != kTvmErrorNoError) { - goto release_and_return; - } - err = TVMPlatformTimerStart(); - if (err != kTvmErrorNoError) { - goto release_and_return; - } - - for (int j = 0; j < g_time_evaluator_state.number; j++) { - err = TVMFuncCall(g_time_evaluator_state.func_to_time, args, type_codes, num_args, ret_val, - ret_type_code); - if (err != kTvmErrorNoError) { - goto release_and_return; - } - } - err = TVMPlatformTimerStop(&curr_res_seconds); - if (err != kTvmErrorNoError) { - goto release_and_return; - } - err = TVMPlatformAfterMeasurement(); - if (err != kTvmErrorNoError) { - goto release_and_return; - } - if (fpclassify(curr_res_seconds) == FP_ZERO) absolute_zero_times++; - } while (curr_res_seconds < min_repeat_seconds && - absolute_zero_times < g_time_evaluator_state.limit_zero_time_iterations); - double mean_exec_seconds = curr_res_seconds / g_time_evaluator_state.number; - *iter = mean_exec_seconds; - iter++; - if (g_time_evaluator_state.cooldown_interval_ms > 0 && - (i % g_time_evaluator_state.repeats_to_cooldown) == 0) { -#if defined(_WIN32) || defined(WIN32) - Sleep(g_time_evaluator_state.cooldown_interval_ms); -#elif __unix__ - usleep(g_time_evaluator_state.cooldown_interval_ms * 1000); -#else - TVMAPIErrorf( - "No support for non-zero cooldown_interval_ms for this platform: Use " - "cooldown_interval_ms = 0"); - goto release_and_return; -#endif - } - } - - *ret_type_code = kTVMBytes; - ret_val->v_handle = result_byte_arr; - return err; - -release_and_return : { - tvm_crt_error_t release_err = - TVMPlatformMemoryFree((void*)result_byte_arr->data, result_byte_dev); - if (release_err != kTvmErrorNoError) { - release_err = TVMPlatformMemoryFree((void*)result_byte_arr, result_byte_dev); - } - - if (err == kTvmErrorNoError && release_err != kTvmErrorNoError) { - err = release_err; - } -} - return err; -} - -// Default implementation, overridden by the platform runtime. -TVM_WEAK tvm_crt_error_t TVMPlatformGenerateRandom(uint8_t* buffer, size_t num_bytes) { - return kTvmErrorFunctionCallNotImplemented; -} - -// Default implementation, overridden by the platform runtime. -TVM_WEAK tvm_crt_error_t TVMPlatformBeforeMeasurement() { return kTvmErrorNoError; } - -// Default implementation, overridden by the platform runtime. -TVM_WEAK tvm_crt_error_t TVMPlatformAfterMeasurement() { return kTvmErrorNoError; } diff --git a/src/runtime/crt/common/func_registry.c b/src/runtime/crt/common/func_registry.c deleted file mode 100644 index 49cef8fd70eb..000000000000 --- a/src/runtime/crt/common/func_registry.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// LINT_C_FILE - -/*! 
- * \file tvm/runtime/crt/func_registry.c - * \brief Defines implementations of generic string-based function lookup structs - */ - -#include -#include -#include - -/*! - * \brief strcmp against the next string in the registry, and return the end. - * - * Regardless of return value, after calling this function, cursor's value will be modified to - * point at the \0 at the end of the string it currently points to. - * - * \param cursor Pointer to cursor to first string to compare. - * \param name Pointer to reference string. - * \return 0 if the string pointed to by cursor == name; non-zero otherwise. - */ -int strcmp_cursor(const char** cursor, const char* name) { - int return_value = 0; - while (return_value == 0) { - char c = **cursor; - char n = *name; - return_value = ((int)c) - ((int)n); - - if (n == 0 || c == 0) { - break; - } - - name++; - (*cursor)++; - } - - while (**cursor != 0) { - (*cursor)++; - } - - return return_value; -} - -uint16_t TVMFuncRegistry_GetNumFuncs(const TVMFuncRegistry* reg) { - uint16_t num_funcs; - memcpy(&num_funcs, reg->names, sizeof(num_funcs)); - return num_funcs; -} - -int TVMFuncRegistry_SetNumFuncs(const TVMFuncRegistry* reg, const uint16_t num_funcs) { - memcpy((char*)reg->names, &num_funcs, sizeof(num_funcs)); - return 0; -} - -const char* TVMFuncRegistry_Get0thFunctionName(const TVMFuncRegistry* reg) { - // NOTE: first function name starts at index 2 to skip num_funcs. - return (reg->names + sizeof(uint16_t)); -} - -tvm_crt_error_t TVMFuncRegistry_Lookup(const TVMFuncRegistry* reg, const char* name, - tvm_function_index_t* function_index) { - tvm_function_index_t idx; - const char* reg_name_ptr = TVMFuncRegistry_Get0thFunctionName(reg); - - idx = 0; - for (; *reg_name_ptr != '\0'; reg_name_ptr++) { - if (!strcmp_cursor(®_name_ptr, name)) { - *function_index = idx; - return kTvmErrorNoError; - } - - idx++; - } - - return kTvmErrorFunctionNameNotFound; -} - -tvm_crt_error_t TVMFuncRegistry_GetByIndex(const TVMFuncRegistry* reg, - tvm_function_index_t function_index, - TVMBackendPackedCFunc* out_func) { - uint16_t num_funcs; - - num_funcs = TVMFuncRegistry_GetNumFuncs(reg); - if (function_index >= num_funcs) { - return kTvmErrorFunctionIndexInvalid; - } - - *out_func = reg->funcs[function_index]; - return kTvmErrorNoError; -} - -tvm_crt_error_t TVMMutableFuncRegistry_Create(TVMMutableFuncRegistry* reg, uint8_t* buffer, - size_t buffer_size_bytes) { - if (buffer_size_bytes < kTvmAverageFuncEntrySizeBytes) { - return kTvmErrorBufferTooSmall; - } - - reg->registry.names = (const char*)buffer; - buffer[0] = 0; // number of functions present in buffer. - buffer[1] = 0; // note that we combine the first two elements to form a 16-bit function index. - buffer[2] = 0; // end of names list marker. - - // compute a guess of the average size of one entry: - // - assume average function name is around ~10 bytes - // - 1 byte for \0 - // - size of 1 function pointer - reg->max_functions = buffer_size_bytes / kTvmAverageFuncEntrySizeBytes; - reg->registry.funcs = - (TVMBackendPackedCFunc*)(buffer + buffer_size_bytes - reg->max_functions * sizeof(void*)); - - return kTvmErrorNoError; -} - -tvm_crt_error_t TVMMutableFuncRegistry_Set(TVMMutableFuncRegistry* reg, const char* name, - TVMBackendPackedCFunc func, int override) { - size_t idx; - char* reg_name_ptr = (char*)TVMFuncRegistry_Get0thFunctionName(&(reg->registry)); - - idx = 0; - // NOTE: safe to discard const qualifier here, since reg->registry.names was set from - // TVMMutableFuncRegistry_Create above. 
- for (; *reg_name_ptr != 0; reg_name_ptr++) { - if (!strcmp_cursor((const char**)®_name_ptr, name)) { - if (override == 0) { - return kTvmErrorFunctionAlreadyDefined; - } - ((TVMBackendPackedCFunc*)reg->registry.funcs)[idx] = func; - return kTvmErrorNoError; - } - - idx++; - } - - if (reg_name_ptr > ((const char*)reg->registry.funcs)) { - return kTvmErrorFunctionRegistryFull; - } - - size_t name_len = strlen(name); - size_t names_bytes_remaining = ((const char*)reg->registry.funcs) - reg_name_ptr; - if (idx >= reg->max_functions || name_len + 1 > names_bytes_remaining) { - return kTvmErrorFunctionRegistryFull; - } - - memcpy(reg_name_ptr, name, name_len + 1); - reg_name_ptr += name_len + 1; - *reg_name_ptr = 0; - ((TVMBackendPackedCFunc*)reg->registry.funcs)[idx] = func; - - uint16_t num_funcs; - // increment num_funcs. - num_funcs = TVMFuncRegistry_GetNumFuncs(&(reg->registry)) + 1; - TVMFuncRegistry_SetNumFuncs(&(reg->registry), num_funcs); - - return kTvmErrorNoError; -} diff --git a/src/runtime/crt/common/ndarray.c b/src/runtime/crt/common/ndarray.c deleted file mode 100644 index b0e869766bde..000000000000 --- a/src/runtime/crt/common/ndarray.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// LINT_C_FILE - -/*! - * \file ndarray.c - * \brief NDArray container infratructure. 
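 *
 * [Editor's note, illustrative only] The payload size used throughout this file is
 * ceil(num_elems * dtype.bits / 8); for example a {2, 3} tensor of int32 occupies
 * (2 * 3 * 32 + 7) / 8 = 24 bytes, which is the value TVMNDArray_DataSizeBytes computes below.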
- */ - -#include -#include -#include - -#include "crt_config.h" - -static int Create(int32_t ndim, const tvm_index_t* shape, DLDataType dtype, DLDevice dev, - TVMNDArray* array) { - memset(array, 0, sizeof(TVMNDArray)); - array->dl_tensor.ndim = ndim; - tvm_crt_error_t err; - DLDevice dldev = {kDLCPU, 0}; - err = TVMPlatformMemoryAllocate(sizeof(int64_t) * ndim, dldev, (void*)&array->dl_tensor.shape); - if (err != kTvmErrorNoError) { - return -1; - } - memcpy(array->dl_tensor.shape, shape, sizeof(int64_t) * ndim); - array->dl_tensor.dtype = dtype; - array->dl_tensor.device = dev; - array->dl_tensor.data = 0; - return 0; -} - -int64_t TVMNDArray_DataSizeBytes(TVMNDArray* array) { - int64_t num_elems = 1; - int32_t idx; - for (idx = 0; idx < array->dl_tensor.ndim; ++idx) { - num_elems *= array->dl_tensor.shape[idx]; - } - return (num_elems * array->dl_tensor.dtype.bits + 7) / 8; -} - -int TVMNDArray_Empty(int32_t ndim, const tvm_index_t* shape, DLDataType dtype, DLDevice dev, - TVMNDArray* array) { - int status = Create(ndim, shape, dtype, dev, array); - if (status != 0) { - return status; - } - int total_elem_bytes = TVMNDArray_DataSizeBytes(array); - array->dl_tensor.data = - TVMBackendAllocWorkspace(kDLCPU, 0, total_elem_bytes, dtype.code, dtype.bits); - memset(array->dl_tensor.data, 0, total_elem_bytes); - return 0; -} - -int TVMNDArray_Load(TVMNDArray* ret, const char** strm) { - int32_t status = 0; - uint64_t header, reserved; - memcpy(&header, *strm, sizeof(header)); - *strm += sizeof(header); - if (header != kTVMNDArrayMagic) { - fprintf(stderr, "Invalid DLTensor file format\n"); - status = -1; - } - memcpy(&reserved, *strm, sizeof(reserved)); - *strm += sizeof(reserved); - DLDevice dev; - int ndim; // sizeof ndim should match dlpack - DLDataType dtype; - memcpy(&dev, *strm, sizeof(dev)); - *strm += sizeof(dev); - memcpy(&ndim, *strm, sizeof(ndim)); - *strm += sizeof(ndim); - memcpy(&dtype, *strm, sizeof(dtype)); - *strm += sizeof(dtype); - if ((ndim < 0) || (ndim > TVM_CRT_MAX_NDIM)) { - fprintf(stderr, "Invalid ndim=%d: expected to be 0 ~ %d.\n", ndim, TVM_CRT_MAX_NDIM); - status = -1; - } - if (dev.device_type != kDLCPU) { - fprintf(stderr, "Invalid DLTensor device: can only save as CPU tensor\n"); - status = -1; - } - int64_t shape[TVM_CRT_MAX_NDIM] = {0}; - int32_t idx; - if (ndim != 0) { - for (idx = 0; idx < ndim; idx++) { - memcpy(&shape[idx], *strm, sizeof(int64_t)); - *strm += sizeof(shape[idx]); - } - } - status = TVMNDArray_Empty(ndim, shape, dtype, dev, ret); - if (status != 0) { - return status; - } - int64_t num_elems = 1; - int elem_bytes = (ret->dl_tensor.dtype.bits + 7) / 8; - for (idx = 0; idx < ret->dl_tensor.ndim; ++idx) { - num_elems *= ret->dl_tensor.shape[idx]; - } - int64_t data_byte_size; - memcpy(&data_byte_size, *strm, sizeof(data_byte_size)); - *strm += sizeof(data_byte_size); - if (!(data_byte_size == num_elems * elem_bytes)) { - fprintf(stderr, - "invalid DLTensor file format: data_byte_size=%d, " - "while num_elems*elem_bytes=%d\n", - (int)data_byte_size, (int)(num_elems * elem_bytes)); // NOLINT(*) - status = -1; - } - memcpy(ret->dl_tensor.data, *strm, data_byte_size); - *strm += data_byte_size; - - return status; -} - -int TVMNDArray_CreateView(TVMNDArray* arr, const tvm_index_t* shape, int32_t ndim, DLDataType dtype, - TVMNDArray* array_view) { - int status = Create(ndim, shape, dtype, arr->dl_tensor.device, array_view); - if (status != 0) { - return status; - } - array_view->dl_tensor.data = arr->dl_tensor.data; - return 0; -} - -int 
TVMNDArray_RandomFill(TVMNDArray* arr) { - int64_t num_bytes = TVMNDArray_DataSizeBytes(arr); - if (num_bytes < 0 || num_bytes > SIZE_MAX) { - return kTvmErrorFunctionCallInvalidArg; - } - - return TVMPlatformGenerateRandom(arr->dl_tensor.data, (size_t)num_bytes); -} - -void TVMNDArray_IncrementReference(TVMNDArray* arr) { arr->reference_count++; } - -uint32_t TVMNDArray_DecrementReference(TVMNDArray* arr) { - if (arr->reference_count > 0) { - arr->reference_count--; - } - - return arr->reference_count; -} - -int TVMNDArray_Release(TVMNDArray* arr) { - tvm_crt_error_t err; - DLDevice dev = {kDLCPU, 0}; - - if (TVMNDArray_DecrementReference(arr) > 0) { - return 0; - } - - err = TVMPlatformMemoryFree(arr->dl_tensor.data, dev); - if (err != kTvmErrorNoError) { - return err; - } - arr->dl_tensor.data = NULL; - - err = TVMPlatformMemoryFree(arr->dl_tensor.shape, dev); - if (err != kTvmErrorNoError) { - return err; - } - arr->dl_tensor.shape = NULL; - - return 0; -} diff --git a/src/runtime/crt/common/packed_func.c b/src/runtime/crt/common/packed_func.c deleted file mode 100644 index 645b22f3b255..000000000000 --- a/src/runtime/crt/common/packed_func.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// LINT_C_FILE - -/*! - * \file src/runtime/crt/common/packed_func.c - * \brief PackedFunc implementation. - */ -#include -#include -#include -#include - -DLDataType String2DLDataType(const char* s) { - DLDataType t; - // handle None type - if (strlen(s) == 0) { - t.bits = 0; - t.lanes = 0; - t.code = kTVMOpaqueHandle; - return t; - } - t.bits = 32; - t.lanes = 1; - const char* scan; - if (!strncmp(s, "int", 3)) { - t.code = kDLInt; - scan = s + 3; - } else if (!strncmp(s, "uint", 4)) { - t.code = kDLUInt; - scan = s + 4; - } else if (!strncmp(s, "float", 5)) { - t.code = kDLFloat; - scan = s + 5; - } else if (!strncmp(s, "bfloat", 6)) { - t.code = kDLBfloat; - scan = s + 6; - } else if (!strncmp(s, "handle", 6)) { - t.code = kTVMOpaqueHandle; - t.bits = 64; // handle uses 64 bit by default. 
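    // [Editor's note, illustrative only] Example parses this routine produces, assuming the
    // usual TVM dtype-string grammar "<base>[bits][x lanes]":
    //   "int32"     -> {code=kDLInt,           bits=32, lanes=1}
    //   "float16x4" -> {code=kDLFloat,         bits=16, lanes=4}
    //   "handle"    -> {code=kTVMOpaqueHandle, bits=64, lanes=1}
    //   "bool"      -> {code=kDLUInt,          bits=1,  lanes=1}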
- scan = s + 6; - } else if (!strcmp(s, "bool")) { - t.code = kDLUInt; - t.bits = 1; - t.lanes = 1; - return t; - } else { - scan = s; - fprintf(stderr, "unknown type %s\n", s); - } - char* xdelim; - uint8_t bits = (uint8_t)(strtoul(scan, &xdelim, 10)); - if (bits != 0) t.bits = bits; - char* endpt = xdelim; - if (*xdelim == 'x') { - t.lanes = (uint16_t)(strtoul(xdelim + 1, &endpt, 10)); - } - if (!(endpt == s + strlen(s))) { - fprintf(stderr, "unknown type %s\n", s); - } - return t; -} - -int TVMPackedFunc_InitGlobalFunc(TVMPackedFunc* pf, const char* name, const TVMArgs* args) { - int status = 0; - - pf->Call = &TVMPackedFunc_Call; - pf->SetArgs = &TVMPackedFunc_SetArgs; - - status = TVMFuncGetGlobal(name, &pf->fexec); - if (status != 0) { - return status; - } - - snprintf(pf->name, sizeof(pf->name), "%s", name); - TVMPackedFunc_SetArgs(pf, args); - return status; -} - -int TVMPackedFunc_InitModuleFunc(TVMPackedFunc* pf, TVMModuleHandle module, const char* name, - const TVMArgs* args) { - int status = 0; - - pf->Call = &TVMPackedFunc_Call; - pf->SetArgs = &TVMPackedFunc_SetArgs; - - status = TVMModGetFunction(module, name, 0, &pf->fexec); - if (status != 0) { - return status; - } - - snprintf(pf->name, sizeof(pf->name), "%s", name); - TVMPackedFunc_SetArgs(pf, args); - return status; -} - -TVMArgs TVMArgs_Create(TVMValue* values, uint32_t* tcodes, uint32_t values_count) { - uint32_t idx; - TVMArgs args; - memset(&args, 0, sizeof(args)); - for (idx = 0; idx < values_count; idx++) { - memcpy(args.values + idx, values + idx, sizeof(TVMValue)); - args.tcodes[idx] = tcodes[idx]; - } - args.values_count = values_count; - return args; -} - -int TVMPackedFunc_Call(TVMPackedFunc* pf) { - pf->ret_value.values_count = 1; - pf->ret_value.tcodes[0] = kTVMNullptr; - return TVMFuncCall(pf->fexec, pf->args.values, pf->args.tcodes, pf->args.values_count, - pf->ret_value.values, pf->ret_value.tcodes); -} - -void TVMPackedFunc_SetArgs(TVMPackedFunc* pf, const TVMArgs* args) { - memcpy(&(pf->args), args, sizeof(TVMArgs)); -} - -TVMPackedFunc* g_fexecs; -uint32_t g_fexecs_count; diff --git a/src/runtime/crt/contrib/stm32/ai_runtime_api.c b/src/runtime/crt/contrib/stm32/ai_runtime_api.c deleted file mode 100644 index 7d3cdfe12c26..000000000000 --- a/src/runtime/crt/contrib/stm32/ai_runtime_api.c +++ /dev/null @@ -1,356 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file ai_runtime_api.c - * \brief The runtime API for the TVM generated C code. 
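 *
 * [Editor's note, illustrative only] Typical lifecycle implied by this API, sketched under the
 * assumption that `net` points at a model descriptor from the .nn_models_info section and
 * `activations` is a suitably sized activation buffer:
 *
 *   ai_handle h;
 *   ai_create(net, activations, &h);        // bind weights and activations
 *   ai_tensor* in  = ai_get_input(h, 0);    // fill in->dltensor.data before running
 *   ai_tensor* out = ai_get_output(h, 0);
 *   ai_run(h);                              // on failure, ai_get_error(h) describes why
 *   ai_destroy(h);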
- */ - -// LINT_C_FILE - -#include "ai_runtime_api.h" - -#include -#include -#include - -// ======================================================= -// ai_network_t -// ======================================================= - -typedef struct { - ai_model_info* info; - ai_tensor** inputs; - ai_tensor** outputs; - ai_ptr activations; - const char* error; -} ai_network_t; - -// -// .nn_models_info section -// -extern uintptr_t __models_section_start__; -extern uintptr_t __models_section_end__; - -uint32_t _modelsSection_start = (uint32_t)(&__models_section_start__); -uint32_t _modelsSection_end = (uint32_t)(&__models_section_end__); - -// ======================================================= -// Iterator -// ======================================================= -ai_model_iterator ai_model_iterator_begin() { - return _modelsSection_start; // begin() -} - -ai_model_iterator ai_model_iterator_end() { return _modelsSection_end; } - -ai_model_iterator ai_model_iterator_next(ai_model_iterator idx) { - return (idx + sizeof(ai_model_info)); -} - -ai_model_info* ai_model_iterator_value(ai_model_iterator idx) { return (ai_model_info*)idx; } - -// ======================================================= -// ai_create -// ======================================================= -AI_API_ENTRY ai_status ai_create(ai_model_info* nn, ai_ptr activations, ai_handle* handle) { - uint32_t n_inputs = AI_MODEL_n_inputs(nn); - uint32_t n_outputs = AI_MODEL_n_outputs(nn); - - ai_status status = AI_STATUS_OK; - - // - // Create internal network representation - // - ai_network_t* network = (ai_network_t*)malloc(sizeof(ai_network_t)); - - network->info = nn; - - for (int i = 0; i < n_inputs; i++) { - network->inputs = AI_MODEL_inputs(nn); - } - for (int i = 0; i < n_outputs; i++) { - network->outputs = AI_MODEL_outputs(nn); - } - - network->activations = activations; - - network->error = NULL; - - const ai_ptr params = nn->ai_get_params(); - status = nn->ai_create(params, activations); - if (status != AI_STATUS_OK) { - network->error = TVMGetLastError(); - } - - // - // Setup weights and activations - // - *handle = network; - - return status; -} - -// ======================================================= -// ai_destroy -// ======================================================= -AI_API_ENTRY ai_status ai_destroy(ai_handle handle) { - if (handle == NULL) { - return AI_STATUS_ERROR; - } - - ai_network_t* network = (ai_network_t*)handle; - - free(network); - - return AI_STATUS_OK; -} - -// ======================================================= -// ai_get_error -// ======================================================= -AI_API_ENTRY -const char* ai_get_error(ai_handle handle) { - if (handle == NULL) { - return "Network handle is NULL"; - } - ai_network_t* network = (ai_network_t*)handle; - if (network->error == NULL) { - return ""; - } - return network->error; -} - -// ======================================================= -// ai_get_input_size -// ======================================================= -AI_API_ENTRY int32_t ai_get_input_size(ai_handle handle) { - if (handle == NULL) { - return 0; - } - ai_network_t* network = (ai_network_t*)handle; - return AI_MODEL_n_inputs(network->info); -} - -// ======================================================= -// ai_get_output_size -// ======================================================= -AI_API_ENTRY int32_t ai_get_output_size(ai_handle handle) { - if (handle == NULL) { - return 0; - } - ai_network_t* network = (ai_network_t*)handle; - return 
AI_MODEL_n_outputs(network->info); -} - -// ======================================================= -// ai_get_input -// ======================================================= -AI_API_ENTRY ai_tensor* ai_get_input(ai_handle handle, int32_t index) { - if (handle == NULL) { - return NULL; - } - ai_network_t* network = (ai_network_t*)handle; - if (index >= AI_MODEL_n_inputs(network->info)) { - network->error = "Input index out of range"; - return NULL; - } - return (network->inputs)[index]; -} - -// ======================================================= -// ai_get_output -// ======================================================= -AI_API_ENTRY ai_tensor* ai_get_output(ai_handle handle, int32_t index) { - if (handle == NULL) { - return NULL; - } - ai_network_t* network = (ai_network_t*)handle; - if (index >= AI_MODEL_n_outputs(network->info)) { - network->error = "Output index out of range"; - return NULL; - } - return (network->outputs)[index]; -} - -// ======================================================= -// ai_run -// ======================================================= -AI_API_ENTRY ai_status ai_run(ai_handle handle) { - if (handle == NULL) { - return AI_STATUS_ERROR; - } - ai_network_t* network = (ai_network_t*)handle; - - ai_model_info* nn = network->info; - - uint32_t n_inputs = AI_MODEL_n_inputs(nn); - uint32_t n_outputs = AI_MODEL_n_outputs(nn); - ai_status status = AI_STATUS_OK; - - // - // Check that input tensors have been specified - // - uint32_t i; - for (i = 0; i < n_inputs; i++) { - ai_tensor* input_tensor = network->inputs[i]; - DLTensor* input = &input_tensor->dltensor; - if (input->data == NULL) { - network->error = "Network input NULL"; - return AI_STATUS_ERROR; - } - } - for (i = 0; i < n_outputs; i++) { - ai_tensor* output_tensor = network->outputs[i]; - DLTensor* output = &output_tensor->dltensor; - if (output->data == NULL) { - network->error = "Network output NULL"; - return AI_STATUS_ERROR; - } - } - - status = nn->ai_run(network->inputs, network->outputs); - - if (status != AI_STATUS_OK) { - const char* err = TVMGetLastError(); - network->error = err; - } - - return status; -} - -// ======================================================= -// ai_get_name -// ======================================================= -const char* ai_get_name(ai_handle handle) { - if (handle == NULL) { - return NULL; - } - ai_network_t* network = (ai_network_t*)handle; - return AI_MODEL_name(network->info); -} - -// ======================================================= -// ai_get_datetime -// ======================================================= -const char* ai_get_datetime(ai_handle handle) { - if (handle == NULL) { - return NULL; - } - ai_network_t* network = (ai_network_t*)handle; - return AI_MODEL_datetime(network->info); -} - -// ======================================================= -// ai_get_revision -// ======================================================= -const char* ai_get_revision(ai_handle handle) { - if (handle == NULL) { - return NULL; - } - ai_network_t* network = (ai_network_t*)handle; - return AI_MODEL_revision(network->info); -} - -// ======================================================= -// ai_get_tool_version -// ======================================================= -const char* ai_get_tool_version(ai_handle handle) { - if (handle == NULL) { - return NULL; - } - ai_network_t* network = (ai_network_t*)handle; - return AI_MODEL_tool_version(network->info); -} - -// ======================================================= -// ai_get_api_version -// 
======================================================= -const char* ai_get_api_version(ai_handle handle) { - if (handle == NULL) { - return NULL; - } - ai_network_t* network = (ai_network_t*)handle; - return AI_MODEL_api_version(network->info); -} - -// ======================================================= -// ai_get_node_size -// ======================================================= -uint32_t ai_get_node_size(ai_handle handle) { - if (handle == NULL) { - return 0; - } - ai_network_t* network = (ai_network_t*)handle; - return AI_MODEL_n_nodes(network->info); -} - -// ======================================================= -// ai_get_activations_size -// ======================================================= -uint32_t ai_get_activations_size(ai_handle handle) { - if (handle == NULL) { - return 0; - } - ai_network_t* network = (ai_network_t*)handle; - return AI_MODEL_activations_size(network->info); -} - -// ======================================================= -// ai_get_params_size -// ======================================================= -uint32_t ai_get_params_size(ai_handle handle) { - if (handle == NULL) { - return 0; - } - ai_network_t* network = (ai_network_t*)handle; - return AI_MODEL_params_size(network->info); -} - -// ======================================================= -// ai_get_activations -// ======================================================= -ai_ptr ai_get_activations(ai_handle handle) { - if (handle == NULL) { - return 0; - } - ai_network_t* network = (ai_network_t*)handle; - return network->activations; -} - -// ======================================================= -// ai_get_params -// ======================================================= -const ai_ptr ai_get_params(ai_handle handle) { - if (handle == NULL) { - return NULL; - } - ai_network_t* network = (ai_network_t*)handle; - return network->info->ai_get_params(); -} - -// ======================================================= -// ai_get_quantization -// ======================================================= -const ai_quantization_info* ai_get_quantization(ai_tensor* tensor) { - if (tensor == NULL) { - return NULL; - } - return tensor->quant; -} diff --git a/src/runtime/crt/contrib/stm32/ai_runtime_api.h b/src/runtime/crt/contrib/stm32/ai_runtime_api.h deleted file mode 100644 index 10056fde7c10..000000000000 --- a/src/runtime/crt/contrib/stm32/ai_runtime_api.h +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file ai_runtime_api.h - * \brief The runtime API for the TVM generated C code. 
- */ - -#ifndef TVM_RUNTIME_CRT_CONTRIB_STM32_AI_RUNTIME_API_H_ -#define TVM_RUNTIME_CRT_CONTRIB_STM32_AI_RUNTIME_API_H_ - -#include -#include -#include - -#include "dlpack/dlpack.h" // From TVM -#include "tvm/runtime/c_runtime_api.h" // From TVM - -// -// This describes current ai_runtime version -// -#define AI_PLATFORM_RUNTIME_MAJOR 1 -#define AI_PLATFORM_RUNTIME_MINOR 0 -#define AI_PLATFORM_RUNTIME_MICRO 0 - -#define AI_STATIC static - -#if defined(_MSC_VER) -#define AI_INLINE __inline -#define AI_API_ENTRY __declspec(dllexport) -#define AI_ALIGNED(x) /* AI_ALIGNED(x) */ -#elif defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) -#define AI_INLINE inline -#define AI_API_ENTRY /* AI_API_ENTRY */ -#define AI_ALIGNED(x) AI_CONCAT(AI_ALIGNED_, x) -#elif defined(__CC_ARM) -#define AI_INLINE __inline -#define AI_API_ENTRY __attribute__((visibility("default"))) -#define AI_ALIGNED(x) __attribute__((aligned(x))) -/* Keil disallows anonymous union initialization by default */ -#pragma anon_unions -#elif defined(__GNUC__) -#define AI_INLINE __inline -#define AI_API_ENTRY __attribute__((visibility("default"))) -#define AI_ALIGNED(x) __attribute__((aligned(x))) -#else -/* Dynamic libraries are not supported by the compiler */ -#define AI_API_ENTRY /* AI_API_ENTRY */ -#define AI_ALIGNED(x) /* AI_ALIGNED(x) */ -#endif - -/*********************************************************/ - -typedef void* ai_handle; - -#define AI_HANDLE_PTR(ptr_) ((ai_handle)(ptr_)) -#define AI_HANDLE_NULL AI_HANDLE_PTR(NULL) - -typedef uint8_t* ai_ptr; - -typedef enum { AI_STATUS_OK = 0, AI_STATUS_ERROR = 1, AI_STATUS_DELEGATE_ERROR = 2 } ai_status; - -// ======================================================= -// ai_quantization_info -// -// Parameters for asymmetric quantization across a dimension (i.e -// per output channel quantization). -// quantized_dimension specifies which dimension the scales and -// zero_points correspond to. -// For a particular value in quantized_dimension, quantized values -// can be converted back to float using: -// real_value = scale * (quantized_value - zero_point) -// ======================================================= - -typedef struct { - /*! - * \brief The quantization info, if quantized - */ - float* scale; - int32_t* zero_point; - int32_t dim; -} ai_quantization_info; - -// ======================================================= -// ai_tensor -// ======================================================= - -typedef struct { - /*! - * \brief The TVM tensor. - */ - DLTensor dltensor; - /*! 
- * \brief The quantization info, if quantized - */ - ai_quantization_info* quant; -} ai_tensor; - -// ======================================================= -// get_dltensor -// ======================================================= -AI_STATIC AI_INLINE DLTensor* get_dltensor(ai_tensor* tensor) { return &tensor->dltensor; } - -// ======================================================= -// get_tensor_elts -// ======================================================= -AI_STATIC AI_INLINE uint32_t get_tensor_elts(const ai_tensor* tensor) { - const DLTensor* t = &tensor->dltensor; - uint32_t elts = 1; - for (int i = 0; i < t->ndim; ++i) { - elts *= t->shape[i]; - } - return elts; -} - -// ======================================================= -// get_tensor_size -// ======================================================= -AI_STATIC AI_INLINE uint32_t get_tensor_size(const ai_tensor* tensor) { - const DLTensor* t = &tensor->dltensor; - uint32_t size = 1; - for (int i = 0; i < t->ndim; ++i) { - size *= t->shape[i]; - } - size *= (t->dtype.bits * t->dtype.lanes + 7) / 8; - return size; -} - -// ======================================================= -// ai_network_info -// ======================================================= - -typedef struct { - const char* name; - const char* datetime; - const char* revision; - const char* tool_version; - const char* api_version; - uint16_t n_nodes; - uint8_t n_inputs; - uint8_t n_outputs; - uint32_t activations_size; - uint32_t params_size; - ai_ptr activations; - ai_tensor** inputs; - ai_tensor** outputs; - const ai_ptr (*ai_get_params)(void); - ai_status (*ai_create)(const ai_ptr weights, const ai_ptr activations); - ai_status (*ai_destroy)(); - ai_status (*ai_run)(ai_tensor* input[], ai_tensor* output[]); -} ai_model_info; - -#define AI_MODEL_name(x) (x->name) -#define AI_MODEL_datetime(x) (x->datetime) -#define AI_MODEL_revision(x) (x->revision) -#define AI_MODEL_tool_version(x) (x->tool_version) -#define AI_MODEL_api_version(x) (x->api_version) -#define AI_MODEL_n_nodes(x) (x->n_nodes) -#define AI_MODEL_n_inputs(x) (x->n_inputs) -#define AI_MODEL_n_outputs(x) (x->n_outputs) -#define AI_MODEL_activations_size(x) (x->activations_size) -#define AI_MODEL_params_size(x) (x->params_size) -#define AI_MODEL_inputs(x) (x->inputs) -#define AI_MODEL_outputs(x) (x->outputs) -#define AI_MODEL_activations(x) (x->activations) - -// ======================================================= -// Iterator -// -// Usage: -// -// for (ai_models_iterator it = ai_models_iterator_begin(); -// it != ai_models_iterator_end(); -// it = ai_models_iterator_next(it)) { -// const char * name = ai_models_iterator_value(it); -// } -// -// ======================================================= - -typedef uint32_t ai_model_iterator; - -ai_model_iterator ai_model_iterator_begin(); -ai_model_iterator ai_model_iterator_next(ai_model_iterator it); -ai_model_iterator ai_model_iterator_end(); -ai_model_info* ai_model_iterator_value(ai_model_iterator it); - -// ======================================================= -// External Interface -// ======================================================= - -ai_status ai_create(ai_model_info* nn, ai_ptr activations, ai_handle* handle); - -ai_status ai_destroy(ai_handle handle); - -const char* ai_get_error(ai_handle handle); - -int32_t ai_get_input_size(ai_handle handle); - -int32_t ai_get_output_size(ai_handle handle); - -ai_tensor* ai_get_input(ai_handle handle, int32_t index); - -ai_tensor* ai_get_output(ai_handle handle, int32_t index); - 
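/*
 * =======================================================
 * Usage sketch (editor's illustration, not part of the original sources)
 *
 * One plausible way to drive the interface declared above: walk the
 * registered models, create each one with a caller-owned activations
 * arena, bind input/output buffers, run, and tear down. The helper name
 * run_registered_models, the single-input/single-output assumption and
 * the malloc-based buffer management are assumptions, not part of the API.
 * =======================================================
 */
#include <stdlib.h>

static ai_status run_registered_models(void) {
  for (ai_model_iterator it = ai_model_iterator_begin(); it != ai_model_iterator_end();
       it = ai_model_iterator_next(it)) {
    ai_model_info* nn = ai_model_iterator_value(it);

    /* The activations arena is caller-owned; its size is recorded in the model info. */
    ai_ptr activations = (ai_ptr)malloc(AI_MODEL_activations_size(nn));
    if (activations == NULL) {
      return AI_STATUS_ERROR;
    }

    ai_handle handle = AI_HANDLE_NULL;
    if (ai_create(nn, activations, &handle) != AI_STATUS_OK) {
      free(activations);
      return AI_STATUS_ERROR;
    }

    /* ai_run() fails if any input/output DLTensor still has a NULL data
     * pointer, so bind caller buffers first (sized via get_tensor_size). */
    ai_tensor* input = ai_get_input(handle, 0);
    ai_tensor* output = ai_get_output(handle, 0);
    ai_status status = AI_STATUS_ERROR;
    if (input != NULL && output != NULL) {
      input->dltensor.data = malloc(get_tensor_size(input));
      output->dltensor.data = malloc(get_tensor_size(output));
      /* ... fill input->dltensor.data with real input values here ... */
      status = ai_run(handle);
      free(input->dltensor.data);
      free(output->dltensor.data);
    }

    ai_destroy(handle);
    free(activations);
    if (status != AI_STATUS_OK) {
      return status;
    }
  }
  return AI_STATUS_OK;
}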
-ai_status ai_run(ai_handle handle); - -// -// Additional methods -// -const char* ai_get_name(ai_handle handle); -const char* ai_get_datetime(ai_handle handle); -const char* ai_get_revision(ai_handle handle); -const char* ai_get_tool_version(ai_handle handle); -const char* ai_get_api_version(ai_handle handle); - -uint32_t ai_get_node_size(ai_handle handle); -uint32_t ai_get_activations_size(ai_handle handle); -uint32_t ai_get_params_size(ai_handle handle); - -ai_ptr ai_get_activations(ai_handle handle); -const ai_ptr ai_get_params(ai_handle handle); - -// -// Quantization -// -const ai_quantization_info* ai_get_quantization(ai_tensor* tensor); - -#endif // TVM_RUNTIME_CRT_CONTRIB_STM32_AI_RUNTIME_API_H_ diff --git a/src/runtime/crt/contrib/stm32/crt_config.h b/src/runtime/crt/contrib/stm32/crt_config.h deleted file mode 100644 index dc583a6fee53..000000000000 --- a/src/runtime/crt/contrib/stm32/crt_config.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file tvm/runtime/crt_config.h.template - * \brief Template for CRT configuration, to be modified on each target. - */ -#ifndef TVM_RUNTIME_CRT_CONTRIB_STM32_CRT_CONFIG_H_ -#define TVM_RUNTIME_CRT_CONTRIB_STM32_CRT_CONFIG_H_ - -#include - -/*! Log level of the CRT runtime */ -#define TVM_CRT_LOG_LEVEL TVM_CRT_LOG_LEVEL_DEBUG - -#endif // TVM_RUNTIME_CRT_CONTRIB_STM32_CRT_CONFIG_H_ diff --git a/src/runtime/crt/contrib/stm32/runtime.c b/src/runtime/crt/contrib/stm32/runtime.c deleted file mode 100644 index 4583eb3c8eca..000000000000 --- a/src/runtime/crt/contrib/stm32/runtime.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file runtime.c - * \brief A minimal "C" runtime support required by the TVM - * generated C code. 
Declared in "runtime/c_backend_api.h" - * and "runtime/c_runtime_api.h" - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static char* g_last_error = NULL; - -// ==================================================== -// TVMPlatformMemoryAllocate -// ==================================================== -tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { -#ifdef __arm__ - *out_ptr = malloc(num_bytes); -#else // _x86_ - *out_ptr = malloc(num_bytes); -#endif - return (*out_ptr == NULL) ? kTvmErrorPlatformNoMemory : kTvmErrorNoError; -} - -// ==================================================== -// TVMPlatformMemoryFree -// ==================================================== -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { - free(ptr); - return kTvmErrorNoError; -} - -// ==================================================== -// TVMFuncRegisterGlobal -// ==================================================== -int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) { return -1; } - -// ==================================================== -// TVMPlatformAbort -// ==================================================== -void __attribute__((noreturn)) TVMPlatformAbort(tvm_crt_error_t code) { - for (;;) { - } -} - -// ==================================================== -// TVMLogf -// ==================================================== -void TVMLogf(const char* msg, ...) { return; } - -// ==================================================== -// TVMAPISetLastError -// ==================================================== -void TVMAPISetLastError(const char* msg) { - if (g_last_error) { - free(g_last_error); - } - uint32_t nbytes = strlen(msg) + 1; - g_last_error = malloc(nbytes); - snprintf(g_last_error, nbytes, "%s", msg); -} - -// ==================================================== -// TVMGetLastError -// ==================================================== -const char* TVMGetLastError(void) { - assert(g_last_error); - return g_last_error; -} diff --git a/src/runtime/crt/crt_config.h.template b/src/runtime/crt/crt_config.h.template deleted file mode 100644 index 1d32253282e8..000000000000 --- a/src/runtime/crt/crt_config.h.template +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file tvm/runtime/crt_config.h - * \brief Template for CRT configuration, to be modified on each target. - */ -#ifndef TVM_RUNTIME_CRT_CRT_CONFIG_H_ -#define TVM_RUNTIME_CRT_CRT_CONFIG_H_ - -/*! Log level of the CRT runtime */ -#define TVM_CRT_LOG_LEVEL TVM_CRT_LOG_LEVEL_DEBUG - -/*! Support low-level debugging in MISRA-C runtime */ -#define TVM_CRT_DEBUG ${TVM_CRT_DEBUG} - -/*! 
Maximum supported dimension in NDArray */ -#define TVM_CRT_MAX_NDIM ${TVM_CRT_MAX_NDIM} - -/*! Maximum supported arguments in generated functions */ -#define TVM_CRT_MAX_ARGS ${TVM_CRT_MAX_ARGS} - -/*! Size of the global function registry, in bytes. */ -#define TVM_CRT_GLOBAL_FUNC_REGISTRY_SIZE_BYTES ${TVM_CRT_GLOBAL_FUNC_REGISTRY_SIZE_BYTES} - -/*! Maximum number of registered modules. */ -#define TVM_CRT_MAX_REGISTERED_MODULES ${TVM_CRT_MAX_REGISTERED_MODULES} - -/*! Maximum packet size, in bytes, including the length header. */ -#define TVM_CRT_MAX_PACKET_SIZE_BYTES ${TVM_CRT_MAX_PACKET_SIZE_BYTES} - -/*! Maximum supported string length in dltype, e.g. "int8", "int16", "float32" */ -#define TVM_CRT_MAX_STRLEN_DLTYPE ${TVM_CRT_MAX_STRLEN_DLTYPE} - -/*! Maximum supported string length in function names */ -#define TVM_CRT_MAX_STRLEN_FUNCTION_NAME ${TVM_CRT_MAX_STRLEN_FUNCTION_NAME} - -/*! Maximum supported string length in parameter names */ -#define TVM_CRT_MAX_STRLEN_PARAM_NAME ${TVM_CRT_MAX_STRLEN_PARAM_NAME} - -/*! Enable checks to enforce the stack allocator with a FIFO ordering. Off by default */ -// #define TVM_CRT_STACK_ALLOCATOR_ENABLE_FIFO_CHECK - -#endif // TVM_RUNTIME_CRT_CRT_CONFIG_H_ diff --git a/src/runtime/crt/graph_executor/graph_executor.c b/src/runtime/crt/graph_executor/graph_executor.c deleted file mode 100644 index 395a343ccb41..000000000000 --- a/src/runtime/crt/graph_executor/graph_executor.c +++ /dev/null @@ -1,1275 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// LINT_C_FILE - -/*! - * \file graph_executor.c - * \brief implement graph executor in pure C - */ - -#include -#include -#include -#include -#include -#include - -#include "crt_config.h" - -#ifndef MAX -#define MAX(a, b) (((a) > (b)) ? 
(a) : (b)) -#endif // MAX - -uint32_t Shape_Accumulate(int64_t* shape, uint32_t ndim) { - int64_t accum = 1; - uint32_t idx; - for (idx = 0; idx < ndim; idx++) { - if (shape[idx] == 0) { - break; - } - accum *= shape[idx]; - } - return accum; -} - -int NodeEntry_Load(TVMGraphExecutorNodeEntry* entry, JSONReader* reader) { - int status = 0; - reader->BeginArray(reader); - if (!(reader->NextArrayItem(reader))) { - fprintf(stderr, "invalid json format: failed to parse `node_id`\n"); - status = -1; - } - reader->ReadUnsignedInteger(reader, &(entry->node_id)); - if (!(reader->NextArrayItem(reader))) { - fprintf(stderr, "invalid json format: failed to parse `index`\n"); - status = -1; - } - reader->ReadUnsignedInteger(reader, &(entry->index)); - if (reader->NextArrayItem(reader)) { - reader->ReadUnsignedInteger(reader, &(entry->version)); - if (reader->NextArrayItem(reader)) { - fprintf(stderr, "invalid json format: failed to parse `version`\n"); - status = -1; - } - } else { - entry->version = 0; - } - return status; -} - -void TVMGraphExecutorNode_LoadAttrs(TVMGraphExecutorNode* node, JSONReader* reader, - TVMOpParam* param) { - int bitmask = 0; - char key[20], value[TVM_CRT_MAX_STRLEN_FUNCTION_NAME]; - memset(param, 0, sizeof(TVMOpParam)); - memset(key, 0, sizeof(key)); - memset(value, 0, sizeof(value)); - reader->BeginObject(reader); - while (reader->NextObjectItem(reader, key, sizeof(key))) { - int status = reader->ReadString(reader, value, sizeof(value)); - if (status != 0) { - fprintf(stderr, "error reading value for key: %s\n", key); - break; - } - if (!strcmp(key, "func_name")) { - snprintf(param->func_name, sizeof(value), "%s", value); - bitmask |= 1; - } else if (!strcmp(key, "num_inputs")) { - param->num_inputs = strtoul(value, 0, 10); - bitmask |= 2; - } else if (!strcmp(key, "num_outputs")) { - param->num_outputs = strtoul(value, 0, 10); - bitmask |= 4; - } else if (!strcmp(key, "flatten_data")) { - param->flatten_data = strtoul(value, 0, 10); - bitmask |= 8; -#if TVM_CRT_DEBUG - } else { - printf("do not support key %s", key); -#endif // TVM_CRT_DEBUG - } - } - if (bitmask != (1 | 2 | 4 | 8)) { - fprintf(stderr, "invalid format\n"); - } -} - -int TVMGraphExecutorNode_Load(TVMGraphExecutorNode* node, JSONReader* reader) { - int status = 0; - reader->BeginObject(reader); - int bitmask = 0; - char key[20]; - while (reader->NextObjectItem(reader, key, sizeof(key))) { - if (!strcmp(key, "op")) { - status = reader->ReadString(reader, node->op_type, sizeof(node->op_type)); - if (status != 0) { - fprintf(stderr, "error reading op\n"); - break; - } - bitmask |= 1; - } else if (!strcmp(key, "name")) { - status = reader->ReadString(reader, node->name, sizeof(node->name)); - if (status != 0) { - fprintf(stderr, "error reading name\n"); - break; - } - bitmask |= 2; - } else if (!strcmp(key, "inputs")) { - size_t count = 0; - reader->BeginArray(reader); - size_t num_inputs = 0; - if (reader->ArrayLength(reader, &num_inputs) != 0) { - fprintf(stderr, "error determining inputs array length\n"); - break; - } - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate( - sizeof(TVMGraphExecutorNodeEntry) * num_inputs, dev, (void**)&node->inputs); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - return -1; - } - while (reader->NextArrayItem(reader)) { - if (count == num_inputs) { - fprintf(stderr, "too many array elements\n"); - return -1; - } - - TVMGraphExecutorNodeEntry* inputs = node->inputs + count; - reader->BeginArray(reader); - if 
(!reader->NextArrayItem(reader)) { - fprintf(stderr, "invalid json format\n"); - status = -1; - break; - } - reader->ReadUnsignedInteger(reader, &(inputs->node_id)); - if (!reader->NextArrayItem(reader)) { - fprintf(stderr, "invalid json format\n"); - status = -1; - break; - } - reader->ReadUnsignedInteger(reader, &(inputs->index)); - if (reader->NextArrayItem(reader)) { - reader->ReadUnsignedInteger(reader, &(inputs->version)); - if (reader->NextArrayItem(reader)) { - fprintf(stderr, "invalid json format\n"); - status = -1; - break; - } - } else { - inputs->version = 0; - } - count++; - } - node->inputs_count = count; - bitmask |= 4; - } else if (!strcmp(key, "attr") || !strcmp(key, "attrs")) { - TVMOpParam param; - - TVMGraphExecutorNode_LoadAttrs(node, reader, ¶m); - memcpy(&node->param, ¶m, sizeof(param)); - } else if (!strcmp(key, "control_deps")) { - fprintf(stderr, "do not support key %s", key); - status = -1; - } else { - fprintf(stderr, "do not support key %s", key); - status = -1; - } - if (status != 0) { - break; - } - } - if (bitmask != (1 | 2 | 4)) { - fprintf(stderr, "invalid format\n"); - status = -1; - } - return status; -} - -TVMGraphExecutorNode TVMGraphExecutorNodeCreate() { - TVMGraphExecutorNode node; - memset(&node, 0, sizeof(TVMGraphExecutorNode)); - node.LoadAttrs = TVMGraphExecutorNode_LoadAttrs; - node.Load = TVMGraphExecutorNode_Load; - return node; -} - -int TVMGraphExecutorNodeRelease(TVMGraphExecutorNode* node) { - if (!node) { - return 0; - } - if (node->inputs) { - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryFree(node->inputs, dev); - node->inputs = 0; - if (err != kTvmErrorNoError) { - return -1; - } - } - - return 0; -} - -int TVMGraphExecutorGraphAttr_Load(TVMGraphExecutorGraphAttr* attr, JSONReader* reader) { - int status = 0; - int bitmask = 0; - char key[16], type[16]; - uint32_t storage_id_count = 0; - uint32_t dltype_count = 0; - uint32_t shape_count = 0; - uint32_t device_index_count = 0; - reader->BeginObject(reader); - while (reader->NextObjectItem(reader, key, sizeof(key))) { - if (!strcmp(key, "dltype")) { - reader->BeginArray(reader); - if (!(reader->NextArrayItem(reader))) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - status = reader->ReadString(reader, type, sizeof(type)); - if (status != 0) { - fprintf(stderr, "error reading dltype type\n"); - break; - } - if (strcmp(type, "list_str")) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - if (!(reader->NextArrayItem(reader))) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - reader->BeginArray(reader); - size_t num_items = 0; - if (reader->ArrayLength(reader, &num_items) != 0) { - fprintf(stderr, "error determing list_str length\n"); - status = -1; - break; - } - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(TVM_CRT_MAX_STRLEN_DLTYPE * num_items, dev, - (void**)&attr->dltype); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - return -1; - } - dltype_count = 0; - while (reader->NextArrayItem(reader)) { - if (dltype_count == num_items) { - fprintf(stderr, "array too big\n"); - status = -1; - return status; - } - status = reader->ReadString(reader, attr->dltype + dltype_count * TVM_CRT_MAX_STRLEN_DLTYPE, - TVM_CRT_MAX_STRLEN_DLTYPE); - if (status != 0) { - fprintf(stderr, "error reading dltype array item"); - break; - } - dltype_count++; - } - attr->dltype_count = dltype_count; - - if (reader->NextArrayItem(reader)) { - 
fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - bitmask |= 1; - } else if (!strcmp(key, "storage_id")) { - reader->BeginArray(reader); - if (!(reader->NextArrayItem(reader))) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - status = reader->ReadString(reader, type, sizeof(type)); - if (status != 0) { - fprintf(stderr, "error reading device_index array item"); - } - if (strcmp(type, "list_int")) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - if (!(reader->NextArrayItem(reader))) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - reader->BeginArray(reader); - size_t num_items = 0; - if (reader->ArrayLength(reader, &num_items) != 0) { - fprintf(stderr, "error determing list_str length\n"); - status = -1; - break; - } - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = - TVMPlatformMemoryAllocate(sizeof(uint32_t) * num_items, dev, (void**)&attr->storage_id); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - return -1; - } - storage_id_count = 0; - while (reader->NextArrayItem(reader)) { - if (storage_id_count == num_items) { - fprintf(stderr, "array too big\n"); - status = -1; - return status; - } - reader->ReadUnsignedInteger(reader, &(attr->storage_id[storage_id_count])); - storage_id_count++; - } - if (reader->NextArrayItem(reader)) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - bitmask |= 2; - } else if (!strcmp(key, "shape")) { - reader->BeginArray(reader); - if (!(reader->NextArrayItem(reader))) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - status = reader->ReadString(reader, type, sizeof(type)); - if (status != 0) { - fprintf(stderr, "error reading shape array item\n"); - break; - } - if (strcmp(type, "list_shape")) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - if (!(reader->NextArrayItem(reader))) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - reader->BeginArray(reader); - size_t num_items = 0; - if (reader->ArrayLength(reader, &num_items) != 0) { - fprintf(stderr, "error determing list_str length\n"); - status = -1; - break; - } - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate( - sizeof(int64_t) * TVM_CRT_MAX_NDIM * num_items, dev, (void**)&attr->shape); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - status = -1; - break; - } - err = TVMPlatformMemoryAllocate(sizeof(uint32_t) * num_items, dev, (void**)&attr->ndim); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - status = -1; - break; - } - shape_count = 0; - while (reader->NextArrayItem(reader)) { - if (shape_count == num_items) { - fprintf(stderr, "array too big\n"); - status = -1; - return status; - } - reader->BeginArray(reader); - int64_t* attr_shape_ptr = attr->shape + shape_count * TVM_CRT_MAX_NDIM; - reader->ReadInteger(reader, attr_shape_ptr + 0); - uint32_t ndim = 1; - if (reader->NextArrayItem(reader)) { - for (ndim = 1; ndim < TVM_CRT_MAX_NDIM; ndim++) { - if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, attr_shape_ptr + ndim); - } else { - break; - } - } - if (ndim == TVM_CRT_MAX_NDIM) { - reader->NextArrayItem(reader); - } - } - attr->ndim[shape_count] = ndim; - shape_count++; - } - attr->shape_count = shape_count; - if (reader->NextArrayItem(reader)) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - 
bitmask |= 4; - } else if (!strcmp(key, "device_index")) { - reader->BeginArray(reader); - if (!(reader->NextArrayItem(reader))) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - status = reader->ReadString(reader, type, sizeof(type)); - if (status != 0) { - fprintf(stderr, "error reading device_index array item"); - break; - } - if (strcmp(type, "list_int")) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - if (!(reader->NextArrayItem(reader))) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - reader->BeginArray(reader); - size_t num_items = 0; - if (reader->ArrayLength(reader, &num_items) != 0) { - fprintf(stderr, "error determing list_int length\n"); - status = -1; - break; - } - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = - TVMPlatformMemoryAllocate(sizeof(uint32_t) * num_items, dev, (void**)&attr->device_index); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - status = -1; - break; - } - device_index_count = 0; - while (reader->NextArrayItem(reader)) { - if (device_index_count == num_items) { - fprintf(stderr, "array too big\n"); - status = -1; - return status; - } - reader->ReadUnsignedInteger(reader, &(attr->device_index[device_index_count])); - device_index_count++; - } - if (reader->NextArrayItem(reader)) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - } else { - reader->BeginArray(reader); - if (!(reader->NextArrayItem(reader))) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - reader->ReadString(reader, type, sizeof(type)); - if (!strcmp(type, "list_int")) { - if (!(reader->NextArrayItem(reader))) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - uint32_t temp_count = 0; - reader->BeginArray(reader); - while (reader->NextArrayItem(reader)) { - uint32_t temp; - reader->ReadUnsignedInteger(reader, &temp); - temp_count++; - } - } else if (!strcmp(type, "size_t")) { - if (!(reader->NextArrayItem(reader))) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - uint32_t temp; - reader->ReadUnsignedInteger(reader, &temp); - } else { - fprintf(stderr, "cannot skip graph attr %s", key); - status = -1; - break; - } - if (reader->NextArrayItem(reader)) { - fprintf(stderr, "Invalid json format\n"); - status = -1; - break; - } - } - } - if (bitmask != (1 | 2 | 4)) { - fprintf(stderr, "invalid format\n"); - status = -1; - } - return status; -} - -int TVMGraphExecutorGraphAttr_Release(TVMGraphExecutorGraphAttr* attr) { - if (!attr) { - return 0; - } - if (attr->storage_id) { - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryFree(attr->storage_id, dev); - attr->storage_id = 0; - if (err != kTvmErrorNoError) { - return -1; - } - } - if (attr->device_index) { - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryFree(attr->device_index, dev); - attr->device_index = 0; - if (err != kTvmErrorNoError) { - return -1; - } - } - if (attr->dltype) { - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryFree(attr->dltype, dev); - attr->dltype = 0; - if (err != kTvmErrorNoError) { - return -1; - } - } - if (attr->shape) { - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryFree(attr->shape, dev); - attr->shape = 0; - if (err != kTvmErrorNoError) { - return -1; - } - } - if (attr->ndim) { - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryFree(attr->ndim, dev); - attr->ndim = 0; - if (err 
!= kTvmErrorNoError) { - return -1; - } - } - - return 0; -} - -int TVMGraphExecutor_Load(TVMGraphExecutor* executor, JSONReader* reader) { - int status = 0; - reader->BeginObject(reader); - int bitmask = 0; - char key[20]; - while (reader->NextObjectItem(reader, key, sizeof(key))) { - if (!strcmp(key, "nodes")) { - reader->BeginArray(reader); - size_t num_items = 0; - if (reader->ArrayLength(reader, &num_items) != 0) { - fprintf(stderr, "error determing list_int length\n"); - status = -1; - break; - } - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorNode) * num_items, dev, - (void**)&executor->nodes); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - status = -1; - break; - } - while (reader->NextArrayItem(reader)) { - if (executor->nodes_count == num_items) { - fprintf(stderr, "array too big\n"); - status = -1; - return status; - } - TVMGraphExecutorNode* node = executor->nodes + executor->nodes_count; - status = TVMGraphExecutorNode_Load(node, reader); - if (status != 0) { - fprintf(stderr, "failed to load an element in `nodes` field in graph executor node.\n"); - break; -#if TVM_CRT_DEBUG - } else { - printf("loading: node (%u) %s loaded.\n", executor->nodes_count, node->name); -#endif // TVM_CRT_DEBUG - } - executor->nodes_count++; - } - bitmask |= 1; - } else if (!strcmp(key, "arg_nodes")) { - reader->BeginArray(reader); - size_t num_items = 0; - if (reader->ArrayLength(reader, &num_items) != 0) { - fprintf(stderr, "error determing list_int length\n"); - status = -1; - break; - } - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(uint32_t) * num_items, dev, - (void**)&executor->input_nodes); - - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - status = -1; - break; - } - while (reader->NextArrayItem(reader)) { - if (executor->input_nodes_count == num_items) { - fprintf(stderr, "array too big\n"); - status = -1; - return status; - } - uint32_t* node = executor->input_nodes + executor->input_nodes_count; - reader->ReadUnsignedInteger(reader, node); - executor->input_nodes_count++; - } - bitmask |= 2; - } else if (!strcmp(key, "node_row_ptr")) { - reader->BeginArray(reader); - size_t num_items = 0; - if (reader->ArrayLength(reader, &num_items) != 0) { - fprintf(stderr, "error determing list_int length\n"); - status = -1; - break; - } - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(uint32_t) * num_items, dev, - (void**)&executor->node_row_ptr); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - status = -1; - break; - } - while (reader->NextArrayItem(reader)) { - if (executor->node_row_ptr_count == num_items) { - fprintf(stderr, "array too big\n"); - status = -1; - return status; - } - uint32_t count = executor->node_row_ptr_count; - uint32_t* node = executor->node_row_ptr + count; - reader->ReadUnsignedInteger(reader, node); - executor->node_row_ptr_count++; - } - bitmask |= 4; - } else if (!strcmp(key, "heads")) { - reader->BeginArray(reader); - size_t num_items = 0; - if (reader->ArrayLength(reader, &num_items) != 0) { - fprintf(stderr, "error determing list_int length\n"); - status = -1; - break; - } - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorNodeEntry) * num_items, - dev, (void**)&executor->outputs); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", 
err); - status = -1; - break; - } - while (reader->NextArrayItem(reader)) { - if (executor->outputs_count == num_items) { - fprintf(stderr, "array too big\n"); - status = -1; - return status; - } - TVMGraphExecutorNodeEntry* entry = executor->outputs + executor->outputs_count; - status = NodeEntry_Load(entry, reader); - if (status != 0) { - fprintf(stderr, "Fail to load an element in `heads` field in graph executor node.\n"); - break; - } - executor->outputs_count++; - } - bitmask |= 8; - } else if (!strcmp(key, "attrs")) { - status = TVMGraphExecutorGraphAttr_Load(&(executor->attrs), reader); - if (status != 0) { - fprintf(stderr, "Fail to load an element in `heads` field in graph executor node.\n"); - break; - } - bitmask |= 16; - } else if (!strcmp(key, "metadata")) { - break; - } else { - fprintf(stderr, "key %s is not supported\n", key); - status = -1; - } - if (status != 0) { - break; - } - } - if (!(bitmask == (1 | 2 | 4 | 8 | 16))) { - fprintf(stderr, "invalid format\n"); - status = -1; - } - return status; -} - -uint32_t TVMGraphExecutor_GetEntryId(TVMGraphExecutor* executor, uint32_t nid, uint32_t index) { - return executor->node_row_ptr[nid] + index; -} - -/*! - * \brief Get the number of input tensors allocated. - * \param executor The graph executor. - * \return the number of input tensors allocated. - */ -int TVMGraphExecutor_GetNumInputs(TVMGraphExecutor* executor) { - return executor->input_nodes_count; -} - -/*! - * \brief Get the input index given the name of input. - * \param executor The graph executor. - * \param name The name of the input. - * \return The index of input. - */ -int TVMGraphExecutor_GetInputIndex(TVMGraphExecutor* executor, const char* name) { - uint32_t i; - int32_t rv = -1; - for (i = 0; i < executor->input_nodes_count; ++i) { - uint32_t nid = executor->input_nodes[i]; - if (!strcmp(executor->nodes[nid].name, name)) { - rv = i; - break; - } - } - CHECK_GE(rv, 0, "cannot find '%s' among input.", name); - return rv; -} - -/*! - * \brief set input to the graph based on name. - * \param executor The graph executor. - * \param name The name of the input. - * \param data_in The input data. - */ -void TVMGraphExecutor_SetInput(TVMGraphExecutor* executor, const char* name, DLTensor* data_in) { - uint32_t index = TVMGraphExecutor_GetInputIndex(executor, name); - if (index >= executor->input_nodes_count) { - fprintf(stderr, "given index is greater than num of input nodes.\n"); - } - uint32_t eid = TVMGraphExecutor_GetEntryId(executor, executor->input_nodes[index], 0); - executor->data_entry[eid].dl_tensor.data = data_in->data; -} - -/*! - * \brief Load parameters from parameter blob. - * \param executor The graph executor. - * \param param_blob A binary blob of parameter. - * \param param_size The parameter size. - * \return The result of this function execution. 
- */ -int TVMGraphExecutor_LoadParams(TVMGraphExecutor* executor, const char* param_blob, - const uint32_t param_size) { - int status = 0; - const char* bptr = param_blob; - uint64_t header, reserved; - memcpy(&header, bptr, sizeof(header)); - bptr += sizeof(header); - if (header != kTVMNDArrayListMagic) { - fprintf(stderr, "Invalid parameters file format"); - status = -1; - } - memcpy(&reserved, bptr, sizeof(reserved)); - bptr += sizeof(reserved); - - // read names - char* names = NULL; - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate( - TVM_CRT_MAX_STRLEN_PARAM_NAME * executor->nodes_count, dev, (void**)&names); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - status = -1; - return status; - } - memset(names, 0, TVM_CRT_MAX_STRLEN_PARAM_NAME * executor->nodes_count); - uint64_t names_count; - int idx; - memcpy(&names_count, bptr, sizeof(names_count)); - bptr += sizeof(names_count); - for (idx = 0; idx < names_count; idx++) { - uint64_t name_length; - memcpy(&name_length, bptr, sizeof(name_length)); - bptr += sizeof(name_length); - if (name_length >= TVM_CRT_MAX_STRLEN_PARAM_NAME) { - fprintf(stderr, "Error: function name longer than expected.\n"); - status = -1; - } - memcpy(names + TVM_CRT_MAX_STRLEN_PARAM_NAME * idx, bptr, name_length); - bptr += name_length; - } - - // read sizes - uint64_t sz; - memcpy(&sz, bptr, sizeof(sz)); - bptr += sizeof(sz); - uint32_t size = sz; - if (size != names_count) { - fprintf(stderr, "Invalid parameters file format\n"); - status = -1; - } - - for (idx = 0; idx < size; idx++) { - int32_t in_idx = - TVMGraphExecutor_GetInputIndex(executor, names + TVM_CRT_MAX_STRLEN_PARAM_NAME * idx); - CHECK_GT(in_idx, 0, "Found param for non-existent input: %s\n", - names + TVM_CRT_MAX_STRLEN_PARAM_NAME * idx); - uint32_t eid = TVMGraphExecutor_GetEntryId(executor, executor->input_nodes[in_idx], 0); - if (!(eid < executor->data_entry_count)) { - fprintf(stderr, "`entry_id`=%d is greater than expected(%d).\n", eid, - executor->data_entry_count); - status = -1; - } - - if (executor->data_entry[eid].dl_tensor.shape) { - err = TVMPlatformMemoryFree(executor->data_entry[eid].dl_tensor.shape, dev); - if (err != kTvmErrorNoError) { - status = -1; - } - executor->data_entry[eid].dl_tensor.shape = 0; - } - if (executor->data_entry[eid].dl_tensor.data) { - err = TVMPlatformMemoryFree(executor->data_entry[eid].dl_tensor.data, dev); - if (err != kTvmErrorNoError) { - status = -1; - } - executor->data_entry[eid].dl_tensor.data = 0; - } - status |= TVMNDArray_Load(&(executor->data_entry[eid]), &bptr); -#if TVM_CRT_DEBUG - TVMNDArray* entry = &(executor->data_entry[eid]); - printf("loading: param %s loaded, in_idx=%d, eid=%d, ndim=%d, data[0]=%f\n", - names + TVM_CRT_MAX_STRLEN_PARAM_NAME * idx, in_idx, eid, entry->dl_tensor.ndim, - ((float*)entry->dl_tensor.data)[0]); // NOLINT(*) -#endif // TVM_CRT_DEBUG - } - - // Release memory - err = TVMPlatformMemoryFree(names, dev); - if (err != kTvmErrorNoError) { - status = -1; - return status; - } - - return status; -} - -/*! - * \brief Run all the operations one by one. - * \param executor The graph executor. - */ -void TVMGraphExecutor_Run(TVMGraphExecutor* executor) { - // setup the array and requirements. 
- uint32_t idx; - for (idx = 0; idx < executor->op_execs_count; ++idx) { - if (executor->op_execs[idx].fexec) { -#if TVM_CRT_DEBUG - printf("calling: %s (%d)\n", executor->op_execs[idx].name, idx); -#endif // TVM_CRT_DEBUG - executor->op_execs[idx].Call(&(executor->op_execs[idx])); - } - } -} - -/*! - * \brief Get the number of output tensors allocated. - * \param executor The graph executor. - * \return the number of output tensors allocated. - */ -int TVMGraphExecutor_GetNumOutputs(TVMGraphExecutor* executor) { return executor->outputs_count; } - -int TVMGraphExecutor_GetOutput(TVMGraphExecutor* executor, const int32_t idx, DLTensor* out) { - int status = 0; - uint32_t nid = executor->outputs[idx].node_id; - uint32_t index = executor->outputs[idx].index; - uint32_t eid = TVMGraphExecutor_GetEntryId(executor, nid, index); - - // copy data section to allocated output tensor - int32_t elem_bytes = out->dtype.bits / 8; - int64_t size = Shape_Accumulate(out->shape, out->ndim); - DLTensor* tensor = &(executor->data_entry[eid].dl_tensor); - CHECK(out->ndim == tensor->ndim); - CHECK(out->dtype.bits == tensor->dtype.bits); - CHECK(Shape_Accumulate(out->shape, out->ndim) == Shape_Accumulate(tensor->shape, tensor->ndim)); - memcpy(out->data, tensor->data, size * elem_bytes); - return status; -} - -int TVMGraphExecutor_SetupStorage(TVMGraphExecutor* executor) { - TVMPackedFunc lookup_linked_param; - int lookup_linked_param_valid; - uint32_t idx; - - { - TVMArgs temp_args; - temp_args.values[0].v_int64 = 0; - temp_args.tcodes[0] = kTVMArgInt; - temp_args.values_count = 1; - lookup_linked_param_valid = - (TVMPackedFunc_InitModuleFunc(&lookup_linked_param, executor->module_handle, - "_lookup_linked_param", &temp_args) == 0); - } - - // Grab saved optimization plan from graph. - TVMGraphExecutorGraphAttr* attrs = &(executor->attrs); - DLDataType* vtype = NULL; - DLDevice alloc_dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(DLDataType) * attrs->dltype_count, - alloc_dev, (void**)&vtype); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - return -1; - } - for (idx = 0; idx < attrs->dltype_count; idx++) { - vtype[idx] = String2DLDataType(attrs->dltype + idx * TVM_CRT_MAX_STRLEN_DLTYPE); - } - - // Size and device type of each storage pool entry. - TVMGraphExecutorPoolEntry* pool_entry = NULL; - err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorPoolEntry) * executor->nodes_count, - alloc_dev, (void**)&pool_entry); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - return -1; - } - memset(pool_entry, 0, sizeof(TVMGraphExecutorPoolEntry) * executor->nodes_count); - uint32_t pool_entry_count = 0; - // Find the maximum space size. - for (idx = 0; idx < attrs->shape_count; idx++) { - int storage_id = attrs->storage_id[idx]; - // Use the fallback device if no device index is available. - int device_type = executor->devices[0].device_type; - uint32_t size = Shape_Accumulate(attrs->shape + idx * TVM_CRT_MAX_NDIM, attrs->ndim[idx]); - DLDataType t = vtype[idx]; - uint32_t bits = t.bits * t.lanes; - size_t bytes = ((bits + 7U) / 8U) * size; - - uint32_t sid = storage_id; - if (sid >= pool_entry_count) { - pool_entry_count = sid + 1; - } - pool_entry[sid].entry_id = idx; - pool_entry[sid].size = MAX(pool_entry[sid].size, bytes); - pool_entry[sid].device_type = device_type; - } - - // Allocate the space. 
- err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorStorageEntry) * pool_entry_count, - alloc_dev, (void**)&executor->storage_pool); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - return -1; - } - for (idx = 0; idx < pool_entry_count; idx++) { - TVMGraphExecutorPoolEntry pit = pool_entry[idx]; - DLDevice dev = executor->devices[0]; - uint8_t did_find_linked_param = 0; - if (lookup_linked_param_valid) { - lookup_linked_param.args.values[0].v_int64 = idx; - CHECK_EQ(lookup_linked_param.Call(&lookup_linked_param), 0, "lookup_linked_param"); - - void* linked_param_data = lookup_linked_param.ret_value.values[0].v_handle; - if (linked_param_data != NULL) { - executor->storage_pool[executor->storage_pool_count].is_linked_param = 1; - DLTensor* tensor = &executor->storage_pool[executor->storage_pool_count].array.dl_tensor; - tensor->data = linked_param_data; - tensor->device = dev; - tensor->ndim = attrs->ndim[pit.entry_id]; - tensor->shape = attrs->shape + idx * TVM_CRT_MAX_NDIM; - tensor->strides = NULL; - tensor->byte_offset = 0; - did_find_linked_param = 1; - } - } - if (did_find_linked_param == 0) { - DLDataType dtype = {kDLFloat, 32, 1}; - int64_t shape[TVM_CRT_MAX_NDIM] = { - 0, - }; - shape[0] = (pit.size + 3) / 4; - int status = TVMNDArray_Empty(1, shape, dtype, dev, - &executor->storage_pool[executor->storage_pool_count].array); - CHECK_EQ(status, 0, "fail to create storage_pool with idx=%d\n", idx); - } - executor->storage_pool_count++; - } - - // Assign the pooled entries. A unified memory pool is used to simplify - // memory assignment for each node entry. The allocated memory on each device - // is mapped to this pool. - executor->data_entry_count = executor->node_row_ptr[executor->node_row_ptr_count - 1]; - err = TVMPlatformMemoryAllocate(sizeof(TVMNDArray) * executor->data_entry_count, alloc_dev, - (void**)&executor->data_entry); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - return -1; - } - for (idx = 0; idx < executor->data_entry_count; ++idx) { - uint32_t storage_id = attrs->storage_id[idx]; - CHECK(storage_id < executor->storage_pool_count); - int status = TVMNDArray_CreateView(&(executor->storage_pool[storage_id].array), - attrs->shape + idx * TVM_CRT_MAX_NDIM, attrs->ndim[idx], - vtype[idx], &executor->data_entry[idx]); - CHECK_EQ(status, 0, "fail to create for node with idx=%d, storage_id=%u\n", idx, storage_id); - - TVMNDArray_IncrementReference(&executor->data_entry[idx]); - } - - // Release memory - err = TVMPlatformMemoryFree(vtype, alloc_dev); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory free error: %08x", err); - return err; - } - - err = TVMPlatformMemoryFree(pool_entry, alloc_dev); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory free error: %08x", err); - return -1; - } - - return 0; -} - -int TVMGraphExecutor_SetupOpExecs(TVMGraphExecutor* executor) { - int status = 0; - uint32_t nid, idx; - executor->op_execs_count = executor->nodes_count; - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMPackedFunc) * executor->op_execs_count, - dev, (void**)&executor->op_execs); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - status = -1; - return status; - } - for (nid = 0; nid < executor->nodes_count; nid++) { - const TVMGraphExecutorNode* inode = executor->nodes + nid; - if (strcmp(inode->op_type, "null")) { - DLTensorPtr args[TVM_CRT_MAX_ARGS]; - uint32_t args_count = 0; - for 
(idx = 0; idx < inode->inputs_count; idx++) { - const TVMGraphExecutorNodeEntry* entry = inode->inputs + idx; - uint32_t eid = TVMGraphExecutor_GetEntryId(executor, entry->node_id, entry->index); - args[idx] = &(executor->data_entry[eid].dl_tensor); - args_count++; - } - for (idx = 0; idx < inode->param.num_outputs; idx++) { - uint32_t eid = TVMGraphExecutor_GetEntryId(executor, nid, idx); - args[args_count] = &(executor->data_entry[eid].dl_tensor); - args_count++; - } - if (strcmp(inode->op_type, "tvm_op")) { - fprintf(stderr, "Can only take tvm_op as op, but \"%s\" is found.\n", inode->op_type); - status = -1; - break; - } - if (args_count >= TVM_CRT_MAX_ARGS) { - fprintf(stderr, "too many arguments: expected less than %d args, but got %d.\n", - TVM_CRT_MAX_ARGS, args_count); - status = -1; - break; - } -#if TVM_CRT_DEBUG - printf("tvm_op: creating %s with node_id=%d\n", inode->param.func_name, nid); -#endif // TVM_CRT_DEBUG - TVMPackedFunc pf; - TVMGraphExecutor_CreateTVMOp(executor, &(inode->param), args, args_count, &pf); - executor->op_execs[nid] = pf; - } else { - memset(&executor->op_execs[nid], 0, sizeof(TVMPackedFunc)); - } - } - return status; -} - -typedef struct TVMOpArgs { - DLTensor args[TVM_CRT_MAX_ARGS]; - uint32_t args_count; - TVMValue arg_values[TVM_CRT_MAX_ARGS]; - uint32_t arg_values_count; - uint32_t arg_tcodes[TVM_CRT_MAX_ARGS]; - uint32_t arg_tcodes_count; - int64_t shape_data[TVM_CRT_MAX_ARGS]; - uint32_t shape_data_count; -} TVMOpArgs; - -int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* executor, const TVMOpParam* param, - DLTensorPtr* args, const uint32_t args_count, - TVMPackedFunc* pf) { - int status = 0; - uint32_t idx; - TVMOpArgs arg_ptr; - memset(&arg_ptr, 0, sizeof(TVMOpArgs)); - arg_ptr.args_count = args_count; - if (param->flatten_data) { - arg_ptr.shape_data_count = arg_ptr.args_count; - } - for (idx = 0; idx < arg_ptr.args_count; ++idx) { - TVMValue v; - memset(&v, 0, sizeof(v)); - DLTensor* t = &(arg_ptr.args[idx]); - /* v.v_handle = &((*args)[idx]); */ - v.v_handle = args[idx]; - arg_ptr.arg_values[idx] = v; - arg_ptr.arg_values_count++; - arg_ptr.arg_tcodes[idx] = kTVMNDArrayHandle; - arg_ptr.arg_tcodes_count++; - if (param->flatten_data) { - arg_ptr.shape_data[idx] = Shape_Accumulate(t->shape, t->ndim); - t->ndim = 1; - t->shape[0] = arg_ptr.shape_data[idx]; - } - } - if (!strcmp(param->func_name, "__nop") || !strcmp(param->func_name, "__copy")) { - fprintf(stderr, "%s function is not yet supported.", param->func_name); - status = -1; - } - - TVMArgs targs = TVMArgs_Create(arg_ptr.arg_values, arg_ptr.arg_tcodes, arg_ptr.arg_values_count); - status = TVMPackedFunc_InitModuleFunc(pf, executor->module_handle, param->func_name, &targs); - - return status; -} - -/*! - * \brief Initialize the graph executor with graph and device. - * \param graph_json The execution graph. - * \param module_handle The module containing the compiled functions for the host - * processor. - * \param devs The device of the host and devices where graph nodes will be - * executed on. - * \return 0 on success. 
- */ -int TVMGraphExecutor_Init(TVMGraphExecutor* executor, const char* graph_json, - TVMModuleHandle module_handle, const DLDevice* devs) { - JSONReader reader; - tvm_crt_error_t err = JSONReader_Create(graph_json, &reader); - if (err != kTvmErrorNoError) { - return -1; - } - - TVMGraphExecutor_Load(executor, &reader); - err = JSONReader_Release(&reader); - if (err != kTvmErrorNoError) { - return -1; - } - executor->module_handle = module_handle; - executor->devices[0] = devs[0]; - - int status; - status = TVMGraphExecutor_SetupStorage(executor); - if (status != 0) { - return status; - } - status = TVMGraphExecutor_SetupOpExecs(executor); - - return status; -} - -int TVMGraphExecutor_Create(const char* sym_json, TVMModuleHandle module_handle, - const DLDevice* devs, TVMGraphExecutor** executor) { - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutor), dev, (void**)executor); - if (err != kTvmErrorNoError) { - fprintf(stderr, "memory allocate error: %08x", err); - return -1; - } - - memset(*executor, 0, sizeof(TVMGraphExecutor)); - // init - return TVMGraphExecutor_Init(*executor, sym_json, module_handle, devs); -} - -int TVMGraphExecutor_Release(TVMGraphExecutor** pptr) { - int status = 0; - int32_t idx; - TVMGraphExecutor* executor = (TVMGraphExecutor*)(*pptr); - for (idx = 0; idx < executor->nodes_count; ++idx) { - status = TVMGraphExecutorNodeRelease(&(executor->nodes[idx])); - if (status != 0) { - return status; - } - } - DLDevice dev = {kDLCPU, 0}; - status = TVMPlatformMemoryFree(executor->nodes, dev); - if (status != 0) { - return status; - } - status = TVMGraphExecutorGraphAttr_Release(&(executor->attrs)); - if (status != 0) { - return status; - } - for (idx = 0; idx < executor->storage_pool_count; ++idx) { - if (executor->storage_pool[idx].is_linked_param == 0) { - status = TVMNDArray_Release(&(executor->storage_pool[idx]).array); - if (status != 0) { - return status; - } - } - } - for (idx = 0; idx < executor->data_entry_count; ++idx) { - status = TVMPlatformMemoryFree(executor->data_entry[idx].dl_tensor.shape, dev); - if (status != 0) { - return status; - } - } - status = TVMPlatformMemoryFree(executor->input_nodes, dev); - if (status != 0) { - return status; - } - status = TVMPlatformMemoryFree(executor->node_row_ptr, dev); - if (status != 0) { - return status; - } - status = TVMPlatformMemoryFree(executor->outputs, dev); - if (status != 0) { - return status; - } - status = TVMPlatformMemoryFree(executor->storage_pool, dev); - if (status != 0) { - return status; - } - status = TVMPlatformMemoryFree(executor->data_entry, dev); - if (status != 0) { - return status; - } - status = TVMPlatformMemoryFree(executor->op_execs, dev); - if (status != 0) { - return status; - } - status = TVMPlatformMemoryFree(*pptr, dev); - if (status != 0) { - return status; - } - - if (g_fexecs) { - status = TVMPlatformMemoryFree(g_fexecs, dev); - g_fexecs = 0; - if (status != 0) { - return status; - } - } - - return 0; -} diff --git a/src/runtime/crt/graph_executor/load_json.c b/src/runtime/crt/graph_executor/load_json.c deleted file mode 100644 index 3d3cdb8d1ce9..000000000000 --- a/src/runtime/crt/graph_executor/load_json.c +++ /dev/null @@ -1,495 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// LINT_C_FILE - -/*! - * \file load_json.c - * \brief Load graph from JSON file. - */ -#include -#include -#include -#include -#include - -// the node entry structure in serialized format -typedef struct JSONNodeEntry { - uint32_t node_id; - uint32_t index; - uint32_t version; - void (*Load)(struct JSONNodeEntry* entry, JSONReader* reader); -} JSONNodeEntry; - -void JSONNodeEntryLoad(JSONNodeEntry* entry, JSONReader* reader) { - reader->BeginArray(reader); - if (reader->NextArrayItem(reader)) { - fprintf(stderr, "invalid json format\n"); - } - reader->ReadUnsignedInteger(reader, &(entry->node_id)); - if (reader->NextArrayItem(reader)) { - fprintf(stderr, "invalid json format\n"); - } - reader->ReadUnsignedInteger(reader, &(entry->index)); - if (reader->NextArrayItem(reader)) { - reader->ReadUnsignedInteger(reader, &(entry->version)); - if (!reader->NextArrayItem(reader)) { - fprintf(stderr, "invalid json format\n"); - } - } else { - entry->version = 0; - } -} - -// implementation of Seq class - -void SeqPush(Seq* seq, uint32_t src) { - if (seq->size >= seq->allocated) { - printf("seq too large.\n"); - } - seq->data[seq->size] = src; - seq->size += 1; -} - -uint32_t* SeqBack(Seq* seq) { - if (seq->size >= seq->allocated) { - printf("seq too large.\n"); - } - return seq->data + (seq->size - 1); -} - -void SeqPop(Seq* seq) { - if (seq->size >= seq->allocated) { - printf("seq size is too large.\n"); - } - if (seq->size == 0) { - printf("seq size is too small.\n"); - } - seq->size -= 1; -} - -tvm_crt_error_t SeqCreate(uint64_t len, Seq** seq) { - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(Seq), dev, (void**)seq); - if (err != kTvmErrorNoError) { - return err; - } - memset(*seq, 0, sizeof(Seq)); - (*seq)->allocated = len; - - err = TVMPlatformMemoryAllocate(sizeof(uint32_t) * len, dev, (void**)&(*seq)->data); - if (err != kTvmErrorNoError) { - return err; - } - (*seq)->push_back = SeqPush; - (*seq)->back = SeqBack; - (*seq)->pop_back = SeqPop; - return err; -} - -tvm_crt_error_t SeqRelease(Seq* seq) { - DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryFree(seq->data, dev); - if (err != kTvmErrorNoError) { - return err; - } - return TVMPlatformMemoryFree(seq, dev); -} - -// implementations of JSONReader - -/*! - * \brief Takes the next char from the input source. - * \return the next character. - */ -char JSONReader_NextChar(JSONReader* reader) { - char ch = reader->isptr[0]; - reader->isptr += 1; - return ch; -} - -/*! - * \brief Returns the next char from the input source. - * \return the next character. - */ -char JSONReader_PeekNextChar(JSONReader* reader) { return reader->isptr[0]; } - -/*! - * \brief Read next nonspace character. - * \return the next nonspace character. 
- */ -char JSONReader_NextNonSpace(JSONReader* reader) { - int ch; - do { - ch = reader->NextChar(reader); - if (ch == '\n') { - ++(reader->line_count_n_); - } - if (ch == '\r') { - ++(reader->line_count_r_); - } - } while (isspace(ch)); - return ch; -} - -/*! - * \brief Read just before next nonspace but not read that. - * \return the next nonspace character. - */ -char JSONReader_PeekNextNonSpace(JSONReader* reader) { - int ch; - while (1) { - ch = reader->PeekNextChar(reader); - if (ch == '\n') { - ++(reader->line_count_n_); - } - if (ch == '\r') { - ++(reader->line_count_r_); - } - if (!isspace(ch)) break; - reader->NextChar(reader); - } - return ch; -} - -/*! - * \brief Parse next JSON string. - * \param out_str the output string. NULL to merely consume input and discard it. - * \param out_str_size Number of bytes available to write starting from out_str. Includes - * terminating \0. - * \throw tvm::Error when next token is not string - */ -int JSONReader_ReadString(JSONReader* reader, char* out_str, size_t out_str_size) { - int status = 0; - int ch = reader->NextNonSpace(reader); - size_t output_counter = 0; - while (output_counter < out_str_size || out_str == NULL) { - ch = reader->NextChar(reader); - if (ch == '\\') { - char sch = reader->NextChar(reader); - switch (sch) { - case 'r': - out_str[output_counter++] = '\r'; - break; - case 'n': - out_str[output_counter++] = '\n'; - break; - case '\\': - out_str[output_counter++] = '\\'; - break; - case 't': - out_str[output_counter++] = '\t'; - break; - case '\"': - out_str[output_counter++] = '\"'; - break; - default: - fprintf(stderr, "unknown string escape %c\n", sch); - break; - } - } else { - if (ch == '\"') { - break; - } - if (out_str != NULL) { - out_str[output_counter++] = ch; - } - } - if (output_counter == out_str_size - 1) { - fprintf(stderr, "Error: string size greater than buffer size (%zu).\n", out_str_size); - break; - } - if (ch == EOF || ch == '\r' || ch == '\n') { - fprintf(stderr, "Error at line %zu, Expect \'\"\' but reach end of line\n", - reader->line_count_n_); - break; - } - } - - if (out_str != NULL) { - out_str[output_counter] = 0; - } - return status; -} - -int JSONReader_ReadUnsignedInteger(JSONReader* reader, unsigned int* out_value) { - int status = 0; - char* endptr; - const char* icstr = reader->isptr; - unsigned int number = strtol(icstr, &endptr, 10); - reader->isptr += endptr - icstr; - *out_value = number; - return status; -} - -int JSONReader_ReadInteger(JSONReader* reader, int64_t* out_value) { - int status = 0; - char* endptr; - const char* icstr = reader->isptr; - int64_t number = strtol(icstr, &endptr, 10); - reader->isptr += endptr - icstr; - *out_value = number; - return status; -} - -/*! - * \brief Begin parsing an object. - * \code - * string key; - * // value can be any type that is json serializable. - * string value; - * reader->BeginObject(); - * while (reader->NextObjectItem(&key)) { - * // do somthing to key value - * reader->Read(&value); - * } - * \endcode - */ -void JSONReader_BeginObject(JSONReader* reader) { - int ch = reader->NextNonSpace(reader); - if (!(ch == '{')) { - fprintf(stderr, "Error at line %zu, Expect \'{\' but got \'%c\'\n", reader->line_count_n_, ch); - } - Seq* scope_counter_ = reader->scope_counter_; - scope_counter_->push_back(scope_counter_, 0); -} - -/*! - * \brief Try to move to next object item. - * If this call is successful, user can proceed to call - * reader->Read to read in the value. - * \param out_key the key to the next object. 
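Editor's note: `JSONReader_ReadInteger` / `JSONReader_ReadUnsignedInteger` above rely on `strtol` to both parse the number and report, via `endptr`, how far to advance the reader's cursor. A standalone sketch of that cursor-advance idiom, with hypothetical names (`read_integer`, `cursor`) not taken from the removed code:

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse a decimal number at the cursor and move the cursor past the
 * characters strtol consumed. */
static int read_integer(const char** cursor, int64_t* out_value) {
  char* endptr;
  *out_value = strtol(*cursor, &endptr, 10);
  if (endptr == *cursor) {
    return -1;          /* no digits at the cursor */
  }
  *cursor = endptr;     /* advance past the parsed number */
  return 0;
}

int main(void) {
  const char* json = "42, 7]";
  const char* cursor = json;
  int64_t value;
  if (read_integer(&cursor, &value) == 0) {
    printf("parsed %lld, cursor now at \"%s\"\n", (long long)value, cursor);
  }
  return 0;
}
```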
- * \param out_key_size number of bytes available to write at out_key, including terminating \0. - * \return true if the read is successful, false if we are at end of the object. - */ -uint8_t JSONReader_NextObjectItem(JSONReader* reader, char* out_key, size_t out_key_size) { - uint8_t next = 1; - Seq* scope_counter_ = reader->scope_counter_; - if (scope_counter_->back(scope_counter_)[0] != 0) { - int ch = reader->NextNonSpace(reader); - if (ch == EOF) { - next = 0; - } else if (ch == '}') { - next = 0; - } else { - if (ch != ',') { - fprintf(stderr, "Error at line %zu, JSON object expect \'}\' or \',\' but got \'%c\'\n", - reader->line_count_n_, ch); - } - } - } else { - int ch = reader->PeekNextNonSpace(reader); - if (ch == '}') { - reader->NextChar(reader); - next = 0; - } - } - if (!next) { - scope_counter_->pop_back(scope_counter_); - return 0; - } else { - scope_counter_->back(scope_counter_)[0] += 1; - int err = reader->ReadString(reader, out_key, out_key_size); - if (err != 0) { - fprintf(stderr, "error reading key"); - return 0; - } - int ch = reader->NextNonSpace(reader); - if (ch != ':') { - fprintf(stderr, "Error at line %zu, Expect \':\' but get \'%c\'\n", reader->line_count_n_, - ch); - } - return 1; - } -} - -/*! - * \brief Begin parsing an array. - * \code - * // value can be any type that is json serializable. - * string value; - * reader->BeginArray(); - * while (reader->NextArrayItem(&value)) { - * // do somthing to value - * } - * \endcode - */ -void JSONReader_BeginArray(JSONReader* reader) { - int ch = reader->NextNonSpace(reader); - if (ch != '[') { - fprintf(stderr, "Error at line %zu, Expect \'[\' but get \'%c\'\n", reader->line_count_n_, ch); - } - Seq* scope_counter_ = reader->scope_counter_; - scope_counter_->push_back(scope_counter_, 0); -} - -/*! - * \brief Try to read the next element in the array. - * If this call is successful, user can proceed to call - * reader->Read to read in the value. - * \return true if the read is successful, false if we are at end of the array. - */ -uint8_t JSONReader_NextArrayItem(JSONReader* reader) { - uint8_t next = 1; - Seq* scope_counter_ = reader->scope_counter_; - if (scope_counter_->back(scope_counter_)[0] != 0) { - int ch = reader->NextNonSpace(reader); - if (ch == EOF) { - next = 0; - } else if (ch == ']') { - next = 0; - } else { - if (ch != ',') { - fprintf(stderr, "Error at line %zu, JSON object expect \']\' or \',\' but got \'%c\'\n", - reader->line_count_n_, ch); - } - } - } else { - int ch = reader->PeekNextNonSpace(reader); - if (ch == ']') { - reader->NextChar(reader); - next = 0; - } - } - if (!next) { - scope_counter_->pop_back(scope_counter_); - return 0; - } else { - scope_counter_->back(scope_counter_)[0] += 1; - return 1; - } -} - -/*! - * \brief Determine the remaining length of the array to read. - * \param num_elements Pointer which receives the length. 
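Editor's note: the object/array iteration functions removed above form a pull-style traversal API (begin a scope, then repeatedly ask for the next item). A short usage sketch, assuming the removed internal headers are still available; the JSON shape (an object whose values are integer arrays) is an illustrative assumption:

```c
#include <stdint.h>
#include <stdio.h>
#include <tvm/runtime/crt/error_codes.h>
#include <tvm/runtime/crt/internal/graph_executor/load_json.h>  /* removed by this change */

static void walk(const char* json_text) {
  JSONReader reader;
  if (JSONReader_Create(json_text, &reader) != kTvmErrorNoError) {
    return;
  }

  char key[32];
  reader.BeginObject(&reader);
  while (reader.NextObjectItem(&reader, key, sizeof(key))) {
    /* Assumed shape: each value is an array of integers. */
    reader.BeginArray(&reader);
    while (reader.NextArrayItem(&reader)) {
      int64_t v;
      reader.ReadInteger(&reader, &v);
      printf("%s: %lld\n", key, (long long)v);
    }
  }

  JSONReader_Release(&reader);
}
```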
- * \return 0 if successful - */ -int JSONReader_ArrayLength(JSONReader* reader, size_t* num_elements) { - int status = 0; - char* old_isptr = reader->isptr; - size_t old_line_count_r_ = reader->line_count_r_; - size_t old_line_count_n_ = reader->line_count_n_; - int old_scope_counter_back = *reader->scope_counter_->back(reader->scope_counter_); - - typedef enum { kObject, kArray } item_type_t; - Seq* scopes; - tvm_crt_error_t err = SeqCreate(10, &scopes); - if (err != kTvmErrorNoError) { - return -1; - } - item_type_t json_item_type = kArray; - *num_elements = 0; - for (;;) { - int has_item = 0; - if (json_item_type == kArray) { - has_item = reader->NextArrayItem(reader); - if (scopes->size == 0 && has_item != 0) { - (*num_elements)++; - } - } else if (json_item_type == kObject) { - has_item = reader->NextObjectItem(reader, NULL, 0); - } else { - status = -1; - break; - } - - if (has_item) { - char c = reader->PeekNextNonSpace(reader); - if (c == '"') { - reader->ReadString(reader, NULL, 1024); - } else if (c == '[') { - reader->BeginArray(reader); - scopes->push_back(scopes, json_item_type); - json_item_type = kArray; - } else if (c == '{') { - reader->BeginObject(reader); - scopes->push_back(scopes, json_item_type); - json_item_type = kObject; - } else { - int64_t val; - reader->ReadInteger(reader, &val); - } - } else { - if (scopes->size > 0) { - json_item_type = *scopes->back(scopes); - scopes->pop_back(scopes); - } else { - break; - } - } - } - - reader->isptr = old_isptr; - reader->line_count_r_ = old_line_count_r_; - reader->line_count_n_ = old_line_count_n_; - reader->scope_counter_->push_back(reader->scope_counter_, old_scope_counter_back); - - err = SeqRelease(scopes); - if (err != kTvmErrorNoError) { - return -1; - } - - return status; -} - -/*! - * \brief Constructor. - * \param is the input source. 
- */ -tvm_crt_error_t JSONReader_Create(const char* is, JSONReader* reader) { - memset(reader, 0, sizeof(JSONReader)); - tvm_crt_error_t err = SeqCreate(200, &reader->scope_counter_); - if (err != kTvmErrorNoError) { - return err; - } - reader->NextChar = JSONReader_NextChar; - reader->PeekNextChar = JSONReader_PeekNextChar; - reader->NextNonSpace = JSONReader_NextNonSpace; - reader->PeekNextNonSpace = JSONReader_PeekNextNonSpace; - reader->ReadString = JSONReader_ReadString; - reader->ReadUnsignedInteger = JSONReader_ReadUnsignedInteger; - reader->ReadInteger = JSONReader_ReadInteger; - reader->BeginArray = JSONReader_BeginArray; - reader->BeginObject = JSONReader_BeginObject; - reader->NextArrayItem = JSONReader_NextArrayItem; - reader->NextObjectItem = JSONReader_NextObjectItem; - reader->ArrayLength = JSONReader_ArrayLength; - - DLDevice dev = {kDLCPU, 0}; - err = TVMPlatformMemoryAllocate(strlen(is) + 1, dev, (void**)&reader->is_); - if (err != kTvmErrorNoError) { - return err; - } - - memset(reader->is_, 0, strlen(is) + 1); - snprintf(reader->is_, strlen(is) + 1, "%s", is); - reader->isptr = reader->is_; - return err; -} - -tvm_crt_error_t JSONReader_Release(JSONReader* reader) { - tvm_crt_error_t err = SeqRelease(reader->scope_counter_); - if (err != kTvmErrorNoError) { - return err; - } - - DLDevice dev = {kDLCPU, 0}; - return TVMPlatformMemoryFree(reader->is_, dev); -} diff --git a/src/runtime/crt/graph_executor_module/graph_executor_module.c b/src/runtime/crt/graph_executor_module/graph_executor_module.c deleted file mode 100644 index 559b6896a55e..000000000000 --- a/src/runtime/crt/graph_executor_module/graph_executor_module.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// LINT_C_FILE - -/*! - * \file graph_executor_module.c - * \brief wrap graph_executor into a TVMModule for use with RPC. 
- */ - -#include -#include -#include -#include - -#include "tvm/runtime/crt/internal/graph_executor/graph_executor.h" - -typedef struct { - TVMModule mod; - TVMGraphExecutor* executor; -} GraphExecutorModule; - -static GraphExecutorModule graph_executor; - -int32_t TVMGraphExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { - if (graph_executor.executor != NULL) { - return kTvmErrorExecutorModuleAlreadyCreated; - } - - if (nargs != 4) { - return kTvmErrorFunctionCallNumArguments; - } - - if (tcodes[0] != kTVMStr || tcodes[1] != kTVMModuleHandle || tcodes[2] != kTVMArgInt || - tcodes[3] != kTVMArgInt) { - return kTvmErrorFunctionCallWrongArgType; - } - - if (args[2].v_int64 != kDLCPU || args[3].v_int64 != 0) { - return kTvmErrorExecutorModuleBadContext; - } - - DLDevice dev = {(DLDeviceType)args[2].v_int64, (int)args[3].v_int64}; - int ret_value = - TVMGraphExecutor_Create(args[0].v_str, args[1].v_handle, &dev, &graph_executor.executor); - if (ret_value != 0) { - return ret_value; - } - - TVMModuleHandle out; - ret_value = TVMModCreateFromCModule(&graph_executor.mod, &out); - if (ret_value != 0) { - ret_tcodes[0] = kTVMNullptr; - TVMGraphExecutor_Release(&graph_executor.executor); - return ret_value; - } - - ret_values[0].v_handle = out; - ret_tcodes[0] = kTVMModuleHandle; - return kTvmErrorNoError; -} - -int32_t TVMGraphExecutorModule_GetInput(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { - if (nargs != 1) { - return kTvmErrorFunctionCallNumArguments; - } - - if (tcodes[0] != kTVMStr) { - return kTvmErrorFunctionCallWrongArgType; - } - - int index = TVMGraphExecutor_GetInputIndex(graph_executor.executor, args[0].v_str); - if (index < 0) { - return kTvmErrorExecutorModuleNoSuchInput; - } - - uint32_t eid = TVMGraphExecutor_GetEntryId(graph_executor.executor, - graph_executor.executor->input_nodes[index], 0); - - TVMNDArray* array = &graph_executor.executor->data_entry[eid]; - - TVMNDArray_IncrementReference(array); - - ret_values[0].v_handle = (void*)(&array->dl_tensor); - ret_tcodes[0] = kTVMNDArrayHandle; - return 0; -} - -int32_t TVMGraphExecutorModule_GetInputIndex(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { - int index = TVMGraphExecutor_GetInputIndex(graph_executor.executor, args[0].v_str); - - if (index < 0) { - return kTvmErrorExecutorModuleNoSuchInput; - } - - ret_values[0].v_int64 = index; - ret_tcodes[0] = kTVMArgInt; - return 0; -} - -int32_t TVMGraphExecutorModule_GetNumInputs(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { - if (nargs != 0) { - return kTvmErrorFunctionCallNumArguments; - } - - ret_values[0].v_int64 = TVMGraphExecutor_GetNumInputs(); - ret_tcodes[0] = kTVMArgInt; - return 0; -} - -int32_t TVMGraphExecutorModule_GetNumOutputs(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { - if (nargs != 0) { - return kTvmErrorFunctionCallNumArguments; - } - - ret_values[0].v_int64 = TVMGraphExecutor_GetNumOutputs(graph_executor.executor); - ret_tcodes[0] = kTVMArgInt; - return 0; -} - -int32_t TVMGraphExecutorModule_GetOutput(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { - if (nargs != 1) { - return kTvmErrorFunctionCallNumArguments; - } - - if (tcodes[0] != kTVMArgInt) { - return 
kTvmErrorFunctionCallWrongArgType; - } - - int output_index = args[0].v_int64; - if (output_index < 0 || output_index > TVMGraphExecutor_GetNumOutputs(graph_executor.executor)) { - return kTvmErrorExecutorModuleNoSuchInput; - } - - uint32_t nid = graph_executor.executor->outputs[output_index].node_id; - uint32_t index = graph_executor.executor->outputs[output_index].index; - uint32_t eid = TVMGraphExecutor_GetEntryId(graph_executor.executor, nid, index); - - TVMNDArray* array = &graph_executor.executor->data_entry[eid]; - - TVMNDArray_IncrementReference(array); - - ret_values[0].v_handle = (void*)(&array->dl_tensor); - ret_tcodes[0] = kTVMNDArrayHandle; - return 0; -} - -int32_t TVMGraphExecutorModule_LoadParams(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { - if (nargs != 1) { - return kTvmErrorFunctionCallNumArguments; - } - - if (tcodes[0] != kTVMBytes) { - return kTvmErrorFunctionCallWrongArgType; - } - - ret_tcodes[0] = kTVMNullptr; - - TVMByteArray* arr = (TVMByteArray*)args[0].v_handle; - return TVMGraphExecutor_LoadParams(graph_executor.executor, arr->data, arr->size); -} - -int32_t TVMGraphExecutorModule_Run(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { - if (nargs != 0) { - return kTvmErrorFunctionCallNumArguments; - } - - TVMGraphExecutor_Run(graph_executor.executor); - - ret_tcodes[0] = kTVMNullptr; - return 0; -} - -int32_t TVMGraphExecutorModule_SetInput(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { - if (nargs != 2) { - return kTvmErrorFunctionCallNumArguments; - } - - if (tcodes[0] != kTVMStr || tcodes[1] != kTVMDLTensorHandle) { - return kTvmErrorFunctionCallWrongArgType; - } - - TVMGraphExecutor_SetInput(graph_executor.executor, args[0].v_str, (DLTensor*)args[1].v_handle); - - ret_tcodes[0] = kTVMNullptr; - return 0; -} - -int32_t TVMGraphExecutorModule_NotImplemented(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { - return kTvmErrorFunctionCallNotImplemented; -} - -static const TVMBackendPackedCFunc graph_executor_registry_funcs[] = { - &TVMGraphExecutorModule_GetInput, - &TVMGraphExecutorModule_GetInputIndex, - &TVMGraphExecutorModule_NotImplemented, // get_input_info - &TVMGraphExecutorModule_GetNumInputs, - &TVMGraphExecutorModule_GetNumOutputs, - &TVMGraphExecutorModule_GetOutput, - &TVMGraphExecutorModule_LoadParams, - &TVMGraphExecutorModule_Run, - &TVMGraphExecutorModule_SetInput, - &TVMGraphExecutorModule_NotImplemented, // share_params -}; - -static const TVMFuncRegistry graph_executor_registry = { - "\x08\0get_input\0" - "get_input_index\0" - "get_input_info\0" - "get_num_inputs\0" - "get_num_outputs\0" - "get_output\0" - "load_params\0" - "run\0" - "set_input\0" - "share_params\0", - graph_executor_registry_funcs}; - -tvm_crt_error_t TVMGraphExecutorModule_Register() { - graph_executor.mod.registry = &graph_executor_registry; - graph_executor.executor = NULL; - - return TVMFuncRegisterGlobal("tvm.graph_executor.create", &TVMGraphExecutorModule_Create, 0); -} diff --git a/src/runtime/crt/host/CMakeLists.txt.template b/src/runtime/crt/host/CMakeLists.txt.template deleted file mode 100644 index be0bce85513b..000000000000 --- a/src/runtime/crt/host/CMakeLists.txt.template +++ /dev/null @@ -1,59 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
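Editor's note: every packed function in the removed module follows the same defensive pattern: validate `nargs`, then the per-argument type codes, and only then touch the argument values. A standalone sketch of that pattern; the stand-in types and error codes below are illustrative, not the CRT's real definitions:

```c
#include <stdint.h>

typedef union { int64_t v_int64; void* v_handle; const char* v_str; } Value;
enum { kArgInt = 0, kStr = 11 };                 /* stand-in type codes  */
enum { kOk = 0, kBadArgCount = -1, kBadArgType = -2 };

/* Store args[1] at index args[0] in a table, but only after validation. */
static int set_named_int(Value* args, int* tcodes, int nargs, int64_t* table) {
  if (nargs != 2) {
    return kBadArgCount;                          /* wrong number of arguments */
  }
  if (tcodes[0] != kArgInt || tcodes[1] != kArgInt) {
    return kBadArgType;                           /* wrong argument types */
  }
  table[args[0].v_int64] = args[1].v_int64;       /* only now use the values */
  return kOk;
}

int main(void) {
  int64_t table[4] = {0};
  Value args[2];
  int tcodes[2] = {kArgInt, kArgInt};
  args[0].v_int64 = 1;
  args[1].v_int64 = 99;
  return set_named_int(args, tcodes, 2, table) == kOk ? 0 : 1;
}
```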
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# SPDX-License-Identifier: Apache-2.0 - -cmake_minimum_required(VERSION 3.18) -set(CMAKE_CXX_STANDARD 11) - -project(crt_autogenerated_project C CXX) -add_executable(main) - -set(CRT_LIB_BASE crt/src/runtime/crt) -set(CRT_LIBS microtvm_rpc_server - microtvm_rpc_common - aot_executor_module - aot_executor - graph_executor_module - graph_executor - common - memory -) - -# Build CRT libraries -foreach(crt_lib_name ${CRT_LIBS}) - add_library(${crt_lib_name}) - file(GLOB_RECURSE crt_lib_srcs ${CRT_LIB_BASE}/${crt_lib_name}/*.c ${CRT_LIB_BASE}/${crt_lib_name}/*.cc) - target_sources(${crt_lib_name} PRIVATE ${crt_lib_srcs}) - target_include_directories(${crt_lib_name} PRIVATE crt_config crt/include) - target_compile_definitions(${crt_lib_name} PRIVATE -DTVM_HOST_USE_GRAPH_EXECUTOR_MODULE) - target_link_libraries(main PRIVATE ${crt_lib_name}) -endforeach(crt_lib_name ${CRT_LIBS}) - -# Build model files -add_library(tvm_model) -file(GLOB_RECURSE tvm_model_srcs model/codegen/host/src/*.c model/codegen/host/lib/*.o) -target_sources(tvm_model PRIVATE ${tvm_model_srcs}) -target_include_directories(tvm_model PRIVATE ${CMAKE_SOURCE_DIR}/include crt_config crt/include) -target_compile_options(tvm_model PRIVATE -Wno-error=unused-variable -Wno-error=missing-braces -Wno-error=unused-const-variable -Wno-unused-variable) -set_target_properties(tvm_model PROPERTIES LINKER_LANGUAGE C) -target_link_libraries(main PRIVATE tvm_model) - -file(GLOB_RECURSE app_srcs src/**.cc) -target_sources(main PRIVATE ${app_srcs} ${cmsis_lib_srcs}) -target_compile_definitions(main PRIVATE -DTVM_HOST_USE_GRAPH_EXECUTOR_MODULE) -target_include_directories(main PRIVATE crt_config include ${CMAKE_SOURCE_DIR}/include crt/include) diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc deleted file mode 100644 index 0607d4b28719..000000000000 --- a/src/runtime/crt/host/main.cc +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \file main.cc - * \brief main entry point for host subprocess-based CRT - */ -#include -#include -#include -#include -#include -#include - -#include - -#include "crt_config.h" - -#ifdef TVM_HOST_USE_GRAPH_EXECUTOR_MODULE -#include -#endif - -extern "C" { - -ssize_t MicroTVMWriteFunc(void* context, const uint8_t* data, size_t num_bytes) { - ssize_t to_return = write(STDOUT_FILENO, data, num_bytes); - fflush(stdout); - fsync(STDOUT_FILENO); - return to_return; -} -} - -static char** g_argv = NULL; - -int testonly_reset_server(TVMValue* args, int* type_codes, int num_args, TVMValue* out_ret_value, - int* out_ret_tcode, void* resource_handle) { - execvp(g_argv[0], g_argv); - perror("microTVM runtime: error restarting"); - return -1; -} - -int main(int argc, char** argv) { - g_argv = argv; - TVMPlatformInitialize(); - microtvm_rpc_server_t rpc_server = MicroTVMRpcServerInit(&MicroTVMWriteFunc, nullptr); - -#ifdef TVM_HOST_USE_GRAPH_EXECUTOR_MODULE - CHECK_EQ(TVMGraphExecutorModule_Register(), kTvmErrorNoError, - "failed to register GraphExecutor TVMModule"); -#endif - - int error = TVMFuncRegisterGlobal("tvm.testing.reset_server", - (TVMFunctionHandle)&testonly_reset_server, 0); - if (error) { - fprintf( - stderr, - "microTVM runtime: internal error (error#: %x) registering global packedfunc; exiting\n", - error); - return 2; - } - - setbuf(stdin, NULL); - setbuf(stdout, NULL); - - for (;;) { - uint8_t c; - int ret_code = read(STDIN_FILENO, &c, 1); - if (ret_code < 0) { - perror("microTVM runtime: read failed"); - return 2; - } else if (ret_code == 0) { - fprintf(stderr, "microTVM runtime: 0-length read, exiting!\n"); - return 2; - } - uint8_t* cursor = &c; - size_t bytes_to_process = 1; - while (bytes_to_process > 0) { - tvm_crt_error_t err = MicroTVMRpcServerLoop(rpc_server, &cursor, &bytes_to_process); - if (err == kTvmErrorPlatformShutdown) { - break; - } else if (err != kTvmErrorNoError) { - char buf[1024]; - snprintf(buf, sizeof(buf), "microTVM runtime: MicroTVMRpcServerLoop error: %08x", err); - perror(buf); - return 2; - } - } - } - return 0; -} diff --git a/src/runtime/crt/host/microtvm_api_server.py b/src/runtime/crt/host/microtvm_api_server.py deleted file mode 100644 index 031e4de1139f..000000000000 --- a/src/runtime/crt/host/microtvm_api_server.py +++ /dev/null @@ -1,249 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
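Editor's note: the removed `main.cc` reads one byte at a time from stdin and feeds it to `MicroTVMRpcServerLoop` through a cursor plus remaining-byte count, looping until the server has consumed everything. A standalone illustration of that consumption contract; `consume_some` is a hypothetical stand-in for the server loop:

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for MicroTVMRpcServerLoop: consume a prefix of the buffer and
 * update the cursor and remaining count accordingly. */
static int consume_some(uint8_t** cursor, size_t* remaining) {
  if (*remaining == 0) return 0;
  printf("consumed byte 0x%02x\n", **cursor);
  *cursor += 1;      /* advance past what was handled ...        */
  *remaining -= 1;   /* ... and report how much is left to feed  */
  return 0;
}

int main(void) {
  uint8_t packet[] = {0x01, 0x02, 0x03};
  uint8_t* cursor = packet;
  size_t remaining = sizeof(packet);
  while (remaining > 0) {
    if (consume_some(&cursor, &remaining) != 0) return 1;
  }
  return 0;
}
```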
- -import fcntl -import os -import os.path -import pathlib -import select -import shutil -import subprocess -import tarfile -import time -import re - -from tvm.micro.project_api import server - - -PROJECT_DIR = pathlib.Path(os.path.dirname(__file__) or os.path.getcwd()) - - -MODEL_LIBRARY_FORMAT_RELPATH = "model.tar" - - -IS_TEMPLATE = not os.path.exists(os.path.join(PROJECT_DIR, MODEL_LIBRARY_FORMAT_RELPATH)) - -# Used this size to pass most CRT tests in TVM. -WORKSPACE_SIZE_BYTES = 2 * 1024 * 1024 - -CMAKEFILE_FILENAME = "CMakeLists.txt" - -# The build target given to make -BUILD_TARGET = "build/main" - - -class Handler(server.ProjectAPIHandler): - BUILD_TARGET = "build/main" - - def __init__(self): - super(Handler, self).__init__() - self._proc = None - - def server_info_query(self, tvm_version): - return server.ServerInfo( - platform_name="host", - is_template=IS_TEMPLATE, - model_library_format_path="" - if IS_TEMPLATE - else PROJECT_DIR / MODEL_LIBRARY_FORMAT_RELPATH, - project_options=[ - server.ProjectOption( - "verbose", - optional=["build"], - type="bool", - default=False, - help="Run make with verbose output", - ), - server.ProjectOption( - "workspace_size_bytes", - optional=["generate_project"], - type="int", - default=WORKSPACE_SIZE_BYTES, - help="Sets the value of TVM_WORKSPACE_SIZE_BYTES.", - ), - ], - ) - - # These files and directories will be recursively copied into generated projects from the CRT. - CRT_COPY_ITEMS = ("include", "CMakeLists.txt", "src") - - def _populate_cmake( - self, - cmakefile_template_path: pathlib.Path, - cmakefile_path: pathlib.Path, - memory_size: int, - verbose: bool, - ): - """Generate CMakeList file from template.""" - - regex = re.compile(r"([A-Z_]+) := (<[A-Z_]+>)") - with open(cmakefile_path, "w") as cmakefile_f: - with open(cmakefile_template_path, "r") as cmakefile_template_f: - for line in cmakefile_template_f: - cmakefile_f.write(line) - cmakefile_f.write( - f"target_compile_definitions(main PUBLIC -DTVM_WORKSPACE_SIZE_BYTES={memory_size})\n" - ) - if verbose: - cmakefile_f.write(f"set(CMAKE_VERBOSE_MAKEFILE TRUE)\n") - - def generate_project(self, model_library_format_path, standalone_crt_dir, project_dir, options): - # Make project directory. - project_dir.mkdir(parents=True) - current_dir = pathlib.Path(__file__).parent.absolute() - - # Copy ourselves to the generated project. TVM may perform further build steps on the generated project - # by launching the copy. - shutil.copy2(__file__, project_dir / os.path.basename(__file__)) - - # Place Model Library Format tarball in the special location, which this script uses to decide - # whether it's being invoked in a template or generated project. - project_model_library_format_path = project_dir / MODEL_LIBRARY_FORMAT_RELPATH - shutil.copy2(model_library_format_path, project_model_library_format_path) - - # Extract Model Library Format tarball.into /model. - extract_path = project_dir / project_model_library_format_path.stem - with tarfile.TarFile(project_model_library_format_path) as tf: - os.makedirs(extract_path) - tf.extractall(path=extract_path) - - # Populate CRT. 
- crt_path = project_dir / "crt" - os.mkdir(crt_path) - for item in self.CRT_COPY_ITEMS: - src_path = standalone_crt_dir / item - dst_path = crt_path / item - if os.path.isdir(src_path): - shutil.copytree(src_path, dst_path) - else: - shutil.copy2(src_path, dst_path) - - # Populate CMake file - self._populate_cmake( - current_dir / f"{CMAKEFILE_FILENAME}.template", - project_dir / CMAKEFILE_FILENAME, - options.get("workspace_size_bytes", WORKSPACE_SIZE_BYTES), - options.get("verbose"), - ) - - # Populate crt-config.h - crt_config_dir = project_dir / "crt_config" - crt_config_dir.mkdir() - shutil.copy2( - current_dir / "crt_config" / "crt_config.h", - crt_config_dir / "crt_config.h", - ) - - # Populate src/ - src_dir = project_dir / "src" - src_dir.mkdir() - shutil.copy2( - current_dir / "src" / "main.cc", - src_dir / "main.cc", - ) - shutil.copy2( - current_dir / "src" / "platform.cc", - src_dir / "platform.cc", - ) - - def build(self, options): - build_dir = PROJECT_DIR / "build" - build_dir.mkdir() - subprocess.check_call(["cmake", ".."], cwd=build_dir) - subprocess.check_call(["make"], cwd=build_dir) - - def flash(self, options): - pass # Flashing does nothing on host. - - def _set_nonblock(self, fd): - flag = fcntl.fcntl(fd, fcntl.F_GETFL) - fcntl.fcntl(fd, fcntl.F_SETFL, flag | os.O_NONBLOCK) - new_flag = fcntl.fcntl(fd, fcntl.F_GETFL) - assert (new_flag & os.O_NONBLOCK) != 0, "Cannot set file descriptor {fd} to non-blocking" - - def open_transport(self, options): - self._proc = subprocess.Popen( - [self.BUILD_TARGET], stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=0 - ) - self._set_nonblock(self._proc.stdin.fileno()) - self._set_nonblock(self._proc.stdout.fileno()) - return server.TransportTimeouts( - session_start_retry_timeout_sec=0, - session_start_timeout_sec=0, - session_established_timeout_sec=0, - ) - - def close_transport(self): - if self._proc is not None: - proc = self._proc - self._proc = None - proc.terminate() - proc.wait() - - def _await_ready(self, rlist, wlist, timeout_sec=None, end_time=None): - if timeout_sec is None and end_time is not None: - timeout_sec = max(0, end_time - time.monotonic()) - - rlist, wlist, xlist = select.select(rlist, wlist, rlist + wlist, timeout_sec) - if not rlist and not wlist and not xlist: - raise server.IoTimeoutError() - - return True - - def read_transport(self, n, timeout_sec): - if self._proc is None: - raise server.TransportClosedError() - - fd = self._proc.stdout.fileno() - end_time = None if timeout_sec is None else time.monotonic() + timeout_sec - - try: - self._await_ready([fd], [], end_time=end_time) - to_return = os.read(fd, n) - except BrokenPipeError: - to_return = 0 - - if not to_return: - self.close_transport() - raise server.TransportClosedError() - - return to_return - - def write_transport(self, data, timeout_sec): - if self._proc is None: - raise server.TransportClosedError() - - fd = self._proc.stdin.fileno() - end_time = None if timeout_sec is None else time.monotonic() + timeout_sec - - data_len = len(data) - while data: - self._await_ready([], [fd], end_time=end_time) - try: - num_written = os.write(fd, data) - except BrokenPipeError: - num_written = 0 - - if not num_written: - self.close_transport() - raise server.TransportClosedError() - - data = data[num_written:] - - -if __name__ == "__main__": - server.main(Handler()) diff --git a/src/runtime/crt/host/platform.cc b/src/runtime/crt/host/platform.cc deleted file mode 100644 index f5af08a9be88..000000000000 --- a/src/runtime/crt/host/platform.cc +++ 
/dev/null @@ -1,126 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \brief Implementation of TVMPlatform functions in tvm/runtime/crt/platform.h - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -using namespace std::chrono; - -extern "C" { - -uint8_t memory[TVM_WORKSPACE_SIZE_BYTES]; -MemoryManagerInterface* memory_manager; - -steady_clock::time_point g_microtvm_start_time; -int g_microtvm_timer_running = 0; - -// Called when an internal error occurs and execution cannot continue. -void TVMPlatformAbort(tvm_crt_error_t error_code) { - std::cerr << "TVMPlatformAbort: " << error_code << std::endl; - throw "Aborted"; -} - -// Called by the microTVM RPC server to implement TVMLogf. -size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, const char* fmt, - va_list args) { - return vsprintf(out_buf, fmt, args); -} - -// Allocate memory for use by TVM. -tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { - return memory_manager->Allocate(memory_manager, num_bytes, dev, out_ptr); -} - -// Free memory used by TVM. -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { - return memory_manager->Free(memory_manager, ptr, dev); -} - -// Start a device timer. -tvm_crt_error_t TVMPlatformTimerStart() { - if (g_microtvm_timer_running) { - std::cerr << "timer already running" << std::endl; - return kTvmErrorPlatformTimerBadState; - } - g_microtvm_start_time = std::chrono::steady_clock::now(); - g_microtvm_timer_running = 1; - return kTvmErrorNoError; -} - -// Stop the running device timer and get the elapsed time (in microseconds). -tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) { - if (!g_microtvm_timer_running) { - std::cerr << "timer not running" << std::endl; - return kTvmErrorPlatformTimerBadState; - } - auto microtvm_stop_time = std::chrono::steady_clock::now(); - std::chrono::microseconds time_span = std::chrono::duration_cast( - microtvm_stop_time - g_microtvm_start_time); - *elapsed_time_seconds = static_cast(time_span.count()) / 1e6; - g_microtvm_timer_running = 0; - return kTvmErrorNoError; -} - -// Platform-specific before measurement call. -tvm_crt_error_t TVMPlatformBeforeMeasurement() { return kTvmErrorNoError; } - -// Platform-specific after measurement call. -tvm_crt_error_t TVMPlatformAfterMeasurement() { return kTvmErrorNoError; } - -static_assert(RAND_MAX >= (1 << 8), "RAND_MAX is smaller than acceptable"); -unsigned int random_seed = 0; -// Fill a buffer with random data. 
-tvm_crt_error_t TVMPlatformGenerateRandom(uint8_t* buffer, size_t num_bytes) { - if (random_seed == 0) { - random_seed = (unsigned int)time(NULL); - } - for (size_t i = 0; i < num_bytes; ++i) { - int random = rand_r(&random_seed); - buffer[i] = (uint8_t)random; - } - return kTvmErrorNoError; -} - -// Initialize TVM inference. -tvm_crt_error_t TVMPlatformInitialize() { - int status = - PageMemoryManagerCreate(&memory_manager, memory, sizeof(memory), 8 /* page_size_log2 */); - if (status != 0) { - fprintf(stderr, "error initiailizing memory manager\n"); - return kTvmErrorPlatformMemoryManagerInitialized; - } - return kTvmErrorNoError; -} - -} // extern C diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/common/func_registry.h b/src/runtime/crt/include/tvm/runtime/crt/internal/common/func_registry.h deleted file mode 100644 index d62e3d7e1104..000000000000 --- a/src/runtime/crt/include/tvm/runtime/crt/internal/common/func_registry.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// LINT_C_FILE - -/*! - * \file tvm/runtime/crt/include/tvm/runtime/crt/internal/common/func_registry.h - * \brief Abstract device memory management API - */ -#ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_COMMON_FUNC_REGISTRY_H_ -#define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_COMMON_FUNC_REGISTRY_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -int strcmp_cursor(const char** cursor, const char* name); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_COMMON_FUNC_REGISTRY_H_ diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/common/ndarray.h b/src/runtime/crt/include/tvm/runtime/crt/internal/common/ndarray.h deleted file mode 100644 index 0162c6eb4de6..000000000000 --- a/src/runtime/crt/include/tvm/runtime/crt/internal/common/ndarray.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \file tvm/runtime/crt/include/tvm/runtime/crt/internal/common/ndarray.h - * \brief Abstract device memory management API - */ -#ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_COMMON_NDARRAY_H_ -#define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_COMMON_NDARRAY_H_ - -#include -#include -#include -#include -#include -#include - -/*! \brief Magic number for NDArray file */ -static const uint64_t kTVMNDArrayMagic = 0xDD5E40F096B4A13F; - -/*! \brief Magic number for NDArray list file */ -static const uint64_t kTVMNDArrayListMagic = 0xF7E58D4F05049CB7; - -typedef struct TVMNDArray { - /*! \brief the actual tensor in DLPack format. NOTE: this must be first element in struct */ - DLTensor dl_tensor; - - /*! \brief count of references to TVMNDArray to avoid early freeing by host */ - uint32_t reference_count; -} TVMNDArray; - -int TVMNDArray_Create(int32_t ndim, const tvm_index_t* shape, DLDataType dtype, DLDevice dev, - TVMNDArray* array); - -int64_t TVMNDArray_DataSizeBytes(TVMNDArray* array); - -int TVMNDArray_RandomFill(TVMNDArray* array); - -int TVMNDArray_Empty(int32_t ndim, const tvm_index_t* shape, DLDataType dtype, DLDevice dev, - TVMNDArray* array); - -int TVMNDArray_Load(TVMNDArray* ret, const char** strm); - -int TVMNDArray_CreateView(TVMNDArray* arr, const tvm_index_t* shape, int32_t ndim, DLDataType dtype, - TVMNDArray* array_view); - -void TVMNDArray_IncrementReference(TVMNDArray* arr); - -uint32_t TVMNDArray_DecrementReference(TVMNDArray* arr); - -int TVMNDArray_Release(TVMNDArray* arr); - -#endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_COMMON_NDARRAY_H_ diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h deleted file mode 100644 index d4429308b650..000000000000 --- a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h - * \brief Tiny graph executor that can run graph containing only tvm PackedFunc. - */ -#ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ -#define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include - -// Memory pool entry. 
-typedef struct TVMGraphExecutorPoolEntry { - size_t size; - int device_type; - int entry_id; -} TVMGraphExecutorPoolEntry; - -// Node entry -typedef struct TVMGraphExecutorNodeEntry { - uint32_t node_id; - uint32_t index; - uint32_t version; - // JSON Loader - void (*Load)(JSONReader* reader); -} TVMGraphExecutorNodeEntry; - -// Storage entry. -typedef struct TVMGraphExecutorStorageEntry { - uint8_t is_linked_param; - TVMNDArray array; -} TVMGraphExecutorStorageEntry; - -// Node -typedef struct TVMGraphExecutorNode { - // operator type in string - char op_type[16]; - // name of the op - char name[TVM_CRT_MAX_STRLEN_FUNCTION_NAME]; - // parameters - TVMOpParam param; - // inputs - TVMGraphExecutorNodeEntry* inputs; - // number of inputs - size_t inputs_count; - // control deps - uint32_t control_deps[20]; - // JSON Loader - void (*LoadAttrs)(struct TVMGraphExecutorNode* node, JSONReader* reader, TVMOpParam* param); - // JSON Loader - int (*Load)(struct TVMGraphExecutorNode* node, JSONReader* reader); -} TVMGraphExecutorNode; - -typedef struct TVMGraphExecutor { - /*! \brief The graph nodes. */ - TVMGraphExecutorNode* nodes; - /*! \brief The graph nodes counter. */ - uint32_t nodes_count; - /*! \brief The argument nodes. */ - uint32_t* input_nodes; - uint32_t input_nodes_count; - /*! \brief Used for quick entry indexing. */ - uint32_t* node_row_ptr; - uint32_t node_row_ptr_count; - /*! \brief Output entries. */ - TVMGraphExecutorNodeEntry* outputs; - /*! \brief Output entries counter. */ - uint32_t outputs_count; - /*! \brief Additional graph attributes. */ - TVMGraphExecutorGraphAttr attrs; - /*! \brief The code module that contains both host and device code. */ - TVMModuleHandle module_handle; - /*! \brief Execution context of all devices including the host. */ - DLDevice devices[1]; - uint32_t devices_count; - /*! \brief Common storage pool for all devices. */ - TVMGraphExecutorStorageEntry* storage_pool; - uint32_t storage_pool_count; - /*! \brief Data entry of each node. */ - TVMNDArray* data_entry; - uint32_t data_entry_count; - /*! \brief Operator on each node. */ - TVMPackedFunc* op_execs; - uint32_t op_execs_count; -} TVMGraphExecutor; - -typedef DLTensor* DLTensorPtr; - -// private functions -uint32_t TVMGraphExecutor_GetEntryId(TVMGraphExecutor* executor, uint32_t nid, uint32_t index); -void TVMGraphExecutor_SetInput(TVMGraphExecutor* executor, const char* name, DLTensor* data_in); -int TVMGraphExecutor_LoadParams(TVMGraphExecutor* executor, const char* param_blob, - const uint32_t param_size); -void TVMGraphExecutor_Run(TVMGraphExecutor* executor); -int TVMGraphExecutor_GetOutput(TVMGraphExecutor* executor, const int32_t idx, DLTensor* out); - -int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* executor, const TVMOpParam* param, - DLTensorPtr* args, const uint32_t args_count, - TVMPackedFunc* pf); -int TVMGraphExecutor_Load(TVMGraphExecutor* executor, JSONReader* reader); - -#ifdef __cplusplus -} -#endif - -#endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h deleted file mode 100644 index 74236148d9d4..000000000000 --- a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
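Editor's note: the `TVMGraphExecutor` struct above keeps a `node_row_ptr` table for "quick entry indexing". The sketch below illustrates the indexing scheme this implies, on the assumption (the body of `TVMGraphExecutor_GetEntryId` is not shown in this hunk) that `node_row_ptr[nid]` holds the id of node `nid`'s first output entry:

```c
#include <stdint.h>
#include <stdio.h>

/* Output `index` of node `nid` is assumed to live at entry
 * node_row_ptr[nid] + index. */
static uint32_t entry_id(const uint32_t* node_row_ptr, uint32_t nid, uint32_t index) {
  return node_row_ptr[nid] + index;
}

int main(void) {
  /* Three nodes with 1, 2 and 1 outputs respectively. */
  const uint32_t node_row_ptr[] = {0, 1, 3, 4};
  printf("node 1, output 1 -> entry %u\n", entry_id(node_row_ptr, 1, 1));  /* prints 2 */
  return 0;
}
```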
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h - * \brief Lightweight JSON Reader that read save into C++ data structs. - */ -#ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_LOAD_JSON_H_ -#define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_LOAD_JSON_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include - -enum { - JSON_READ_TYPE_U8 = 1, - JSON_READ_TYPE_S8 = 2, - JSON_READ_TYPE_U16 = 3, - JSON_READ_TYPE_S16 = 4, - JSON_READ_TYPE_U32 = 5, - JSON_READ_TYPE_S32 = 6, - JSON_READ_TYPE_F32 = 7, - JSON_READ_TYPE_F64 = 8, - JSON_READ_TYPE_GRAPH_EXECUTOR_NODE = 9, - JSON_READ_TYPE_GRAPH_EXECUTOR_NODE_ENTRY = 10, - JSON_READ_TYPE_GRAPH_EXECUTOR_GRAPH_ATTR = 11 -}; - -typedef struct Seq { - uint32_t* data; - uint64_t allocated; - uint32_t size; - void (*push_back)(struct Seq* seq, uint32_t src); - uint32_t* (*back)(struct Seq* seq); - void (*pop_back)(struct Seq* seq); -} Seq; - -/*! - * \brief Lightweight JSON Reader to read any STL compositions and structs. - * The user need to know the schema of the - */ -typedef struct JSONReader { - /*! \brief internal reader string */ - char* is_; - char* isptr; - /*! \brief "\\r" counter */ - size_t line_count_r_; - /*! \brief "\\n" counter */ - size_t line_count_n_; - /*! - * \brief record how many element processed in - * current array/object scope. - */ - Seq* scope_counter_; - - char (*NextChar)(struct JSONReader* reader); - char (*NextNonSpace)(struct JSONReader* reader); - char (*PeekNextChar)(struct JSONReader* reader); - char (*PeekNextNonSpace)(struct JSONReader* reader); - int (*ReadUnsignedInteger)(struct JSONReader* reader, unsigned int* out_value); - int (*ReadInteger)(struct JSONReader* reader, int64_t* out_value); - int (*ReadString)(struct JSONReader* reader, char* out_str, size_t out_str_size); - void (*BeginArray)(struct JSONReader* reader); - void (*BeginObject)(struct JSONReader* reader); - uint8_t (*NextObjectItem)(struct JSONReader* reader, char* out_key, size_t out_key_size); - uint8_t (*NextArrayItem)(struct JSONReader* reader); - int (*ArrayLength)(struct JSONReader* reader, size_t* num_elements); -} JSONReader; - -/*! - * \brief Constructor of JSONReader class - * \param is the input source. - * \param reader Pointer to the JSONReader to initialize. - * \return kTvmErrorNoError on success. - */ -tvm_crt_error_t JSONReader_Create(const char* is, JSONReader* reader); - -/*! - * \brief Deallocate dynamic memory used in the JSONReader instance. - * NOTE: this doesn't actually free the passed-in reader itself, just dynamically-allocated members. - * \param reader Pointer to a JSONReader passed to JSONReader_Create. - * \return kTvmErrorNoError on success. 
- */ -tvm_crt_error_t JSONReader_Release(JSONReader* reader); - -#ifdef __cplusplus -} -#endif - -#endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_LOAD_JSON_H_ diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/memory/page_allocator.h b/src/runtime/crt/include/tvm/runtime/crt/internal/memory/page_allocator.h deleted file mode 100644 index 7d40c03f2673..000000000000 --- a/src/runtime/crt/include/tvm/runtime/crt/internal/memory/page_allocator.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file runtime/crt/include/tvm/runtime/crt/internal/memory/page_allocator.h - * \brief Defines data types and functions used in the internal memory manager. - * Exposed for testing. - */ - -#ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_MEMORY_PAGE_ALLOCATOR_H_ -#define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_MEMORY_PAGE_ALLOCATOR_H_ - -#include -#include -#include - -#include "crt_config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/*! \brief A page in the DRAM */ -typedef struct Page { - /*! \brief Start location in page table */ - tvm_index_t ptable_begin; - /*! \brief The total number of pages */ - tvm_index_t num_pages; - /*! \brief Data */ - uint8_t* data; -} Page; - -// construct a new page -Page PageCreate(uint8_t* memory_pool, size_t page_size_bytes, tvm_index_t ptable_begin, - tvm_index_t num_pages); - -typedef struct PageTable { - // Pointer to beginning of memory pool. - uint8_t* memory_pool; - // Size of one page. - size_t page_size_bytes; - - Page* page; - size_t max_pages; - size_t num_pages; - void (*resize)(struct PageTable* ptable, size_t size, Page* page); -} PageTable; - -typedef struct PageEntry { - uint8_t* addr; - Page page; -} PageEntry; - -typedef struct TLB { - PageEntry* entries; - size_t max_pages; - uint32_t num_pages; - void (*set)(struct TLB* tlb, uint8_t* data, Page* page); - PageEntry* (*find)(struct TLB* tlb, uint8_t* data); -} TLB; - -typedef struct IndexedEntry { - tvm_index_t index; - Page page; -} IndexedEntry; - -typedef struct MultiMap { - IndexedEntry* entries; - size_t max_entries; - size_t num_entries; - IndexedEntry* (*lower_bound)(struct MultiMap* map, uint32_t npage); - IndexedEntry* (*end)(struct MultiMap* map); - void (*erase)(struct MultiMap* map, IndexedEntry* entry); - void (*insert)(struct MultiMap* map, uint32_t npage, Page* p); -} MultiMap; - -/*! - * \brief DRAM memory manager - * Implements simple paging to allow physical address translation. - */ -typedef struct MemoryManager { - // Public interface for this object. 
- MemoryManagerInterface interface; - // Physical address -> page - PageTable ptable; - // Virtual address -> page - TLB pmap; - // Free map - MultiMap free_map; -} MemoryManager; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_MEMORY_PAGE_ALLOCATOR_H_ diff --git a/src/runtime/crt/memory/page_allocator.c b/src/runtime/crt/memory/page_allocator.c deleted file mode 100644 index 0b6bb54df34b..000000000000 --- a/src/runtime/crt/memory/page_allocator.c +++ /dev/null @@ -1,331 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// LINT_C_FILE - -/*! - * \file memory.c - * \brief Virtual memory manager - * - * To maximize portability, thread-safe feature has been dropped for now. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// construct a new page -Page PageCreate(uint8_t* memory_pool, size_t page_size_bytes, tvm_index_t ptable_begin, - tvm_index_t num_pages) { - Page page; - page.ptable_begin = ptable_begin; - page.num_pages = num_pages; - page.data = memory_pool + ptable_begin * page_size_bytes; - return page; -} - -void PageTable_Resize(struct PageTable* ptable, size_t new_size, Page* page) { - CHECK_LE(ptable->num_pages, new_size, "size value (%zu) is smaller than expected (%zu).", - new_size, ptable->num_pages); - for (uint32_t idx = ptable->num_pages; idx < new_size; idx++) { - ptable->page[idx] = *page; - } - ptable->num_pages = new_size; -} - -void TLB_Set(TLB* tlb, uint8_t* data, Page* page) { - PageEntry* entry = tlb->find(tlb, data); - if (entry == 0) { - tlb->entries[tlb->num_pages].addr = data; - tlb->entries[tlb->num_pages].page = *page; - tlb->num_pages++; - } else { - entry->addr = data; - entry->page = *page; - } -} - -PageEntry* TLB_Find(TLB* tlb, uint8_t* data) { - PageEntry* entry = 0; - for (uint32_t idx = 0; idx < tlb->num_pages; idx++) { - if (tlb->entries[idx].addr == data) { - entry = tlb->entries + idx; - break; - } - } - return entry; -} - -IndexedEntry* MultiMap_LowerBound(struct MultiMap* map, uint32_t npage) { - IndexedEntry* entry = 0; - for (uint32_t idx = 0; idx < map->num_entries; idx++) { - if (map->entries[idx].index >= npage) { - entry = map->entries + idx; - break; - } - } - return entry; -} - -IndexedEntry* MultiMap_End(struct MultiMap* map) { - IndexedEntry* entry = 0; - return entry; -} - -void MultiMap_Erase(struct MultiMap* map, IndexedEntry* entry) { - for (uint32_t idx = 0; idx < map->num_entries; idx++) { - if ((map->entries + idx) == entry) { - // NOTE: do not use memcpy due to overlap. 
- for (uint32_t src_idx = idx + 1; src_idx < map->num_entries; src_idx++) { - map->entries[src_idx - 1] = map->entries[src_idx]; - } - map->num_entries--; - break; - } - } -} - -void MultiMap_Insert(struct MultiMap* map, uint32_t npage, Page* p) { - CHECK_LE(map->num_entries + 1, map->max_entries, "invalid number of free pages."); - for (uint32_t idx = map->num_entries; idx < (map->num_entries + npage); idx++) { - map->entries[map->num_entries].index = npage; - map->entries[map->num_entries].page = *p; - } - map->num_entries++; -} - -/*! - * \brief Allocate memory from manager - * \param size The size of memory - * \return The virtual address - */ -tvm_crt_error_t PageMemoryManager_Allocate(MemoryManagerInterface* interface, size_t num_bytes, - DLDevice dev, void** out_ptr) { - MemoryManager* mgr = (MemoryManager*)interface; - - *out_ptr = 0; - PageTable* ptable = &(mgr->ptable); - tvm_index_t npage = (num_bytes + ptable->page_size_bytes - 1) / ptable->page_size_bytes; - - MultiMap* free_map = &(mgr->free_map); - IndexedEntry* it = free_map->lower_bound(free_map, npage); - tvm_index_t start = 0; - if (it != free_map->end(free_map)) { - Page p = it->page; - free_map->erase(free_map, it); - *out_ptr = p.data; - start = p.ptable_begin; - npage = p.num_pages; - } else { - start = ptable->num_pages; - if ((unsigned)(start + npage) > ptable->max_pages) { -#if TVM_CRT_DEBUG > 1 - TVMLogf("insufficient memory, start=%" PRId32 ", npage=%" PRId32 ", total=%" PRId32 " / %zu", - (int32_t)start, (int32_t)npage, (int32_t)(start + npage), mgr->pmap.max_pages); -#endif - return kTvmErrorPlatformNoMemory; - } - /* insert page entry */ - Page p = PageCreate(ptable->memory_pool, ptable->page_size_bytes, start, npage); - ptable->resize(ptable, start + npage, &p); - *out_ptr = p.data; - TLB* pmap = &(mgr->pmap); - pmap->set(pmap, *out_ptr, &p); - } - mgr->interface.vleak_size++; -#if TVM_CRT_DEBUG > 1 - TVMLogf("allocate: addr=%p, start=%" PRId64 "/%zu, npage=%" PRId64 ", vleak=%d\n", data, start, - ptable->max_pages, npage, mgr->interface.vleak_size); -#endif // TVM_CRT_DEBUG - return kTvmErrorNoError; -} - -/*! - * \brief Reallocate memory from manager - * \param ptr Pointer holding a pointer to the memory area to be reallocated - * \param num_bytes The size of memory now required. - * \return kTvmErrorNoError on success. 
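Editor's note: `PageMemoryManager_Allocate` above first rounds the request up to whole pages and only then consults the free map or grows the page table. A standalone sketch of that rounding step; the 256-byte page size mirrors the `page_size_log2 = 8` used by the removed host platform code:

```c
#include <stddef.h>
#include <stdio.h>

/* Round a byte request up to a whole number of pages (ceiling division). */
static size_t pages_needed(size_t num_bytes, size_t page_size_bytes) {
  return (num_bytes + page_size_bytes - 1) / page_size_bytes;
}

int main(void) {
  const size_t page = 256;                   /* 2^8, i.e. page_size_log2 = 8 */
  printf("%zu\n", pages_needed(1, page));    /* 1 page  */
  printf("%zu\n", pages_needed(256, page));  /* 1 page  */
  printf("%zu\n", pages_needed(257, page));  /* 2 pages */
  return 0;
}
```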
- */ -tvm_crt_error_t PageMemoryManager_Realloc(MemoryManagerInterface* interface, void** ptr, - tvm_index_t num_bytes) { - MemoryManager* mgr = (MemoryManager*)interface; - - uint8_t* data = *((uint8_t**)ptr); // NOLINT(*) - PageTable* ptable = &(mgr->ptable); - TLB* pmap = &(mgr->pmap); - MultiMap* free_map = &(mgr->free_map); - tvm_index_t start = 0; - tvm_index_t npage = (num_bytes + ptable->page_size_bytes - 1) / ptable->page_size_bytes; - if (ptr) { - // get page size for given pointer - CHECK_NE(pmap->num_pages, 0, "invalid translation look-aside buffer."); - PageEntry* entry = pmap->find(pmap, (uint8_t*)ptr); // NOLINT(*) - CHECK_NE(entry, 0, "no valid page entry found."); - Page* pptr = &(entry->page); - // if the page size is smaller than target page size, - // try allocate new space - if (pptr->num_pages < npage) { - // TODO(liangfu): found out whether we can extend current entry - // - // insert new page entry - IndexedEntry* it = free_map->lower_bound(free_map, npage); - if (it != free_map->end(free_map)) { - data = it->page.data; - start = it->page.ptable_begin; - npage = it->page.num_pages; - free_map->erase(free_map, it); - } else { - start = ptable->num_pages; - if ((unsigned)(start + npage) > ptable->max_pages) { -#if TVM_CRT_DEBUG > 1 - TVMLogf("insufficient memory, start=%" PRId64 ", npage=%" PRId64 ", total=%" PRId64 "", - start, npage, start + npage); -#endif - return kTvmErrorPlatformNoMemory; - } - Page p = PageCreate(mgr->ptable.memory_pool, mgr->ptable.page_size_bytes, start, npage); - ptable->resize(ptable, start + npage, &p); - data = p.data; - pmap->set(pmap, data, &p); - } - // copy previous data to the new entry - memcpy(data, ptr, ptable->page_size_bytes * pptr->num_pages); - // release memory - free_map->insert(free_map, pptr->num_pages, pptr); - } else { - start = pptr->ptable_begin; - } - } else { - IndexedEntry* it = free_map->lower_bound(free_map, npage); - if (it != free_map->end(free_map)) { - Page p = it->page; - free_map->erase(free_map, it); - data = p.data; - start = p.ptable_begin; - npage = p.num_pages; - } else { - PageTable* ptable = &(mgr->ptable); - start = ptable->num_pages; - if ((unsigned)(start + npage) > ptable->max_pages) { -#if TVM_CRT_DEBUG > 1 - TVMLogf("insufficient memory, start=%" PRId64 ", npage=%" PRId64 ", total=%" PRId64 "", - start, npage, start + npage); -#endif - /* insert page entry */ - Page p = PageCreate(mgr->ptable.memory_pool, mgr->ptable.page_size_bytes, start, npage); - ptable->resize(ptable, start + npage, &p); - data = p.data; - TLB* pmap = &(mgr->pmap); - pmap->set(pmap, data, &p); - } - mgr->interface.vleak_size++; - } - } -#if TVM_CRT_DEBUG > 1 - TVMLogf("reallocate: addr=%p, start=%" PRId64 "/%zu, npage=%" PRId64 ", vleak=%d, size=%zu", data, - start, mgr->ptable.max_pages, npage, mgr->interface.vleak_size, size); -#endif // TVM_CRT_DEBUG - return kTvmErrorNoError; -} - -/*! - * \brief Free the memory. - * \param interface Pointer to this structure. - * \param ptr A pointer returned from TVMPlatformMemoryAllocate which should be free'd. - * \param dev Execution device passed to TVMPlatformMemoryAllocate. Fixed to {kDLCPU, 0}. - * \return kTvmErrorNoError if successful; a descriptive error code otherwise. 
- */ -tvm_crt_error_t PageMemoryManager_Free(MemoryManagerInterface* interface, void* ptr, DLDevice dev) { - MemoryManager* mgr = (MemoryManager*)interface; - - TLB* pmap = &(mgr->pmap); - CHECK_NE(pmap->num_pages, 0, "invalid translation look-aside buffer."); - PageEntry* entry = pmap->find(pmap, (uint8_t*)ptr); // NOLINT(*) - CHECK_NE(entry, 0, "no valid page entry found."); - Page* p = &(entry->page); - MultiMap* free_map = &(mgr->free_map); - free_map->insert(free_map, p->num_pages, p); - mgr->interface.vleak_size--; -#if TVM_CRT_DEBUG > 1 - TVMLogf("release: addr=%p, start=%" PRId64 "/%zu, npage=%zu, vleak=%d", ptr, - entry->page.ptable_begin, mgr->ptable.max_pages, entry->page.num_pages, - mgr->interface.vleak_size); -#endif // TVM_CRT_DEBUG - return kTvmErrorNoError; -} - -tvm_crt_error_t PageMemoryManagerCreate(MemoryManagerInterface** interface, uint8_t* memory_pool, - size_t memory_pool_size_bytes, - size_t page_size_bytes_log2) { - memset(memory_pool, 0, memory_pool_size_bytes); - - // Allocate enough space for MAX_PAGES. - size_t page_size_bytes = 1 << page_size_bytes_log2; - size_t metadata_bytes_per_page = sizeof(Page) + sizeof(PageEntry) + sizeof(IndexedEntry); - size_t bytes_needed_per_page = page_size_bytes + metadata_bytes_per_page; - size_t num_pages = (memory_pool_size_bytes - sizeof(MemoryManager)) / bytes_needed_per_page; - - uint8_t* metadata_cursor = memory_pool + (num_pages << page_size_bytes_log2); - MemoryManager* manager = (MemoryManager*)metadata_cursor; - *interface = &manager->interface; - /* handle MemoryManager member functions */ - manager->interface.Allocate = PageMemoryManager_Allocate; - // manager->Realloc = MemoryManager_Reallocate; - manager->interface.Free = PageMemoryManager_Free; - - metadata_cursor += sizeof(MemoryManager); - - manager->interface.Allocate = PageMemoryManager_Allocate; - manager->interface.Free = PageMemoryManager_Free; - manager->ptable.memory_pool = memory_pool; - - /* handle PageTable member functions */ - manager->ptable.page = (Page*)metadata_cursor; - metadata_cursor += sizeof(Page) * num_pages; - - manager->ptable.page_size_bytes = (1 << page_size_bytes_log2); - manager->ptable.max_pages = num_pages; - manager->ptable.resize = PageTable_Resize; - - /* handle TLB member functions */ - manager->pmap.entries = (PageEntry*)metadata_cursor; - metadata_cursor += sizeof(PageEntry) * num_pages; - manager->pmap.max_pages = num_pages; - manager->pmap.num_pages = 0; - - manager->pmap.set = TLB_Set; - manager->pmap.find = TLB_Find; - /* handle free_map member functions */ - manager->free_map.entries = (IndexedEntry*)metadata_cursor; - metadata_cursor += sizeof(IndexedEntry) * num_pages; - manager->free_map.max_entries = num_pages; - manager->free_map.lower_bound = MultiMap_LowerBound; - manager->free_map.end = MultiMap_End; - manager->free_map.erase = MultiMap_Erase; - manager->free_map.insert = MultiMap_Insert; - - return kTvmErrorNoError; -} diff --git a/src/runtime/crt/memory/stack_allocator.c b/src/runtime/crt/memory/stack_allocator.c deleted file mode 100644 index ba205f8f209b..000000000000 --- a/src/runtime/crt/memory/stack_allocator.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
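Editor's note on the page allocator removed just above: it rounds each request up to whole pages, satisfies it first-fit from a free list (lower_bound on page count), and otherwise grows the page table at the tail of the memory pool; Free() returns the whole page run to the free list. The following is a minimal, self-contained C++ sketch of that strategy, not the removed implementation: names are illustrative and std::vector stands in for the fixed metadata arrays the CRT carves out of the pool.

#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative page-granular allocator: round requests up to whole pages, satisfy them
// first-fit from a free list, otherwise grow at the tail of the pool.
struct PageRange { size_t first_page; size_t num_pages; };

class PagePool {
 public:
  PagePool(uint8_t* pool, size_t pool_bytes, size_t page_bytes)
      : pool_(pool), page_bytes_(page_bytes), max_pages_(pool_bytes / page_bytes) {}

  // Returns nullptr when neither the free list nor the pool tail can satisfy the request.
  void* Allocate(size_t num_bytes) {
    size_t npages = (num_bytes + page_bytes_ - 1) / page_bytes_;  // round up to whole pages
    for (size_t i = 0; i < free_.size(); ++i) {                   // first fit in the free list
      if (free_[i].num_pages >= npages) {
        PageRange r = free_[i];
        free_.erase(free_.begin() + static_cast<std::ptrdiff_t>(i));
        live_.push_back(r);
        return pool_ + r.first_page * page_bytes_;
      }
    }
    if (next_page_ + npages > max_pages_) return nullptr;         // pool exhausted
    PageRange r{next_page_, npages};
    next_page_ += npages;
    live_.push_back(r);
    return pool_ + r.first_page * page_bytes_;
  }

  // Frees by looking up which range starts at ptr and returning the whole range.
  void Free(void* ptr) {
    size_t page = static_cast<size_t>(static_cast<uint8_t*>(ptr) - pool_) / page_bytes_;
    for (size_t i = 0; i < live_.size(); ++i) {
      if (live_[i].first_page == page) {
        free_.push_back(live_[i]);
        live_.erase(live_.begin() + static_cast<std::ptrdiff_t>(i));
        return;
      }
    }
  }

 private:
  uint8_t* pool_;
  size_t page_bytes_;
  size_t max_pages_;
  size_t next_page_ = 0;
  std::vector<PageRange> free_;   // ranges released by Free()
  std::vector<PageRange> live_;   // ranges currently handed out
};

With 256-byte pages, a 100-byte request consumes one page and a 600-byte request consumes three; freeing either returns the whole run to the free list, mirroring how the removed allocator recycles page runs.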
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -// LINT_C_FILE -#include - -tvm_crt_error_t StackMemoryManager_Allocate_Body(tvm_workspace_t* tvm_runtime_workspace, - int32_t nbytes, void** current_alloc, - uint8_t do_lifo_check) { - // reserve bytes at the end of the allocation such that - // next_alloc % TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES == 0. - uint32_t offset_bytes = - (TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES - nbytes) & (TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES - 1); - uint8_t* workspace_end = tvm_runtime_workspace->workspace + tvm_runtime_workspace->workspace_size; - if (tvm_runtime_workspace->next_alloc + nbytes + offset_bytes > workspace_end) { - return kTvmErrorPlatformNoMemory; - } - (*current_alloc) = tvm_runtime_workspace->next_alloc; - uint8_t* next_alloc = tvm_runtime_workspace->next_alloc + nbytes + offset_bytes; - if (do_lifo_check != 0) { - if (next_alloc + STACK_ALLOCATOR_TAG_SIZE_BYTES > workspace_end) { - return kTvmErrorPlatformNoMemory; - } - const uint32_t total_size = (nbytes + offset_bytes + STACK_ALLOCATOR_TAG_SIZE_BYTES); - *((uint32_t*)next_alloc) = total_size ^ STACK_ALLOCATOR_TAG; - next_alloc += STACK_ALLOCATOR_TAG_SIZE_BYTES; - } - - tvm_runtime_workspace->next_alloc = next_alloc; - return kTvmErrorNoError; -} - -tvm_crt_error_t StackMemoryManager_Allocate(tvm_workspace_t* tvm_runtime_workspace, int32_t nbytes, - void** current_alloc) { - uint8_t do_lifo_check = 0; -#ifdef TVM_CRT_STACK_ALLOCATOR_ENABLE_LIFO_CHECK - do_lifo_check = 1; -#endif - return StackMemoryManager_Allocate_Body(tvm_runtime_workspace, nbytes, current_alloc, - do_lifo_check); -} - -tvm_crt_error_t StackMemoryManager_Free_Body(tvm_workspace_t* tvm_runtime_workspace, void* ptr, - uint8_t do_lifo_check) { - if (do_lifo_check != 0) { - uint32_t tag = *(((uint32_t*)tvm_runtime_workspace->next_alloc) - 1); - uint32_t actual_size = (tvm_runtime_workspace->next_alloc - (uint8_t*)ptr); - uint32_t expected_size = tag ^ STACK_ALLOCATOR_TAG; - if (expected_size != actual_size) { - return kTvmErrorPlatformStackAllocBadFree; - } - } - tvm_runtime_workspace->next_alloc = (uint8_t*)ptr; - return kTvmErrorNoError; -} - -tvm_crt_error_t StackMemoryManager_Free(tvm_workspace_t* tvm_runtime_workspace, void* ptr) { - uint8_t do_lifo_check = 0; -#ifdef TVM_CRT_STACK_ALLOCATOR_ENABLE_LIFO_CHECK - do_lifo_check = 1; -#endif - return StackMemoryManager_Free_Body(tvm_runtime_workspace, ptr, do_lifo_check); -} - -tvm_crt_error_t StackMemoryManager_Init(tvm_workspace_t* tvm_runtime_workspace, - uint8_t* g_aot_memory, size_t workspace_size) { - // We need to round up g_aot_memory in case it is not aligned to - // TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES. 
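Editor's note on the stack allocator in this file (the initializer continues below): Init rounds the workspace base up to TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES and shrinks the usable size accordingly; the allocate path pads each block so the next allocation stays aligned and, when the LIFO check is compiled in, stamps each block with its total size XOR a tag so that frees must happen in reverse allocation order. A standalone C++ sketch of both ideas follows; the constants are illustrative stand-ins, and unlike the removed code the stamp here occupies a full alignment unit so later allocations remain aligned.

#include <cstddef>
#include <cstdint>
#include <cstring>

constexpr uintptr_t kAlign = 16;            // stand-in for TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES
constexpr uint32_t kTagSeed = 0xabcdef01u;  // illustrative tag constant

struct Workspace {
  uint8_t* next = nullptr;  // next free byte, always kept aligned
  uint8_t* end = nullptr;   // one past the last usable byte
};

void Init(Workspace* ws, uint8_t* mem, size_t size_bytes) {
  // Round the supplied base up to the next alignment boundary; the skipped bytes are lost.
  uintptr_t mask = kAlign - 1;
  ws->next = reinterpret_cast<uint8_t*>((reinterpret_cast<uintptr_t>(mem) + mask) & ~mask);
  ws->end = mem + size_bytes;
}

void* Allocate(Workspace* ws, size_t nbytes) {
  // Pad the block so the following allocation also starts on an alignment boundary,
  // then reserve one more alignment unit to hold the LIFO stamp.
  size_t pad = (kAlign - nbytes) & (kAlign - 1);
  size_t total = nbytes + pad + kAlign;
  if (ws->next + total > ws->end) return nullptr;
  void* out = ws->next;
  uint32_t stamp = static_cast<uint32_t>(total) ^ kTagSeed;  // size XOR tag, checked on Free()
  std::memcpy(ws->next + nbytes + pad, &stamp, sizeof(stamp));
  ws->next += total;
  return out;
}

bool Free(Workspace* ws, void* ptr) {
  // Only the most recent allocation may be freed: its stamp sits just below ws->next
  // and must decode to exactly the distance between ptr and ws->next.
  uint32_t stamp = 0;
  std::memcpy(&stamp, ws->next - kAlign, sizeof(stamp));
  uint32_t expected = static_cast<uint32_t>(ws->next - static_cast<uint8_t*>(ptr));
  if ((stamp ^ kTagSeed) != expected) return false;  // out-of-order free detected
  ws->next = static_cast<uint8_t*>(ptr);
  return true;
}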
- uintptr_t unaligned_mask = TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES - 1; - uint8_t* memory_aligned = - (uint8_t*)(((uintptr_t)g_aot_memory + unaligned_mask) & ~unaligned_mask); - uint32_t offset = (uintptr_t)(memory_aligned - g_aot_memory); - - tvm_runtime_workspace->next_alloc = memory_aligned; - tvm_runtime_workspace->workspace = memory_aligned; - tvm_runtime_workspace->workspace_size = workspace_size - offset; - return kTvmErrorNoError; -} diff --git a/src/runtime/crt/microtvm_rpc_common/frame_buffer.cc b/src/runtime/crt/microtvm_rpc_common/frame_buffer.cc deleted file mode 100644 index 37eb274eb944..000000000000 --- a/src/runtime/crt/microtvm_rpc_common/frame_buffer.cc +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file frame_buffer.cc - * \brief Defines a buffer for use by the RPC framing layer. - */ - -#include -#include -#include - -namespace tvm { -namespace runtime { -namespace micro_rpc { - -size_t FrameBuffer::Write(const uint8_t* data, size_t data_size_bytes) { - size_t num_bytes_available = capacity_ - num_valid_bytes_; - size_t num_bytes_to_copy = data_size_bytes; - if (num_bytes_available < num_bytes_to_copy) { - num_bytes_to_copy = num_bytes_available; - } - - memcpy(&data_[num_valid_bytes_], data, num_bytes_to_copy); - num_valid_bytes_ += num_bytes_to_copy; - return num_bytes_to_copy; -} - -size_t FrameBuffer::Read(uint8_t* data, size_t data_size_bytes) { - size_t num_bytes_to_copy = data_size_bytes; - size_t num_bytes_available = num_valid_bytes_ - read_cursor_; - if (num_bytes_available < num_bytes_to_copy) { - num_bytes_to_copy = num_bytes_available; - } - - memcpy(data, &data_[read_cursor_], num_bytes_to_copy); - read_cursor_ += num_bytes_to_copy; - return num_bytes_to_copy; -} - -void FrameBuffer::Clear() { - num_valid_bytes_ = 0; - read_cursor_ = 0; -} - -} // namespace micro_rpc -} // namespace runtime -} // namespace tvm diff --git a/src/runtime/crt/microtvm_rpc_common/framing.cc b/src/runtime/crt/microtvm_rpc_common/framing.cc deleted file mode 100644 index 47e4a33a718c..000000000000 --- a/src/runtime/crt/microtvm_rpc_common/framing.cc +++ /dev/null @@ -1,442 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file framing.cc - * \brief Framing for RPC. - */ - -#include -#include -#include -#include - -#include "crt_config.h" - -// For debugging purposes, Framer logs can be enabled, but this should only be done when -// running from the host. This is done differently from TVMLogf() because TVMLogf() uses the -// framer in its implementation. -#ifdef TVM_CRT_FRAMER_ENABLE_LOGS -#include -#define TVM_FRAMER_DEBUG_LOG(msg, ...) fprintf(stderr, "microTVM framer: " msg " \n", ##__VA_ARGS__) -#define TVM_UNFRAMER_DEBUG_LOG(msg, ...) \ - fprintf(stderr, "microTVM unframer: " msg " \n", ##__VA_ARGS__) -#else -#define TVM_FRAMER_DEBUG_LOG(msg, ...) -#define TVM_UNFRAMER_DEBUG_LOG(msg, ...) -#endif - -namespace tvm { -namespace runtime { -namespace micro_rpc { - -uint16_t crc16_compute(const uint8_t* data, size_t data_size_bytes, uint16_t* previous_crc) { - uint16_t crc = (previous_crc != nullptr ? *previous_crc : 0xffff); - for (size_t i = 0; i < data_size_bytes; ++i) { - crc = update_crc_ccitt(crc, data[i]); - } - - return crc; -} - -template -static constexpr uint8_t to_integral(E e) { - return static_cast(e); -} - -void Unframer::Reset() { - state_ = State::kFindPacketStart; - saw_escape_start_ = false; - num_buffer_bytes_valid_ = 0; -} - -size_t Unframer::BytesNeeded() { - size_t bytes_needed = 0; - switch (state_) { - case State::kFindPacketStart: - return 1; - case State::kFindPacketLength: - bytes_needed = PacketFieldSizeBytes::kPayloadLength; - break; - case State::kFindPacketCrc: - return num_payload_bytes_remaining_; - case State::kFindCrcEnd: - bytes_needed = PacketFieldSizeBytes::kCrc; - break; - default: - CHECK(false); - } - - return bytes_needed > num_buffer_bytes_valid_ ? 
bytes_needed - num_buffer_bytes_valid_ : 0; -} - -tvm_crt_error_t Unframer::Write(const uint8_t* data, size_t data_size_bytes, - size_t* bytes_consumed) { - tvm_crt_error_t return_code = kTvmErrorNoError; - input_ = data; - input_size_bytes_ = data_size_bytes; - - while (return_code == kTvmErrorNoError && input_size_bytes_ > 0) { - TVM_UNFRAMER_DEBUG_LOG("state: %02x size 0x%02zx", to_integral(state_), input_size_bytes_); - switch (state_) { - case State::kFindPacketStart: - return_code = FindPacketStart(); - break; - case State::kFindPacketLength: - return_code = FindPacketLength(); - break; - case State::kFindPacketCrc: - return_code = FindPacketCrc(); - break; - case State::kFindCrcEnd: - return_code = FindCrcEnd(); - break; - default: - return_code = kTvmErrorFramingInvalidState; - break; - } - } - - *bytes_consumed = data_size_bytes - input_size_bytes_; - input_ = nullptr; - input_size_bytes_ = 0; - - if (return_code != kTvmErrorNoError) { - state_ = State::kFindPacketStart; - ClearBuffer(); - } - - return return_code; -} - -tvm_crt_error_t Unframer::FindPacketStart() { - size_t i; - for (i = 0; i < input_size_bytes_; ++i) { - if (input_[i] == to_integral(Escape::kEscapeStart)) { - saw_escape_start_ = true; - } else if (input_[i] == to_integral(Escape::kPacketStart) && saw_escape_start_) { - uint8_t packet_start_sequence[2]{to_integral(Escape::kEscapeStart), - to_integral(Escape::kPacketStart)}; - crc_ = crc16_compute(packet_start_sequence, sizeof(packet_start_sequence), nullptr); - saw_escape_start_ = false; - state_ = State::kFindPacketLength; - i++; - break; - } else { - saw_escape_start_ = false; - } - } - - input_ += i; - input_size_bytes_ -= i; - return kTvmErrorNoError; -} - -tvm_crt_error_t Unframer::ConsumeInput(uint8_t* buffer, size_t buffer_size_bytes, - size_t* bytes_filled, bool update_crc) { - CHECK(*bytes_filled < buffer_size_bytes); - tvm_crt_error_t to_return = kTvmErrorNoError; - size_t i; - for (i = 0; i < input_size_bytes_; ++i) { - uint8_t c = input_[i]; - if (saw_escape_start_) { - saw_escape_start_ = false; - if (c == to_integral(Escape::kPacketStart)) { - // When the start packet sequence is seen, abort unframing the current packet. Since the - // escape byte has already been parsed, update the CRC include only the escape byte. This - // readies the unframer to consume the kPacketStart byte on the next Write() call. - uint8_t escape_start = to_integral(Escape::kEscapeStart); - crc_ = crc16_compute(&escape_start, 1, nullptr); - to_return = kTvmErrorFramingShortPacket; - saw_escape_start_ = true; - - break; - } else if (c == to_integral(Escape::kEscapeNop)) { - continue; - } else if (c == to_integral(Escape::kEscapeStart)) { - // do nothing (allow character to be printed) - } else { - // Invalid escape sequence. 
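Editor's note on the escape handling in the unframer above (and continuing below): a reserved escape byte introduces control sequences such as packet start and NOP, and a literal escape byte inside the payload is sent as two escape bytes, which the unframer collapses back to one. The sketch below shows only that stuffing and unstuffing step; the escape value is illustrative, since the real byte values are defined in the removed framing headers.

#include <cstdint>
#include <vector>

constexpr uint8_t kEscape = 0xFF;  // illustrative; the protocol headers define the real value

// Stuff: every literal escape byte in the payload is doubled on the wire.
std::vector<uint8_t> Stuff(const std::vector<uint8_t>& payload) {
  std::vector<uint8_t> wire;
  for (uint8_t b : payload) {
    wire.push_back(b);
    if (b == kEscape) wire.push_back(kEscape);  // escape the escape byte
  }
  return wire;
}

// Unstuff: a doubled escape byte decodes to a single literal escape byte.
std::vector<uint8_t> Unstuff(const std::vector<uint8_t>& wire) {
  std::vector<uint8_t> payload;
  bool saw_escape = false;
  for (uint8_t b : wire) {
    if (saw_escape) {
      saw_escape = false;
      if (b == kEscape) {
        payload.push_back(kEscape);  // kEscape kEscape -> literal kEscape
        continue;
      }
      // Anything else after an escape is a control sequence (packet start, NOP, ...);
      // a real unframer dispatches on it, this sketch simply skips it.
      continue;
    }
    if (b == kEscape) {
      saw_escape = true;
      continue;
    }
    payload.push_back(b);
  }
  return payload;
}

Round-tripping any payload through Stuff() and then Unstuff() yields the original bytes, which is the property the framer and unframer rely on.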
- to_return = kTvmErrorFramingInvalidEscape; - i++; - break; - } - } else if (c == to_integral(Escape::kEscapeStart)) { - saw_escape_start_ = true; - continue; - } else { - saw_escape_start_ = false; - } - - buffer[*bytes_filled] = c; - (*bytes_filled)++; - if (*bytes_filled == buffer_size_bytes) { - i++; - break; - } - } - - if (update_crc) { - crc_ = crc16_compute(input_, i, &crc_); - } - - input_ += i; - input_size_bytes_ -= i; - return to_return; -} - -tvm_crt_error_t Unframer::AddToBuffer(size_t buffer_full_bytes, bool update_crc) { - CHECK(!IsBufferFull(buffer_full_bytes)); - return ConsumeInput(buffer_, buffer_full_bytes, &num_buffer_bytes_valid_, update_crc); -} - -void Unframer::ClearBuffer() { num_buffer_bytes_valid_ = 0; } - -tvm_crt_error_t Unframer::FindPacketLength() { - tvm_crt_error_t to_return = AddToBuffer(PacketFieldSizeBytes::kPayloadLength, true); - if (to_return != kTvmErrorNoError) { - return to_return; - } - - if (!IsBufferFull(PacketFieldSizeBytes::kPayloadLength)) { - return to_return; - } - - num_payload_bytes_remaining_ = *reinterpret_cast(buffer_); - TVM_UNFRAMER_DEBUG_LOG("payload length: 0x%zx", num_payload_bytes_remaining_); - ClearBuffer(); - state_ = State::kFindPacketCrc; - return to_return; -} - -tvm_crt_error_t Unframer::FindPacketCrc() { - // CHECK(num_buffer_bytes_valid_ == 0); - while (num_payload_bytes_remaining_ > 0) { - size_t num_bytes_to_buffer = num_payload_bytes_remaining_; - if (num_bytes_to_buffer > sizeof(buffer_)) { - num_bytes_to_buffer = sizeof(buffer_); - } - - // remember in case we need to rewind due to WriteAll() error. - size_t prev_input_size_bytes = input_size_bytes_; - size_t prev_num_buffer_bytes_valid = num_buffer_bytes_valid_; - { - tvm_crt_error_t to_return = AddToBuffer(num_bytes_to_buffer, true); - if (to_return != kTvmErrorNoError) { - return to_return; - } - } - - if (prev_num_buffer_bytes_valid == num_buffer_bytes_valid_) { - // Return if no bytes were consumed from the input. - return kTvmErrorNoError; - } - - { - size_t bytes_consumed; - tvm_crt_error_t to_return = - stream_->WriteAll(buffer_, num_buffer_bytes_valid_, &bytes_consumed); - num_payload_bytes_remaining_ -= bytes_consumed; - if (to_return != kTvmErrorNoError) { - // rewind input, skipping escape bytes. - size_t buffer_bytes_consumed; - const uint8_t* input = input_ - (prev_input_size_bytes - input_size_bytes_); - for (buffer_bytes_consumed = 0; bytes_consumed > 0; ++buffer_bytes_consumed) { - if (input[buffer_bytes_consumed] != uint8_t(Escape::kEscapeStart)) { - bytes_consumed--; - } - } - - size_t bytes_to_rewind = prev_input_size_bytes - buffer_bytes_consumed; - input_ -= bytes_to_rewind; - input_size_bytes_ += bytes_to_rewind; - - // must not have seen escape, since AddToBuffer won't stop in the middle. - saw_escape_start_ = false; - - return to_return; - } - } - - ClearBuffer(); - } - - if (num_payload_bytes_remaining_ == 0) { - state_ = State::kFindCrcEnd; - } - - return kTvmErrorNoError; -} - -tvm_crt_error_t Unframer::FindCrcEnd() { - tvm_crt_error_t to_return = AddToBuffer(PacketFieldSizeBytes::kCrc, false); - if (to_return != kTvmErrorNoError) { - return to_return; - } - - if (!IsBufferFull(PacketFieldSizeBytes::kCrc)) { - return kTvmErrorNoError; - } - - // TODO(areusch): Handle endianness. 
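Editor's note on the checksum: crc16_compute above seeds the CRC with 0xFFFF (or carries in a previous value) and folds each byte through update_crc_ccitt; the comparison that follows checks the value accumulated over the unescaped packet against the CRC carried in the trailer. Assuming the underlying checksum is the common CRC-16/CCITT (polynomial 0x1021, initial value 0xFFFF, no reflection), an equivalent standalone computation looks like this:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// One step of CRC-16/CCITT-FALSE: polynomial 0x1021, MSB-first, no reflection.
uint16_t Crc16Update(uint16_t crc, uint8_t byte) {
  crc ^= static_cast<uint16_t>(byte) << 8;
  for (int bit = 0; bit < 8; ++bit) {
    crc = (crc & 0x8000) ? static_cast<uint16_t>((crc << 1) ^ 0x1021)
                         : static_cast<uint16_t>(crc << 1);
  }
  return crc;
}

// Mirror of the removed helper: start from 0xFFFF (or a carried-in value) and fold each byte.
uint16_t Crc16Compute(const uint8_t* data, size_t len, const uint16_t* previous) {
  uint16_t crc = previous ? *previous : 0xFFFF;
  for (size_t i = 0; i < len; ++i) crc = Crc16Update(crc, data[i]);
  return crc;
}

int main() {
  const uint8_t check[] = {'1', '2', '3', '4', '5', '6', '7', '8', '9'};
  // The standard check value for CRC-16/CCITT-FALSE over "123456789" is 0x29B1.
  std::printf("%04X\n", Crc16Compute(check, sizeof(check), nullptr));
}

Carrying the running value between calls is what lets the framer checksum the start sequence, the length field, and the payload incrementally rather than in one buffer.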
- stream_->PacketDone(crc_ == *reinterpret_cast(buffer_)); - ClearBuffer(); - state_ = State::kFindPacketStart; - return kTvmErrorNoError; -} - -void Framer::Reset() { state_ = State::kReset; } - -tvm_crt_error_t Framer::Write(const uint8_t* payload, size_t payload_size_bytes) { - tvm_crt_error_t to_return; - to_return = StartPacket(payload_size_bytes); - if (to_return != kTvmErrorNoError) { - return to_return; - } - - to_return = WritePayloadChunk(payload, payload_size_bytes); - if (to_return != 0) { - return to_return; - } - - to_return = FinishPacket(); - return to_return; -} - -tvm_crt_error_t Framer::StartPacket(size_t payload_size_bytes) { - uint8_t packet_header[sizeof(uint32_t)]; - size_t ptr = 0; - if (state_ == State::kReset) { - packet_header[ptr] = to_integral(Escape::kEscapeNop); - ptr++; - tvm_crt_error_t to_return = - WriteAndCrc(packet_header, ptr, false /* escape */, false /* update_crc */); - if (to_return != kTvmErrorNoError) { - return to_return; - } - - ptr = 0; - } - - packet_header[ptr] = to_integral(Escape::kEscapeStart); - ptr++; - packet_header[ptr] = to_integral(Escape::kPacketStart); - ptr++; - - crc_ = 0xffff; - tvm_crt_error_t to_return = - WriteAndCrc(packet_header, ptr, false /* escape */, true /* update_crc */); - if (to_return != kTvmErrorNoError) { - return to_return; - } - - uint32_t payload_size_wire = payload_size_bytes; - to_return = WriteAndCrc(reinterpret_cast(&payload_size_wire), sizeof(payload_size_wire), - true /* escape */, true /* update_crc */); - if (to_return == kTvmErrorNoError) { - state_ = State::kTransmitPacketPayload; - num_payload_bytes_remaining_ = payload_size_bytes; - } - - return to_return; -} - -tvm_crt_error_t Framer::WriteAndCrc(const uint8_t* data, size_t data_size_bytes, bool escape, - bool update_crc) { - while (data_size_bytes > 0) { - uint8_t buffer[kMaxStackBufferSizeBytes]; - size_t buffer_ptr = 0; - size_t i; - for (i = 0; i < data_size_bytes && buffer_ptr != kMaxStackBufferSizeBytes; ++i) { - uint8_t c = data[i]; - if (!escape || c != to_integral(Escape::kEscapeStart)) { - buffer[buffer_ptr] = c; - buffer_ptr++; - continue; - } - - if (buffer_ptr == kMaxStackBufferSizeBytes - 1) { - break; - } - - buffer[buffer_ptr] = to_integral(Escape::kEscapeStart); - buffer_ptr++; - - buffer[buffer_ptr] = to_integral(Escape::kEscapeStart); - buffer_ptr++; - } - - size_t bytes_consumed; - tvm_crt_error_t to_return = stream_->WriteAll(buffer, buffer_ptr, &bytes_consumed); - if (to_return != kTvmErrorNoError) { - return to_return; - } - - if (update_crc) { - crc_ = crc16_compute(buffer, buffer_ptr, &crc_); - } - - data_size_bytes -= i; - data += i; - } - - return kTvmErrorNoError; -} - -tvm_crt_error_t Framer::WritePayloadChunk(const uint8_t* payload_chunk, - size_t payload_chunk_size_bytes) { - if (state_ != State::kTransmitPacketPayload) { - return kTvmErrorFramingInvalidState; - } else if (payload_chunk_size_bytes > num_payload_bytes_remaining_) { - return kTvmErrorFramingPayloadOverflow; - } - - TVM_FRAMER_DEBUG_LOG("write payload chunk: %" PRIuMAX " bytes", payload_chunk_size_bytes); - tvm_crt_error_t to_return = WriteAndCrc(payload_chunk, payload_chunk_size_bytes, - true /* escape */, true /* update_crc */); - if (to_return != kTvmErrorNoError) { - state_ = State::kReset; - return to_return; - } - - num_payload_bytes_remaining_ -= payload_chunk_size_bytes; - return kTvmErrorNoError; -} - -tvm_crt_error_t Framer::FinishPacket() { - if (state_ != State::kTransmitPacketPayload) { - return kTvmErrorFramingInvalidState; - } else if 
(num_payload_bytes_remaining_ != 0) { - return kTvmErrorFramingPayloadIncomplete; - } - - tvm_crt_error_t to_return = WriteAndCrc(reinterpret_cast(&crc_), sizeof(crc_), - true /* escape */, false /* update_crc */); - if (to_return != kTvmErrorNoError) { - TVM_FRAMER_DEBUG_LOG("write and crc returned: %02x", to_return); - state_ = State::kReset; - } else { - state_ = State::kIdle; - } - return to_return; -} - -} // namespace micro_rpc -} // namespace runtime -} // namespace tvm diff --git a/src/runtime/crt/microtvm_rpc_common/session.cc b/src/runtime/crt/microtvm_rpc_common/session.cc deleted file mode 100644 index 3570f6260cae..000000000000 --- a/src/runtime/crt/microtvm_rpc_common/session.cc +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file session.h - * \brief RPC Session - */ - -#include -#include - -#include "crt_config.h" - -namespace tvm { -namespace runtime { -namespace micro_rpc { - -struct microtvm_session_start_payload_t { - uint8_t version; -}; - -void Session::RegenerateNonce() { - local_nonce_ = (((local_nonce_ << 5) | (local_nonce_ >> 5)) + 1); - - if (local_nonce_ == kInvalidNonce) { - local_nonce_++; - } -} - -tvm_crt_error_t Session::SendInternal(MessageType message_type, const uint8_t* message_data, - size_t message_size_bytes) { - tvm_crt_error_t to_return = StartMessage(message_type, message_size_bytes); - if (to_return != kTvmErrorNoError) { - return to_return; - } - - if (message_size_bytes > 0) { - to_return = SendBodyChunk(message_data, message_size_bytes); - if (to_return != kTvmErrorNoError) { - return to_return; - } - } - - return framer_->FinishPacket(); -} - -tvm_crt_error_t Session::StartMessage(MessageType message_type, size_t message_size_bytes) { - SessionHeader header{session_id_, message_type}; - if (message_type == MessageType::kLog) { - header.session_id = 0; - } - - tvm_crt_error_t to_return = framer_->StartPacket(message_size_bytes + sizeof(SessionHeader)); - if (to_return != 0) { - return to_return; - } - - return framer_->WritePayloadChunk(reinterpret_cast(&header), sizeof(SessionHeader)); -} - -tvm_crt_error_t Session::SendBodyChunk(const uint8_t* chunk, size_t chunk_size_bytes) { - return framer_->WritePayloadChunk(chunk, chunk_size_bytes); -} - -tvm_crt_error_t Session::FinishMessage() { return framer_->FinishPacket(); } - -tvm_crt_error_t Session::StartSession() { - CHECK_NE(state_, State::kReset, "must call Initialize"); - - RegenerateNonce(); - SetSessionId(local_nonce_, 0); - microtvm_session_start_payload_t payload = {Session::kVersion}; - tvm_crt_error_t to_return = SendInternal(MessageType::kStartSessionInit, - reinterpret_cast(&payload), sizeof(payload)); - if (to_return == 0) { - state_ = State::kStartSessionSent; - } - - return 
to_return; -} - -tvm_crt_error_t Session::Initialize(uint8_t initial_session_nonce) { - local_nonce_ = initial_session_nonce; - return TerminateSession(); -} - -tvm_crt_error_t Session::TerminateSession() { - SetSessionId(0, 0); - state_ = State::kNoSessionEstablished; - return SendInternal(MessageType::kTerminateSession, nullptr, 0); -} - -tvm_crt_error_t Session::SendMessage(MessageType message_type, const uint8_t* message_data, - size_t message_size_bytes) { - if (state_ != State::kSessionEstablished && message_type != MessageType::kLog) { - return kTvmErrorSessionInvalidState; - } - - return SendInternal(message_type, message_data, message_size_bytes); -} - -ssize_t Session::SessionReceiver::Write(const uint8_t* data, size_t data_size_bytes) { - if (session_->receive_buffer_has_complete_message_) { - return kTvmErrorSessionReceiveBufferBusy; - } - - size_t bytes_written = session_->receive_buffer_->Write(data, data_size_bytes); - if (bytes_written != data_size_bytes) { - return kTvmErrorSessionReceiveBufferShortWrite; - } - - return bytes_written; -} - -void Session::SessionReceiver::PacketDone(bool is_valid) { - if (!is_valid) { - return; - } - - SessionHeader header; - int bytes_read = - session_->receive_buffer_->Read(reinterpret_cast(&header), sizeof(header)); - if (bytes_read != sizeof(header)) { - return; - } - session_->receive_buffer_has_complete_message_ = true; - - switch (header.message_type) { - case MessageType::kStartSessionInit: - session_->ProcessStartSessionInit(header); - session_->receive_buffer_has_complete_message_ = false; - break; - case MessageType::kStartSessionReply: - session_->ProcessStartSessionReply(header); - session_->receive_buffer_has_complete_message_ = false; - break; - case MessageType::kTerminateSession: - if (session_->state_ == State::kSessionEstablished) { - session_->state_ = State::kNoSessionEstablished; - session_->OnSessionTerminatedMessage(); - } - session_->receive_buffer_has_complete_message_ = false; - break; - case MessageType::kLog: - if (header.session_id == 0 || header.session_id == session_->session_id_) { - // Special case for log messages: session id can be 0. 
- session_->message_received_func_(session_->message_received_func_context_, - header.message_type, session_->receive_buffer_); - } - break; - default: - if (session_->state_ == State::kSessionEstablished && - header.session_id == session_->session_id_) { - session_->message_received_func_(session_->message_received_func_context_, - header.message_type, session_->receive_buffer_); - } - break; - } -} - -void Session::ClearReceiveBuffer() { - receive_buffer_has_complete_message_ = false; - receive_buffer_->Clear(); -} - -void Session::SendSessionStartReply(const SessionHeader& header) { - RegenerateNonce(); - SetSessionId(InitiatorNonce(header.session_id), local_nonce_); - microtvm_session_start_payload_t payload = {Session::kVersion}; - tvm_crt_error_t to_return = SendInternal(MessageType::kStartSessionReply, - reinterpret_cast(&payload), sizeof(payload)); - state_ = State::kSessionEstablished; - CHECK_EQ(to_return, kTvmErrorNoError, "SendSessionStartReply"); - OnSessionEstablishedMessage(); -} - -void Session::ProcessStartSessionInit(const SessionHeader& header) { - if (InitiatorNonce(header.session_id) == kInvalidNonce) { - return; - } - - microtvm_session_start_payload_t payload; - int bytes_read = receive_buffer_->Read(reinterpret_cast(&payload), sizeof(payload)); - if (bytes_read != sizeof(payload)) { - return; - } - - switch (state_) { - case State::kReset: - case State::kNoSessionEstablished: - // Normal case: received a StartSession packet from reset. - SendSessionStartReply(header); - break; - - case State::kStartSessionSent: - // When two StartSessionInit packets sent simultaneously: lowest nonce wins; ties retry. - if (InitiatorNonce(header.session_id) < local_nonce_) { - if (payload.version == Session::kVersion) { - SendSessionStartReply(header); - } - } else if (InitiatorNonce(header.session_id) == local_nonce_) { - StartSession(); - } - - break; - - case State::kSessionEstablished: - SendSessionStartReply(header); - OnSessionEstablishedMessage(); - break; - - default: - state_ = State::kReset; - } -} - -void Session::ProcessStartSessionReply(const SessionHeader& header) { - if (ResponderNonce(header.session_id) == kInvalidNonce) { - return; - } - - microtvm_session_start_payload_t payload; - int bytes_read = receive_buffer_->Read(reinterpret_cast(&payload), sizeof(payload)); - if (bytes_read != sizeof(payload)) { - return; - } - - switch (state_) { - case State::kReset: - case State::kNoSessionEstablished: - break; - case State::kStartSessionSent: - if (InitiatorNonce(header.session_id) == local_nonce_ && - payload.version == Session::kVersion) { - SetSessionId(local_nonce_, ResponderNonce(header.session_id)); - state_ = State::kSessionEstablished; - OnSessionEstablishedMessage(); - } - break; - case State::kSessionEstablished: - if (InitiatorNonce(header.session_id) != kInvalidNonce && - ResponderNonce(header.session_id) == kInvalidNonce) { - if (payload.version == Session::kVersion) { - SendSessionStartReply(header); - } else { - SetSessionId(local_nonce_, 0); - state_ = State::kReset; - } - } else { - state_ = State::kReset; - } - break; - } -} - -void Session::OnSessionEstablishedMessage() { - message_received_func_(message_received_func_context_, MessageType::kStartSessionReply, NULL); -} - -void Session::OnSessionTerminatedMessage() { - message_received_func_(message_received_func_context_, MessageType::kTerminateSession, NULL); -} - -} // namespace micro_rpc -} // namespace runtime -} // namespace tvm diff --git a/src/runtime/crt/microtvm_rpc_common/write_stream.cc 
b/src/runtime/crt/microtvm_rpc_common/write_stream.cc deleted file mode 100644 index a4ad0fc86dff..000000000000 --- a/src/runtime/crt/microtvm_rpc_common/write_stream.cc +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file framing.h - * \brief Framing for RPC. - */ -#include - -namespace tvm { -namespace runtime { -namespace micro_rpc { - -WriteStream::~WriteStream() {} - -tvm_crt_error_t WriteStream::WriteAll(uint8_t* data, size_t data_size_bytes, - size_t* bytes_consumed) { - *bytes_consumed = 0; - while (data_size_bytes > 0) { - ssize_t to_return = Write(data, data_size_bytes); - if (to_return == 0) { - return kTvmErrorWriteStreamShortWrite; - } else if (to_return < 0) { - return (tvm_crt_error_t)to_return; - } else if (to_return > 0 && (static_cast(to_return)) > data_size_bytes) { - return kTvmErrorWriteStreamLongWrite; - } - - data += to_return; - data_size_bytes -= to_return; - *bytes_consumed += to_return; - } - - return kTvmErrorNoError; -} - -} // namespace micro_rpc -} // namespace runtime -} // namespace tvm diff --git a/src/runtime/crt/microtvm_rpc_server/rpc_server.cc b/src/runtime/crt/microtvm_rpc_server/rpc_server.cc deleted file mode 100644 index cd2fb03ed7f9..000000000000 --- a/src/runtime/crt/microtvm_rpc_server/rpc_server.cc +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file rpc_server.cc - * \brief MicroTVM RPC Server - */ - -#include -#include -#include -#include -#include - -// NOTE: dmlc/base.h contains some declarations that are incompatible with some C embedded -// toolchains. Just pull the bits we need for this file. 
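Editor's note on WriteStream::WriteAll above: it keeps calling the underlying Write() until the whole buffer has been pushed out, treating a zero-byte result as a short write and an over-long result as an error. The same pattern in a standalone form, parameterized over any partial-write callback (all names here are illustrative):

#include <cstddef>
#include <cstdint>
#include <functional>

// Returns bytes written (>0), 0 for a short write, or a negative error code.
using PartialWrite = std::function<long(const uint8_t*, size_t)>;

// Drive a partial-write function until every byte is out, or an error/short write stops us.
// Returns 0 on success, -1 on short write, -2 on over-long write, or the callback's error code.
long WriteAll(const PartialWrite& write, const uint8_t* data, size_t size, size_t* written) {
  *written = 0;
  while (size > 0) {
    long n = write(data, size);
    if (n == 0) return -1;                          // short write: give up, as the removed helper does
    if (n < 0) return n;                            // propagate the callback's error code
    if (static_cast<size_t>(n) > size) return -2;   // wrote more than asked: protocol error
    data += n;
    size -= static_cast<size_t>(n);
    *written += static_cast<size_t>(n);
  }
  return 0;
}

A port would wrap its UART or socket transmit routine in PartialWrite and hand it to WriteAll.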
-#define DMLC_CMAKE_LITTLE_ENDIAN DMLC_IO_USE_LITTLE_ENDIAN -#define DMLC_LITTLE_ENDIAN 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../../minrpc/minrpc_server.h" -#include "crt_config.h" - -namespace tvm { -namespace runtime { -namespace micro_rpc { - -class MicroIOHandler { - public: - MicroIOHandler(Session* session, FrameBuffer* receive_buffer) - : session_{session}, receive_buffer_{receive_buffer} {} - - void MessageStart(size_t message_size_bytes) { - session_->StartMessage(MessageType::kNormal, message_size_bytes + 8); - } - - ssize_t PosixWrite(const uint8_t* buf, size_t buf_size_bytes) { - int to_return = session_->SendBodyChunk(buf, buf_size_bytes); - if (to_return < 0) { - return to_return; - } - return buf_size_bytes; - } - - void MessageDone() { CHECK_EQ(session_->FinishMessage(), kTvmErrorNoError, "FinishMessage"); } - - ssize_t PosixRead(uint8_t* buf, size_t buf_size_bytes) { - return receive_buffer_->Read(buf, buf_size_bytes); - } - - void Close() {} - - void Exit(int code) { - for (;;) { - } - } - - private: - Session* session_; - FrameBuffer* receive_buffer_; -}; - -namespace { -// Stored as globals so that they can be used to report initialization errors. -microtvm_rpc_channel_write_t g_write_func = nullptr; -void* g_write_func_ctx = nullptr; -} // namespace - -class SerialWriteStream : public WriteStream { - public: - SerialWriteStream() {} - virtual ~SerialWriteStream() {} - - ssize_t Write(const uint8_t* data, size_t data_size_bytes) override { - return g_write_func(g_write_func_ctx, data, data_size_bytes); - } - - void PacketDone(bool is_valid) override {} - - private: - void operator delete(void*) noexcept {} // NOLINT(readability/casting) -}; - -class MicroRPCServer { - public: - MicroRPCServer(uint8_t* receive_storage, size_t receive_storage_size_bytes, - microtvm_rpc_channel_write_t write_func, void* write_func_ctx) - : receive_buffer_{receive_storage, receive_storage_size_bytes}, - framer_{&send_stream_}, - session_{&framer_, &receive_buffer_, &HandleCompleteMessageCb, this}, - io_{&session_, &receive_buffer_}, - unframer_{session_.Receiver()}, - rpc_server_{&io_}, - is_running_{true} {} - - void Initialize() { - uint8_t initial_session_nonce = Session::kInvalidNonce; - tvm_crt_error_t error = - TVMPlatformGenerateRandom(&initial_session_nonce, sizeof(initial_session_nonce)); - CHECK_EQ(kTvmErrorNoError, error, "generating random session id"); - CHECK_EQ(kTvmErrorNoError, session_.Initialize(initial_session_nonce), "rpc server init"); - } - - /*! \brief Process one message from the receive buffer, if possible. - * - * \param new_data If not nullptr, a pointer to a buffer pointer, which should point at new input - * data to process. On return, updated to point past data that has been consumed. - * \param new_data_size_bytes Points to the number of valid bytes in `new_data`. On return, - * updated to the number of unprocessed bytes remaining in `new_data` (usually 0). - * \return an error code indicating the outcome of the processing loop. 
- */ - tvm_crt_error_t Loop(uint8_t** new_data, size_t* new_data_size_bytes) { - if (!is_running_) { - return kTvmErrorPlatformShutdown; - } - - tvm_crt_error_t err = kTvmErrorNoError; - if (new_data != nullptr && new_data_size_bytes != nullptr && *new_data_size_bytes > 0) { - size_t bytes_consumed; - err = unframer_.Write(*new_data, *new_data_size_bytes, &bytes_consumed); - *new_data += bytes_consumed; - *new_data_size_bytes -= bytes_consumed; - } - - if (err == kTvmErrorNoError && !is_running_) { - err = kTvmErrorPlatformShutdown; - } - - return err; - } - - void Log(const uint8_t* message, size_t message_size_bytes) { - tvm_crt_error_t to_return = - session_.SendMessage(MessageType::kLog, message, message_size_bytes); - if (to_return != 0) { - TVMPlatformAbort(to_return); - } - } - - private: - FrameBuffer receive_buffer_; - SerialWriteStream send_stream_; - Framer framer_; - Session session_; - MicroIOHandler io_; - Unframer unframer_; - MinRPCServer rpc_server_; - - bool is_running_; - - void HandleCompleteMessage(MessageType message_type, FrameBuffer* buf) { - if (message_type != MessageType::kNormal) { - return; - } - - is_running_ = rpc_server_.ProcessOnePacket(); - session_.ClearReceiveBuffer(); - } - - static void HandleCompleteMessageCb(void* context, MessageType message_type, FrameBuffer* buf) { - static_cast(context)->HandleCompleteMessage(message_type, buf); - } -}; - -} // namespace micro_rpc -} // namespace runtime -} // namespace tvm - -extern "C" { - -static microtvm_rpc_server_t g_rpc_server = nullptr; - -microtvm_rpc_server_t MicroTVMRpcServerInit(microtvm_rpc_channel_write_t write_func, - void* write_func_ctx) { - tvm::runtime::micro_rpc::g_write_func = write_func; - tvm::runtime::micro_rpc::g_write_func_ctx = write_func_ctx; - - tvm_crt_error_t err = TVMInitializeRuntime(); - if (err != kTvmErrorNoError) { - TVMPlatformAbort(err); - } - - err = TVMAotExecutorModule_Register(); - if (err != kTvmErrorNoError) { - TVMPlatformAbort(err); - } - - DLDevice dev = {kDLCPU, 0}; - void* receive_buffer_memory; - err = TVMPlatformMemoryAllocate(TVM_CRT_MAX_PACKET_SIZE_BYTES, dev, &receive_buffer_memory); - if (err != kTvmErrorNoError) { - TVMPlatformAbort(err); - } - auto receive_buffer = new (receive_buffer_memory) uint8_t[TVM_CRT_MAX_PACKET_SIZE_BYTES]; - void* rpc_server_memory; - err = TVMPlatformMemoryAllocate(sizeof(tvm::runtime::micro_rpc::MicroRPCServer), dev, - &rpc_server_memory); - if (err != kTvmErrorNoError) { - TVMPlatformAbort(err); - } - auto rpc_server = new (rpc_server_memory) tvm::runtime::micro_rpc::MicroRPCServer( - receive_buffer, TVM_CRT_MAX_PACKET_SIZE_BYTES, write_func, write_func_ctx); - g_rpc_server = static_cast(rpc_server); - rpc_server->Initialize(); - return g_rpc_server; -} - -void TVMLogf(const char* format, ...) { - va_list args; - char log_buffer[256]; - va_start(args, format); - size_t num_bytes_logged = TVMPlatformFormatMessage(log_buffer, sizeof(log_buffer), format, args); - va_end(args); - - // Most header-based logging frameworks tend to insert '\n' at the end of the log message. - // Remove that for remote logging, since the remote logger will do the same. 
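Editor's note: the branch that follows trims a single trailing newline from the formatted log message before forwarding it, because the remote logger appends its own. A standalone helper doing the same formatting and trim, using vsnprintf in place of TVMPlatformFormatMessage:

#include <cstdarg>
#include <cstddef>
#include <cstdio>

// Format a message into buf and strip one trailing newline, since the remote logger adds its own.
size_t FormatForRemoteLog(char* buf, size_t buf_size, const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);
  int n = std::vsnprintf(buf, buf_size, fmt, args);
  va_end(args);
  if (n < 0 || buf_size == 0) return 0;
  size_t len = (static_cast<size_t>(n) < buf_size) ? static_cast<size_t>(n) : buf_size - 1;
  if (len > 0 && buf[len - 1] == '\n') {
    buf[len - 1] = '\0';
    --len;
  }
  return len;
}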
- if (num_bytes_logged > 0 && log_buffer[num_bytes_logged - 1] == '\n') { - log_buffer[num_bytes_logged - 1] = 0; - num_bytes_logged--; - } - - if (g_rpc_server != nullptr) { - static_cast(g_rpc_server) - ->Log(reinterpret_cast(log_buffer), num_bytes_logged); - } else { - tvm::runtime::micro_rpc::SerialWriteStream write_stream; - tvm::runtime::micro_rpc::Framer framer{&write_stream}; - tvm::runtime::micro_rpc::Session session{&framer, nullptr, nullptr, nullptr}; - tvm_crt_error_t err = - session.SendMessage(tvm::runtime::micro_rpc::MessageType::kLog, - reinterpret_cast(log_buffer), num_bytes_logged); - if (err != kTvmErrorNoError) { - TVMPlatformAbort(err); - } - } -} - -tvm_crt_error_t MicroTVMRpcServerLoop(microtvm_rpc_server_t server_ptr, uint8_t** new_data, - size_t* new_data_size_bytes) { - tvm::runtime::micro_rpc::MicroRPCServer* server = - static_cast(server_ptr); - return server->Loop(new_data, new_data_size_bytes); -} - -} // extern "C" diff --git a/src/runtime/crt/platform-template.c b/src/runtime/crt/platform-template.c deleted file mode 100644 index b93fd1459be6..000000000000 --- a/src/runtime/crt/platform-template.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \brief Implementation of TVMPlatform functions in tvm/runtime/crt/platform.h - */ - -#include -#include -#include -#include -#include -#include -#include - -uint8_t memory[TVM_WORKSPACE_SIZE_BYTES]; -MemoryManagerInterface* memory_manager; - -// Called when an internal error occurs and execution cannot continue. -void TVMPlatformAbort(tvm_crt_error_t error_code) { exit(1); } - -// Called by the microTVM RPC server to implement TVMLogf. -size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, const char* fmt, - va_list args) { - return vsprintf(out_buf, fmt, args); -} - -// Allocate memory for use by TVM. -tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { - return memory_manager->Allocate(memory_manager, num_bytes, dev, out_ptr); -} - -// Free memory used by TVM. -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { - return memory_manager->Free(memory_manager, ptr, dev); -} - -// Start a device timer. -tvm_crt_error_t TVMPlatformTimerStart() { return kTvmErrorNoError; } - -// Stop the running device timer and get the elapsed time (in microseconds). -tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) { return kTvmErrorNoError; } - -// Platform-specific before measurement call. -tvm_crt_error_t TVMPlatformBeforeMeasurement() { return kTvmErrorNoError; } - -// Platform-specific after measurement call. -tvm_crt_error_t TVMPlatformAfterMeasurement() { return kTvmErrorNoError; } - -// Fill a buffer with random data. 
-tvm_crt_error_t TVMPlatformGenerateRandom(uint8_t* buffer, size_t num_bytes) { - return kTvmErrorNoError; -} - -// Initialize TVM inference. -tvm_crt_error_t TVMPlatformInitialize() { - int status = - PageMemoryManagerCreate(&memory_manager, memory, sizeof(memory), 8 /* page_size_log2 */); - if (status != 0) { - fprintf(stderr, "error initiailizing memory manager\n"); - return kTvmErrorPlatformMemoryManagerInitialized; - } - return kTvmErrorNoError; -} diff --git a/src/runtime/micro/micro_session.cc b/src/runtime/micro/micro_session.cc deleted file mode 100644 index 23ed09119a69..000000000000 --- a/src/runtime/micro/micro_session.cc +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file micro_session.cc - */ - -#include "micro_session.h" - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "../../support/str_escape.h" -#include "../rpc/rpc_channel.h" -#include "../rpc/rpc_channel_logger.h" -#include "../rpc/rpc_endpoint.h" -#include "../rpc/rpc_session.h" -#include "crt_config.h" - -namespace tvm { -namespace runtime { -namespace micro_rpc { - -class CallbackWriteStream : public WriteStream { - public: - explicit CallbackWriteStream(PackedFunc fsend, ::std::chrono::microseconds write_timeout) - : fsend_{fsend}, write_timeout_{write_timeout} {} - - ssize_t Write(const uint8_t* data, size_t data_size_bytes) override { - TVMByteArray bytes; - bytes.data = (const char*)data; - bytes.size = data_size_bytes; - if (write_timeout_ == ::std::chrono::microseconds::zero()) { - fsend_(bytes, nullptr); - } else { - fsend_(bytes, write_timeout_.count()); - } - - return static_cast(data_size_bytes); - } - - void PacketDone(bool is_valid) override {} - - void SetWriteTimeout(::std::chrono::microseconds timeout) { write_timeout_ = timeout; } - - private: - PackedFunc fsend_; - ::std::chrono::microseconds write_timeout_; -}; - -class MicroTransportChannel : public RPCChannel { - public: - enum class State : uint8_t { - kReset = 0, // state entered before the transport has been read or written to. - kSessionTerminated = 1, // session is terminated, but transport is alive. - kSessionEstablished = 2, // session is alive. - }; - - /*! - * \brief Construct a new MicroTransportChannel. - * \param fsend A PackedFunc accepting (data_bytes, timeout_usec) and returning the number of - * bytes sent. If a timeout_usec elapses before all data is sent, it should return 0. - * \param frecv A PackedFunc accepting (num_bytes, timeout_usec) and returning a string containing - * the received data. Must not return an empty string, except to indicate a timeout. 
- * \param session_start_retry_timeout During session initialization, the session start message is - * re-sent after this many microseconds elapse without a reply. If 0, the session start message - * is sent only once. - * \param session_start_timeout Session initialization is considered "timed out" if no reply is - * received this many microseconds after the session start is sent. If 0, a session start never - * times out. - * \param session_established_timeout Timeout used for the Recv() function. This is used for - * messages sent after a session is already established. If 0, Recv() never times out. - */ - MicroTransportChannel(PackedFunc fsend, PackedFunc frecv, - ::std::chrono::microseconds session_start_retry_timeout, - ::std::chrono::microseconds session_start_timeout, - ::std::chrono::microseconds session_established_timeout) - : state_{State::kReset}, - session_start_retry_timeout_{session_start_retry_timeout}, - session_start_timeout_{session_start_timeout}, - session_established_timeout_{session_established_timeout}, - write_stream_{fsend, session_start_timeout}, - framer_{&write_stream_}, - receive_buffer_{new uint8_t[TVM_CRT_MAX_PACKET_SIZE_BYTES], TVM_CRT_MAX_PACKET_SIZE_BYTES}, - session_{&framer_, &receive_buffer_, &HandleMessageReceivedCb, this}, - unframer_{session_.Receiver()}, - did_receive_message_{false}, - frecv_{frecv}, - message_buffer_{nullptr} {} - - private: - static constexpr const size_t kReceiveBufferSizeBytes = 128; - - /* - * \brief Receive data until either pf() returns true or a timeout occurs. - * - * The condition function is called first, so this function may return without performing a read. - * Following this call, received data is consumed and frecv_ is invoked until the timeout occurs - * or the condition function passes. - * - * \param pf A condition function that returns true when enough data has been received for the - * caller to proceed. - * \param timeout Pointer to number of microseconds to wait before timing out. If nullptr, no - * timeout ever occurs in this function, so it may block forever. If 0, a single non-blocking - * read is performed, and any data returned is processed. - * \return true if the condition passed, false if the timeout expired. - */ - bool ReceiveUntil(TypedPackedFunc pf, ::std::chrono::microseconds* timeout) { - if (pf()) { - return true; - } - - auto end_time = ::std::chrono::steady_clock::now(); - if (timeout != nullptr) { - end_time += *timeout; - } - for (;;) { - if (ConsumeReceivedPayload(pf)) { - return true; - } - - ::std::string chunk; - size_t bytes_needed = unframer_.BytesNeeded(); - CHECK_GT(bytes_needed, 0) << "unframer unexpectedly needs no data"; - if (timeout != nullptr) { - ::std::chrono::microseconds iter_timeout{ - ::std::max(::std::chrono::microseconds{0}, - ::std::chrono::duration_cast<::std::chrono::microseconds>( - end_time - ::std::chrono::steady_clock::now()))}; - chunk = frecv_(bytes_needed, iter_timeout.count()).operator std::string(); - } else { - chunk = frecv_(bytes_needed, nullptr).operator std::string(); - } - pending_chunk_ = chunk; - if (pending_chunk_.size() == 0) { - // Timeout occurred - return false; - } - } - } - - static constexpr const int kNumRandRetries = 10; - static std::atomic random_seed; - - inline uint8_t GenerateRandomNonce() { - // NOTE: this is bad concurrent programming but in practice we don't really expect race - // conditions here, and even if they occur we don't particularly care whether a competing - // process computes a different random seed. 
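Editor's note on ReceiveUntil above: it converts an overall timeout into a deadline and recomputes the remaining budget before each blocking read, clamping it at zero so a late wake-up still performs one final check. Stripped of the framing details, the pattern looks like the sketch below; the predicate and read callback are placeholders, and unlike the removed code this version always requires a finite timeout.

#include <algorithm>
#include <chrono>
#include <functional>

using Clock = std::chrono::steady_clock;
using Micros = std::chrono::microseconds;

// Keep reading until done() reports success or the deadline passes.
// read_some(budget) should block for at most budget and return false on timeout.
bool ReceiveUntil(const std::function<bool()>& done,
                  const std::function<bool(Micros)>& read_some,
                  Micros timeout) {
  if (done()) return true;
  const Clock::time_point deadline = Clock::now() + timeout;
  for (;;) {
    // Recompute what is left of the budget; never pass a negative wait to the reader.
    Micros remaining = std::max(
        Micros{0}, std::chrono::duration_cast<Micros>(deadline - Clock::now()));
    if (!read_some(remaining)) return false;   // reader timed out with nothing new
    if (done()) return true;
    if (remaining == Micros{0}) return false;  // budget exhausted
  }
}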
This value is just chosen pseudo-randomly to - // form an initial distinct session id. Here we just want to protect against bad loads causing - // confusion. - unsigned int seed = random_seed.load(); - if (seed == 0) { -#if defined(_MSC_VER) - seed = (unsigned int)time(nullptr); - srand(seed); -#else - seed = (unsigned int)time(nullptr); -#endif - } - uint8_t initial_nonce = 0; - for (int i = 0; i < kNumRandRetries && initial_nonce == 0; ++i) { -#if defined(_MSC_VER) - initial_nonce = rand(); // NOLINT(runtime/threadsafe_fn) -#else - initial_nonce = rand_r(&seed); -#endif - } - random_seed.store(seed); - ICHECK_NE(initial_nonce, 0) << "rand() does not seem to be producing random values"; - return initial_nonce; - } - - bool StartSessionInternal() { - using ::std::chrono::duration_cast; - using ::std::chrono::microseconds; - using ::std::chrono::steady_clock; - - steady_clock::time_point start_time = steady_clock::now(); - ICHECK_EQ(kTvmErrorNoError, session_.Initialize(GenerateRandomNonce())); - ICHECK_EQ(kTvmErrorNoError, session_.StartSession()); - - if (session_start_timeout_ == microseconds::zero() && - session_start_retry_timeout_ == microseconds::zero()) { - ICHECK(ReceiveUntil([this]() -> bool { return session_.IsEstablished(); }, nullptr)) - << "ReceiveUntil indicated timeout expired, but no timeout set!"; - ICHECK(session_.IsEstablished()) << "Session not established, but should be"; - return true; - } - - auto session_start_end_time = start_time + session_start_timeout_; - steady_clock::time_point end_time; - if (session_start_retry_timeout_ != ::std::chrono::microseconds::zero()) { - end_time = start_time + session_start_retry_timeout_; - } else { - end_time = session_start_end_time; - } - - while (!session_.IsEstablished()) { - microseconds time_remaining = - ::std::max(microseconds{0}, duration_cast(end_time - steady_clock::now())); - if (ReceiveUntil([this]() -> bool { return session_.IsEstablished(); }, &time_remaining)) { - break; - } - - if (session_start_timeout_ != microseconds::zero() && end_time >= session_start_end_time) { - return false; - } - end_time += session_start_retry_timeout_; - - ICHECK_EQ(kTvmErrorNoError, session_.Initialize(GenerateRandomNonce())); - ICHECK_EQ(kTvmErrorNoError, session_.StartSession()); - } - - return true; - } - - public: - bool StartSession() { - ICHECK(state_ == State::kReset) - << "MicroSession: state_: expected kReset, got " << uint8_t(state_); - - bool to_return = StartSessionInternal(); - if (to_return) { - write_stream_.SetWriteTimeout(session_established_timeout_); - } - - return to_return; - } - - size_t Send(const void* data, size_t size) override { - const uint8_t* data_bytes = static_cast(data); - tvm_crt_error_t err = session_.SendMessage(MessageType::kNormal, data_bytes, size); - ICHECK(err == kTvmErrorNoError) << "SendMessage returned " << err; - - return size; - } - - size_t Recv(void* data, size_t size) override { - size_t num_bytes_recv = 0; - while (num_bytes_recv < size) { - if (message_buffer_ != nullptr) { - num_bytes_recv += message_buffer_->Read(static_cast(data), size); - if (message_buffer_->ReadAvailable() == 0) { - message_buffer_ = nullptr; - session_.ClearReceiveBuffer(); - } - if (num_bytes_recv == size) { - ICHECK(message_buffer_ == nullptr || message_buffer_->ReadAvailable() > 0); - return num_bytes_recv; - } - } - - did_receive_message_ = false; - if (session_established_timeout_ == ::std::chrono::microseconds::zero()) { - ICHECK(ReceiveUntil([this]() -> bool { return did_receive_message_; }, nullptr)) - 
<< "ReceiveUntil timeout expired, but no timeout configured!"; - } else { - if (!ReceiveUntil([this]() -> bool { return did_receive_message_; }, - &session_established_timeout_)) { - std::stringstream ss; - ss << "MicroSessionTimeoutError: failed to read reply message after timeout " - << session_established_timeout_.count() / 1e6 << "s"; - - throw std::runtime_error(ss.str()); - } - } - } - - return num_bytes_recv; - } - - FrameBuffer* GetReceivedMessage() { - if (did_receive_message_) { - did_receive_message_ = false; - return message_buffer_; - } - - return nullptr; - } - - private: - /*! - * \brief Consume the entire received payload, unless the pf condition is met halfway through. - * - * This function expects pending_chunk_ to contain a chunk of unprocessed packet data. It - * repeatedly writes the chunk to the Unframer until either a) pf() returns True or b) no more - * data remains to be written. - * - * \param pf A PackedFunc which returns true when ReceiveUntil should return. - * \returns true if pf() returned true during processing; false otherwise. - */ - bool ConsumeReceivedPayload(TypedPackedFunc pf) { - while (pending_chunk_.size() > 0) { - size_t bytes_consumed = 0; - int unframer_error = unframer_.Write((const uint8_t*)pending_chunk_.data(), - pending_chunk_.size(), &bytes_consumed); - - ICHECK(bytes_consumed <= pending_chunk_.size()) - << "consumed " << bytes_consumed << " want <= " << pending_chunk_.size(); - pending_chunk_ = pending_chunk_.substr(bytes_consumed); - if (unframer_error < 0) { - LOG(ERROR) << "unframer got error code: " << unframer_error; - } else { - if (pf()) { - return true; - } - } - } - - return false; - } - - static void HandleMessageReceivedCb(void* context, MessageType message_type, FrameBuffer* buf) { - static_cast(context)->HandleMessageReceived(message_type, buf); - } - - void HandleMessageReceived(MessageType message_type, FrameBuffer* buf) { - size_t message_size_bytes; - switch (message_type) { - case MessageType::kStartSessionInit: - break; - - case MessageType::kStartSessionReply: - state_ = State::kSessionEstablished; - break; - - case MessageType::kTerminateSession: - if (state_ == State::kReset) { - state_ = State::kSessionTerminated; - } else if (state_ == State::kSessionTerminated) { - LOG(FATAL) << "SessionTerminatedError: multiple session-terminated messages received; " - "device in reboot loop?"; - } else if (state_ == State::kSessionEstablished) { - LOG(FATAL) << "SessionTerminatedError: remote device terminated connection"; - } - break; - - case MessageType::kLog: - uint8_t message[1024]; - message_size_bytes = buf->ReadAvailable(); - if (message_size_bytes == 0) { - return; - } else if (message_size_bytes > sizeof(message) - 1) { - LOG(ERROR) << "Remote log message is too long to display: " << message_size_bytes - << " bytes"; - return; - } - - ICHECK_EQ(buf->Read(message, sizeof(message) - 1), message_size_bytes); - message[message_size_bytes] = 0; - LOG(INFO) << "remote: " << message; - session_.ClearReceiveBuffer(); - return; - - case MessageType::kNormal: - did_receive_message_ = true; - message_buffer_ = buf; - break; - } - } - - State state_; - ::std::chrono::microseconds session_start_retry_timeout_; - ::std::chrono::microseconds session_start_timeout_; - ::std::chrono::microseconds session_established_timeout_; - CallbackWriteStream write_stream_; - Framer framer_; - FrameBuffer receive_buffer_; - Session session_; - Unframer unframer_; - bool did_receive_message_; - PackedFunc frecv_; - FrameBuffer* message_buffer_; - 
std::string pending_chunk_; -}; - -std::atomic MicroTransportChannel::random_seed{0}; - -TVM_REGISTER_GLOBAL("micro._rpc_connect").set_body([](TVMArgs args, TVMRetValue* rv) { - MicroTransportChannel* micro_channel = - new MicroTransportChannel(args[1], args[2], ::std::chrono::microseconds(uint64_t(args[3])), - ::std::chrono::microseconds(uint64_t(args[4])), - ::std::chrono::microseconds(uint64_t(args[5]))); - if (!micro_channel->StartSession()) { - std::stringstream ss; - ss << "MicroSessionTimeoutError: session start handshake failed after " << double(args[4]) / 1e6 - << "s"; - throw std::runtime_error(ss.str()); - } - std::unique_ptr channel(micro_channel); - bool enable_logging = false; - if (args.num_args > 7) { - enable_logging = args[7]; - } - if (enable_logging) { - channel.reset(new RPCChannelLogging(std::move(channel))); - } - auto ep = RPCEndpoint::Create(std::move(channel), args[0], "", args[6]); - auto sess = CreateClientSession(ep); - *rv = CreateRPCSessionModule(sess); -}); - -} // namespace micro_rpc -} // namespace runtime -} // namespace tvm - -extern "C" { - -void TVMLogf(const char* fmt, ...) { - va_list args; - char msg_buf[256]; - va_start(args, fmt); - vsnprintf(msg_buf, sizeof(msg_buf), fmt, args); - va_end(args); - LOG(INFO) << msg_buf; -} - -void TVMPlatformAbort(int error_code) { ICHECK(false) << "TVMPlatformAbort: " << error_code; } -} diff --git a/src/runtime/micro/micro_session.h b/src/runtime/micro/micro_session.h deleted file mode 100644 index 50018a4bb0c0..000000000000 --- a/src/runtime/micro/micro_session.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file micro_session.h - * \brief session to manage multiple micro modules - * - * Each session consists of an interaction with a *single* logical device. - * Within that interaction, multiple TVM modules can be loaded on the logical - * device. - * - * Multiple sessions can exist simultaneously, but there is only ever one - * *active* session. The idea of an active session mainly has implications for - * the frontend, in that one must make a session active in order to allocate - * new TVM objects on it. Aside from that, previously allocated objects can be - * used even if the session which they belong to is not currently active. 
- */ -#ifndef TVM_RUNTIME_MICRO_MICRO_SESSION_H_ -#define TVM_RUNTIME_MICRO_MICRO_SESSION_H_ - -#endif // TVM_RUNTIME_MICRO_MICRO_SESSION_H_ diff --git a/src/runtime/micro/standalone/README.md b/src/runtime/micro/standalone/README.md deleted file mode 100644 index 60614270b008..000000000000 --- a/src/runtime/micro/standalone/README.md +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - - - - - - - - -## A replacement implementation of the TVM runtime, focused on a minimal subset of the overall runtime. diff --git a/src/runtime/micro/standalone/microtvm_graph_executor.cc b/src/runtime/micro/standalone/microtvm_graph_executor.cc deleted file mode 100644 index afe9d5d5f6df..000000000000 --- a/src/runtime/micro/standalone/microtvm_graph_executor.cc +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "microtvm_graph_executor.h" - -#include - -#include -#include - -#include "picojson.h" - -namespace tvm { -namespace micro { -namespace { - -int TVMSToI(const std::string& str) { - // For platforms (e.g. older NDK versions) where std::stoi(...) is not available. 
- char* end; - return std::strtol(str.c_str(), &end, 10); -} - -void ParseOutputs(const picojson::array& joutputs, DynArray* outputs) { - outputs->resize(joutputs.size()); - for (size_t i = 0; i < joutputs.size(); ++i) { - const auto& joutput_i = joutputs[i].get(); - (*outputs)[i] = NodeEntry{static_cast(joutput_i[0].get()), - static_cast(joutput_i[1].get()), - static_cast(joutput_i[2].get())}; - } -} - -void ParseAttrs(const picojson::object& jattr, GraphAttr* attr) { - // parse dltype - for (const auto& jdltype_ : jattr.at("dltype").get()) { - if (jdltype_.is()) { - continue; - } - const auto& jdltype = jdltype_.get(); - - attr->dltype.resize(jdltype.size()); - for (size_t i = 0; i < jdltype.size(); ++i) { - attr->dltype[i] = jdltype[i].get(); - } - } - for (const auto& jstorage_id_ : jattr.at("storage_id").get()) { - if (jstorage_id_.is()) { - continue; - } - const auto& jstorage_id = jstorage_id_.get(); - - attr->storage_id.resize(jstorage_id.size()); - for (size_t i = 0; i < jstorage_id.size(); ++i) { - attr->storage_id[i] = static_cast(jstorage_id[i].get()); - } - } - for (const auto& jshape_ : jattr.at("shape").get()) { - if (jshape_.is()) { - continue; - } - const auto& jshape = jshape_.get(); - attr->shape.resize(jshape.size()); - for (size_t i = 0; i < jshape.size(); ++i) { - const auto& jshape_i = jshape[i].get(); - attr->shape[i].resize(jshape_i.size()); - for (size_t j = 0; j < jshape_i.size(); ++j) { - attr->shape[i][j] = static_cast(jshape_i[j].get()); - } - } - } -} - -void ParseNodes(const picojson::array& jnodes, DynArray* nodes) { - nodes->resize(jnodes.size()); - for (size_t i = 0; i < nodes->size(); ++i) { - auto* n = &(*nodes)[i]; - const auto& jn = jnodes[i].get(); - n->op_type = jn.at("op").get(); - n->name = jn.at("name").get(); - const auto jinputs = jn.at("inputs").get(); - n->inputs.resize(jinputs.size()); - for (size_t i = 0; i < jinputs.size(); ++i) { - const auto& jinput_i = jinputs[i].get(); - n->inputs[i] = NodeEntry{static_cast(jinput_i[0].get()), - static_cast(jinput_i[1].get()), - static_cast(jinput_i[2].get())}; - } - const auto& jattrs_ = jn.find("attrs"); - if (jattrs_ != jn.end()) { - const auto& jattrs = jattrs_->second.get(); - n->param.func_name = jattrs.at("func_name").get(); - n->param.num_inputs = TVMSToI(jattrs.at("num_inputs").get()); - n->param.num_outputs = TVMSToI(jattrs.at("num_outputs").get()); - n->param.flatten_data = TVMSToI(jattrs.at("flatten_data").get()); - } - } -} - -void ParseArgNodes(const picojson::array& jinput_nodes, DynArray* input_nodes) { - input_nodes->resize(jinput_nodes.size()); - for (size_t i = 0; i < jinput_nodes.size(); ++i) { - (*input_nodes)[i] = static_cast(jinput_nodes[i].get()); - } -} -} // namespace - -NDArray::~NDArray() {} - -NDArray NDArray::Empty(const DynArray& shape, DLDataType dtype, DLDevice dev) { - NDArray r; - int64_t nbytes = (dtype.bits * dtype.lanes + 7) / 8; - for (const auto& s : shape) { - nbytes *= s; - } - - r.storage_ = std::shared_ptr( - TVMBackendAllocWorkspace(static_cast(dev.device_type), static_cast(dev.device_id), - nbytes, dtype.code, dtype.bits), - [=](void* ptr) { - if (ptr) { - TVMBackendFreeWorkspace(dev.device_type, dev.device_id, ptr); - } - }); - r.shape_ = shape; - r.dtype_ = dtype; - r.device_ = dev; - return r; -} - -NDArray NDArray::CreateView(const DynArray& shape, DLDataType dtype) { - NDArray r; - r.storage_ = storage_; - r.shape_ = shape; - r.dtype_ = dtype; - r.device_ = device_; - return r; -} - -DLTensor NDArray::ToDLTensor() { - DLTensor r; - r.data = 
storage_.get(); - assert(r.data != nullptr); - r.device = device_; - r.ndim = shape_.size(); - r.dtype = dtype_; - r.shape = shape_.data(); - r.strides = nullptr; - r.byte_offset = 0; - return r; -} - -size_t GetDataSize(const DLTensor& arr) { - size_t size = 1; - for (size_t i = 0; i < static_cast(arr.ndim); ++i) { - size *= static_cast(arr.shape[i]); - } - size *= (arr.dtype.bits * arr.dtype.lanes + 7) / 8; - return size; -} - -void NDArray::CopyFrom(DLTensor* src) { - std::memcpy(storage_.get(), - reinterpret_cast(src->data) + static_cast(src->byte_offset), - GetDataSize(*src)); -} - -void NDArray::CopyTo(DLTensor* dst) const { - std::memcpy(reinterpret_cast(dst->data) + static_cast(dst->byte_offset), - storage_.get(), GetDataSize(*dst)); -} - -DSOModule::DSOModule(const std::string& name) { - dlerror(); - lib_handle_ = dlopen(name.c_str(), RTLD_LAZY | RTLD_LOCAL); - assert(!dlerror()); - assert(lib_handle_ != nullptr); - -#define TVM_INIT_CONTEXT_FUNC(FuncName) \ - if (auto* fp = reinterpret_cast(GetSymbol("__" #FuncName))) { \ - *fp = FuncName; \ - } - // Initialize the functions - TVM_INIT_CONTEXT_FUNC(TVMAPISetLastError); - TVM_INIT_CONTEXT_FUNC(TVMBackendAllocWorkspace); - TVM_INIT_CONTEXT_FUNC(TVMBackendFreeWorkspace); - TVM_INIT_CONTEXT_FUNC(TVMBackendParallelLaunch); -// TODO(tulloch): implement these functions? -// TVM_INIT_CONTEXT_FUNC(TVMFuncCall); -// TVM_INIT_CONTEXT_FUNC(TVMBackendGetFuncFromEnv); -// TVM_INIT_CONTEXT_FUNC(TVMBackendParallelBarrier); -#undef TVM_INIT_CONTEXT_FUNC -} - -DSOModule::~DSOModule() { - if (lib_handle_) { - dlclose(lib_handle_); - } -} - -BackendPackedCFunc DSOModule::GetFunction(const std::string& name) const { - auto faddr = reinterpret_cast(GetSymbol(name.c_str())); - assert(faddr); - return faddr; -} - -void* DSOModule::GetSymbol(const char* name) const { - dlerror(); - auto* f = dlsym(lib_handle_, name); - assert(!dlerror()); - return f; -} - -MicroGraphExecutor::MicroGraphExecutor(const std::string& graph_json, DSOModule* module) { - assert(module); - module_ = module; - picojson::value v; - picojson::parse(v, graph_json); - ParseNodes(v.get()["nodes"].get(), &nodes_); - ParseArgNodes(v.get()["arg_nodes"].get(), &input_nodes_); - ParseArgNodes(v.get()["node_row_ptr"].get(), &node_row_ptr_); - ParseOutputs(v.get()["heads"].get(), &outputs_); - ParseAttrs(v.get()["attrs"].get(), &attrs_); - SetupStorage(); - SetupOpExecs(); -} - -MicroGraphExecutor::~MicroGraphExecutor() {} - -void MicroGraphExecutor::Run() { - for (size_t i = 0; i < op_execs_.size(); ++i) { - if (op_execs_[i]) op_execs_[i](); - } -} - -void MicroGraphExecutor::SetInput(int index, DLTensor* data_in) { - assert(static_cast(index) < input_nodes_.size()); - uint32_t eid = this->entry_id(input_nodes_[index], 0); - data_entry_[eid].CopyFrom(data_in); -} - -void MicroGraphExecutor::CopyOutputTo(int index, DLTensor* data_out) { - assert(static_cast(index) < outputs_.size()); - uint32_t eid = this->entry_id(outputs_[index]); - const NDArray& data = data_entry_[eid]; - data.CopyTo(data_out); -} - -void MicroGraphExecutor::SetupStorage() { - // Grab saved optimization plan from graph. - DynArray vtype(attrs_.dltype.size()); - for (size_t i = 0; i < attrs_.dltype.size(); ++i) { - assert(attrs_.dltype[i] == "float32"); - DLDataType ty; - ty.bits = 32; - ty.lanes = 1; - ty.code = kDLFloat; - vtype[i] = ty; - } - - // Size and device type of each storage pool entry. - std::vector pool_entry; - // Find the maximum space size. 
- for (size_t i = 0; i < attrs_.shape.size(); ++i) { - int storage_id = attrs_.storage_id[i]; - // Use the fallback device if no device index is available. - int device_type = static_cast(device_.device_type); - size_t size = 1; - for (int64_t sz : attrs_.shape[i]) { - size *= static_cast(sz); - } - assert(storage_id >= 0); - DLDataType t = vtype[i]; - size_t bits = t.bits * t.lanes; - assert(bits % 8U == 0U || bits == 1U); - size_t bytes = ((bits + 7U) / 8U) * size; - - uint32_t sid = static_cast(storage_id); - if (sid >= pool_entry.size()) { - pool_entry.resize(sid + 1, {0, -1}); - } else { - assert(pool_entry[sid].device_type == -1 || pool_entry[sid].device_type == device_type); - } - pool_entry[sid].size = std::max(pool_entry[sid].size, bytes); - pool_entry[sid].device_type = device_type; - } - - // Allocate the space. - storage_pool_.resize(pool_entry.size()); - for (size_t i = 0; i < pool_entry.size(); ++i) { - const auto& pit = pool_entry[i]; - DynArray shape(1); - shape[0] = static_cast(pit.size + 3) / 4; - storage_pool_[i] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, device_); - } - - // Assign the pooled entries. A unified memory pool is used to simplify - // memory assignment for each node entry. The allocated memory on each device - // is mapped to this pool. - data_entry_.resize(num_node_entries()); - for (size_t i = 0; i < data_entry_.size(); ++i) { - int storage_id = attrs_.storage_id[i]; - assert(static_cast(storage_id) < storage_pool_.size()); - data_entry_[i] = storage_pool_[storage_id].CreateView(attrs_.shape[i], vtype[i]); - } -} - -std::function CreateTVMOp(const DSOModule& module, const TVMOpParam& param, - const DynArray& args) { - typedef union { - void* v_handle; - } TVMValue; - /*typedef*/ enum { - kTVMDLTensorHandle = 7U, - } /*TVMArgTypeCode*/; - struct OpArgs { - DynArray args; - DynArray arg_values; - DynArray arg_tcodes; - DynArray shape_data; - }; - - std::shared_ptr arg_ptr = std::make_shared(); - arg_ptr->args = args; - if (param.flatten_data) { - arg_ptr->shape_data.resize(arg_ptr->args.size()); - } - arg_ptr->arg_values.resize(arg_ptr->args.size()); - arg_ptr->arg_tcodes.resize(arg_ptr->args.size()); - for (size_t i = 0; i < arg_ptr->args.size(); ++i) { - TVMValue v; - DLTensor* t = &(arg_ptr->args[i]); - v.v_handle = t; - arg_ptr->arg_values[i] = v; - arg_ptr->arg_tcodes[i] = kTVMDLTensorHandle; - if (param.flatten_data) { - arg_ptr->shape_data[i] = - std::accumulate(t->shape, t->shape + t->ndim, 1, std::multiplies()); - t->ndim = 1; - t->shape = &(arg_ptr->shape_data[i]); - } - } - - if (param.func_name == "__nop") { - return []() {}; - } else if (param.func_name == "__copy") { - // TODO(mbs): device_copy cleanup. - assert(false); - } - - BackendPackedCFunc pf = module.GetFunction(param.func_name); - assert(pf != nullptr); - - auto fexec = [arg_ptr, pf]() { - assert(pf); - (pf)(arg_ptr->arg_values.data(), arg_ptr->arg_tcodes.data(), - static_cast(arg_ptr->arg_values.size())); - }; - return fexec; -} - -void MicroGraphExecutor::SetupOpExecs() { - op_execs_.resize(nodes_.size()); - // setup the array and requirements. 
- for (uint32_t nid = 0; nid < nodes_.size(); ++nid) { - const auto& inode = nodes_[nid]; - if (inode.op_type == "null") continue; - DynArray args(inode.inputs.size() + inode.param.num_outputs); - for (size_t i = 0; i < inode.inputs.size(); ++i) { - const auto& e = inode.inputs[i]; - args[i] = data_entry_[this->entry_id(e)].ToDLTensor(); - } - for (size_t index = 0; index < inode.param.num_outputs; ++index) { - uint32_t eid = this->entry_id(nid, index); - args[index + inode.inputs.size()] = data_entry_[eid].ToDLTensor(); - } - assert(inode.op_type == "tvm_op"); - op_execs_[nid] = CreateTVMOp(*module_, inode.param, args); - } -} - -} // namespace micro -} // namespace tvm diff --git a/src/runtime/micro/standalone/microtvm_graph_executor.h b/src/runtime/micro/standalone/microtvm_graph_executor.h deleted file mode 100644 index 73aead54aaed..000000000000 --- a/src/runtime/micro/standalone/microtvm_graph_executor.h +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef TVM_RUNTIME_MICRO_STANDALONE_MICROTVM_GRAPH_EXECUTOR_H_ -#define TVM_RUNTIME_MICRO_STANDALONE_MICROTVM_GRAPH_EXECUTOR_H_ - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "microtvm_runtime_api.h" -#include "minimal_vector.h" - -namespace tvm { -namespace micro { - -typedef int (*BackendPackedCFunc)(void* args, int* type_codes, int num_args); - -// dlopen/dlsym/dlclose abstraction. -class DSOModule { - public: - explicit DSOModule(const std::string& name); - ~DSOModule(); - BackendPackedCFunc GetFunction(const std::string& name) const; - - private: - void* GetSymbol(const char* name) const; - void* lib_handle_{nullptr}; -}; - -// The graph attribute fields. -struct GraphAttr { - DynArray storage_id; - DynArray dltype; - DynArray> shape; -}; - -// Memory pool entry. -struct PoolEntry { - size_t size; - int device_type; -}; - -// Node entry -struct NodeEntry { - uint32_t node_id; - uint32_t index; - uint32_t version; -}; - -// Operator attributes about TVMOp -struct TVMOpParam { - std::string func_name; - uint32_t num_inputs; - uint32_t num_outputs; - uint32_t flatten_data; -}; - -// Node -struct Node { - // operator type in string - std::string op_type; - // name of the op - std::string name; - // parameters - TVMOpParam param; - // inputs - DynArray inputs; -}; - -// Minimal NDArray abstraction -class NDArray { - public: - // initialize NDArray with shape/dtype/device - static NDArray Empty(const DynArray& shape, DLDataType dtype, DLDevice dev); - // create a view of the NDArray storage, with the given shape/dtype - NDArray CreateView(const DynArray& shape, DLDataType dtype); - // Copy into the internal storage. 
- void CopyFrom(DLTensor* src); - // Copy out of the internal storage - void CopyTo(DLTensor* dst) const; - // View `this` as a DLTensor - DLTensor ToDLTensor(); - ~NDArray(); - - private: - // reference-counted storage - std::shared_ptr storage_; - // tensor shape - DynArray shape_; - // tensor dtype - DLDataType dtype_; - // tensor device - DLDevice device_; -}; - -// Minimal GraphExecutor implementation -class MicroGraphExecutor { - public: - // Construct a GraphExecutor with the given graph and DSOModule. - MicroGraphExecutor(const std::string& graph_json, DSOModule* module); - ~MicroGraphExecutor(); - // Run the graph - void Run(); - // Set the input at `index` to a copy of the tensor `data_in` - void SetInput(int index, DLTensor* data_in); - // Copy the output at `index` into `data_out` - void CopyOutputTo(int index, DLTensor* data_out); - - private: - void SetupStorage(); - void SetupOpExecs(); - - uint32_t num_node_entries() const { return node_row_ptr_.back(); } - uint32_t entry_id(uint32_t nid, uint32_t index) const { return node_row_ptr_[nid] + index; } - uint32_t entry_id(const NodeEntry& e) const { return entry_id(e.node_id, e.index); } - - DSOModule* module_; - - // TODO(tulloch): these are essentially unused after construction. - // The graph nodes - DynArray nodes_; - // The argument noes - DynArray input_nodes_; - // Used for quick entry indexing - DynArray node_row_ptr_; - // Output entries - DynArray outputs_; - // Additional graph attributes - GraphAttr attrs_; - // Execution device - DLDevice device_{kDLCPU, 0}; - - // Common storage pool - DynArray storage_pool_; - // Data entry for each node - DynArray data_entry_; - // Operator for each node - DynArray> op_execs_; -}; - -} // namespace micro -} // namespace tvm - -#endif // TVM_RUNTIME_MICRO_STANDALONE_MICROTVM_GRAPH_EXECUTOR_H_ diff --git a/src/runtime/micro/standalone/microtvm_runtime.cc b/src/runtime/micro/standalone/microtvm_runtime.cc deleted file mode 100644 index a51be1414b68..000000000000 --- a/src/runtime/micro/standalone/microtvm_runtime.cc +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -#include "tvm/runtime/micro/standalone/microtvm_runtime.h" - -#include - -#include "microtvm_graph_executor.h" - -void* MicroTVMRuntimeCreate(const char* json, size_t json_len, void* module) { - return new tvm::micro::MicroGraphExecutor(std::string(json, json + json_len), - reinterpret_cast(module)); -} - -void MicroTVMRuntimeDestroy(void* handle) { - delete reinterpret_cast(handle); -} - -void MicroTVMRuntimeSetInput(void* handle, int index, void* tensor) { - reinterpret_cast(handle)->SetInput( - index, reinterpret_cast(tensor)); -} - -void MicroTVMRuntimeRun(void* handle) { - reinterpret_cast(handle)->Run(); -} - -void MicroTVMRuntimeGetOutput(void* handle, int index, void* tensor) { - reinterpret_cast(handle)->CopyOutputTo( - index, reinterpret_cast(tensor)); -} -void* MicroTVMRuntimeDSOModuleCreate(const char* so, size_t so_len) { - return new tvm::micro::DSOModule(std::string(so, so + so_len)); -} - -void MicroTVMRuntimeDSOModuleDestroy(void* module) { - delete reinterpret_cast(module); -} diff --git a/src/runtime/micro/standalone/microtvm_runtime_api.cc b/src/runtime/micro/standalone/microtvm_runtime_api.cc deleted file mode 100644 index c266107faafb..000000000000 --- a/src/runtime/micro/standalone/microtvm_runtime_api.cc +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "microtvm_runtime_api.h" - -#include - -#include -#include - -void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t nbytes, int dtype_code_hint, - int dtype_bits_hint) { - void* ptr = nullptr; - assert(nbytes > 0); -#ifdef __ANDROID__ - ptr = memalign(64, nbytes); -#else - const int ret = posix_memalign(&ptr, 64, nbytes); - (void)ret; - assert(ret == 0); -#endif - return ptr; -} - -int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) { - free(ptr); - return 0; -} - -static thread_local std::string g_last_error; -void TVMAPISetLastError(const char* msg) { g_last_error = msg; } -const char* TVMGetLastError(void) { return g_last_error.c_str(); } - -int TVMBackendParallelLaunch(FTVMParallelLambda flambda, void* cdata, int num_task) { - TVMParallelGroupEnv env; - env.num_task = 1; - flambda(0, &env, cdata); - return 0; -} diff --git a/src/runtime/micro/standalone/microtvm_runtime_api.h b/src/runtime/micro/standalone/microtvm_runtime_api.h deleted file mode 100644 index 47d4d80b9c09..000000000000 --- a/src/runtime/micro/standalone/microtvm_runtime_api.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#ifndef TVM_RUNTIME_MICRO_STANDALONE_MICROTVM_RUNTIME_API_H_ -#define TVM_RUNTIME_MICRO_STANDALONE_MICROTVM_RUNTIME_API_H_ - -#include -#include - -#include - -// The subset of the TVM runtime API that is implemented by the minimal runtime API. - -#define TVM_MICRO_RUNTIME_API_BACKEND_API extern "C" __attribute__((weak, visibility("default"))) - -TVM_MICRO_RUNTIME_API_BACKEND_API int TVMBackendFreeWorkspace(int device_type, int device_id, - void* ptr); - -TVM_MICRO_RUNTIME_API_BACKEND_API void* TVMBackendAllocWorkspace(int device_type, int device_id, - uint64_t nbytes, - int dtype_code_hint, - int dtype_bits_hint); - -typedef struct { - void* sync_handle; - int32_t num_task; -} TVMParallelGroupEnv; - -typedef int (*FTVMParallelLambda)(int task_id, TVMParallelGroupEnv* penv, void* cdata); - -TVM_MICRO_RUNTIME_API_BACKEND_API int TVMBackendParallelLaunch(FTVMParallelLambda flambda, - void* cdata, int num_task); - -TVM_MICRO_RUNTIME_API_BACKEND_API void TVMAPISetLastError(const char* msg); -TVM_MICRO_RUNTIME_API_BACKEND_API const char* TVMGetLastError(void); - -#undef TVM_MICRO_RUNTIME_API_BACKEND_API - -#endif // TVM_RUNTIME_MICRO_STANDALONE_MICROTVM_RUNTIME_API_H_ diff --git a/src/runtime/micro/standalone/minimal_vector.h b/src/runtime/micro/standalone/minimal_vector.h deleted file mode 100644 index 74bea06ebcfd..000000000000 --- a/src/runtime/micro/standalone/minimal_vector.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef TVM_RUNTIME_MICRO_STANDALONE_MINIMAL_VECTOR_H_ -#define TVM_RUNTIME_MICRO_STANDALONE_MINIMAL_VECTOR_H_ - -#include -#include -#include - -namespace tvm { -namespace micro { - -// A minimal wrapper, derived from https://github.com/Robbepop/dynarray/, that -// supports a minimal subset of the std::vector API with a minimized code size. 
-template -struct DynArray { - using value_type = T; - using size_type = size_t; - using difference_type = std::ptrdiff_t; - using reference = value_type&; - using const_reference = value_type const&; - using pointer = value_type*; - using const_pointer = value_type const*; - using iterator = pointer; - using const_iterator = const_pointer; - using reverse_iterator = std::reverse_iterator; - using const_reverse_iterator = std::reverse_iterator; - - explicit DynArray(size_type size = 0) { resize(size); } - - DynArray(const DynArray& other) { - resize(other.size()); - std::copy(other.begin(), other.end(), begin()); - } - - DynArray& operator=(const DynArray& other) { - resize(other.size()); - std::copy(other.begin(), other.end(), begin()); - return *this; - } - - void resize(size_type size) { - if (size > 0) { - data_.reset(new T[size]); - } else { - data_.reset(); - } - size_ = size; - } - - size_type size() const { return size_; } - - reference operator[](size_type pos) { return data_[pos]; } - - const_reference operator[](size_type pos) const { return data_[pos]; } - - pointer data() { return data_.get(); } - - const_pointer data() const { return data_.get(); } - - iterator begin() { return data_.get(); } - - const_iterator begin() const { return data_.get(); } - - const_iterator cbegin() const { return data_.get(); } - - iterator end() { return data_.get() + size_; } - - const_iterator end() const { return data_.get() + size_; } - - const_iterator cend() const { return data_.get() + size_; } - - reference front() { return data_[0]; } - - const_reference front() const { return data_[0]; } - - reference back() { return data_[size_ - 1]; } - - const_reference back() const { return data_[size_ - 1]; } - - private: - std::unique_ptr data_; - size_type size_; -}; - -} // namespace micro -} // namespace tvm - -#endif // TVM_RUNTIME_MICRO_STANDALONE_MINIMAL_VECTOR_H_ diff --git a/src/support/libinfo.cc b/src/support/libinfo.cc index f1768dfd77a8..1b986adcd551 100644 --- a/src/support/libinfo.cc +++ b/src/support/libinfo.cc @@ -151,10 +151,6 @@ #define TVM_INFO_USE_MSVC_MT "NOT-FOUND" #endif -#ifndef TVM_INFO_USE_MICRO -#define TVM_INFO_USE_MICRO "NOT-FOUND" -#endif - #ifndef TVM_INFO_INSTALL_DEV #define TVM_INFO_INSTALL_DEV "NOT-FOUND" #endif @@ -231,10 +227,6 @@ #define TVM_INFO_USE_RANDOM "NOT-FOUND" #endif -#ifndef TVM_INFO_USE_MICRO_STANDALONE_RUNTIME -#define TVM_INFO_USE_MICRO_STANDALONE_RUNTIME "NOT-FOUND" -#endif - #ifndef TVM_INFO_USE_CPP_RPC #define TVM_INFO_USE_CPP_RPC "NOT-FOUND" #endif @@ -324,7 +316,6 @@ TVM_DLL Map GetLibInfo() { {"USE_BLAS", TVM_INFO_USE_BLAS}, {"USE_BNNS", TVM_INFO_USE_BNNS}, {"USE_BYODT_POSIT", TVM_INFO_USE_BYODT_POSIT}, - {"USE_CMSISNN", TVM_INFO_USE_CMSISNN}, {"USE_COREML", TVM_INFO_USE_COREML}, {"USE_CPP_RPC", TVM_INFO_USE_CPP_RPC}, {"USE_CPP_RTVM", TVM_INFO_USE_CPP_RTVM}, @@ -339,8 +330,6 @@ TVM_DLL Map GetLibInfo() { {"USE_FLASHINFER", TVM_INFO_USE_FLASHINFER}, {"USE_AMX", TVM_INFO_USE_AMX}, {"USE_DNNL", TVM_INFO_USE_DNNL}, - {"USE_ETHOSN", TVM_INFO_USE_ETHOSN}, - {"USE_ETHOSU", TVM_INFO_USE_ETHOSU}, {"USE_FALLBACK_STL_MAP", TVM_INFO_USE_FALLBACK_STL_MAP}, {"USE_GRAPH_EXECUTOR_CUDA_GRAPH", TVM_INFO_USE_GRAPH_EXECUTOR_CUDA_GRAPH}, {"USE_GRAPH_EXECUTOR", TVM_INFO_USE_GRAPH_EXECUTOR}, @@ -357,8 +346,6 @@ TVM_DLL Map GetLibInfo() { {"USE_LLVM", TVM_INFO_USE_LLVM}, {"USE_MLIR", TVM_INFO_USE_MLIR}, {"USE_METAL", TVM_INFO_USE_METAL}, - {"USE_MICRO_STANDALONE_RUNTIME", TVM_INFO_USE_MICRO_STANDALONE_RUNTIME}, - {"USE_MICRO", TVM_INFO_USE_MICRO}, {"USE_MIOPEN", 
TVM_INFO_USE_MIOPEN}, {"USE_MKL", TVM_INFO_USE_MKL}, {"USE_MRVL", TVM_INFO_USE_MRVL}, diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc index 55093ff8eea9..b1caf28149b5 100644 --- a/src/target/llvm/codegen_llvm.cc +++ b/src/target/llvm/codegen_llvm.cc @@ -91,7 +91,6 @@ #include #include #include -#include #include #include diff --git a/src/target/source/codegen_c_host.cc b/src/target/source/codegen_c_host.cc index b22d32d6c5e3..2e059eeee520 100644 --- a/src/target/source/codegen_c_host.cc +++ b/src/target/source/codegen_c_host.cc @@ -24,7 +24,6 @@ #include #include -#include #include #include @@ -55,17 +54,6 @@ void CodeGenCHost::Init(bool output_ssa, bool emit_asserts, bool emit_fwd_func_d decl_stream << "#include \"tvm/runtime/c_backend_api.h\"\n"; decl_stream << "#include \n"; decl_stream << "#include \n"; - if (devices.find("ethos-u") != devices.end()) { - decl_stream << "#include \n"; - } - if (devices.find("cmsis-nn") != devices.end()) { - decl_stream << "#include \n"; - decl_stream << "#include \n"; - decl_stream << "#include \n"; - decl_stream << "#include \n"; - decl_stream << "#include \n"; - decl_stream << "#include \n"; - } CodeGenC::Init(output_ssa); } diff --git a/src/tir/contrib/ethosu/passes.cc b/src/tir/contrib/ethosu/passes.cc deleted file mode 100644 index ac1cf0ef11bb..000000000000 --- a/src/tir/contrib/ethosu/passes.cc +++ /dev/null @@ -1,987 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file tir/contrib/ethosu/passes.cc - * - * \brief Passes used in TIR lowering for the microNPU compiler. - */ -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "../../transforms/ir_utils.h" - -namespace tvm { - -/*! - * \brief The maximum number of movements allowed for a copy in the CopyComputeReordering pass. - */ -constexpr const char* kCopyComputeReorderingMaxCopyMovements = - "tir.contrib.ethos-u.copy_compute_reordering_max_copy_movements"; -TVM_REGISTER_PASS_CONFIG_OPTION(kCopyComputeReorderingMaxCopyMovements, Integer); - -/*! - * \brief Whether to reorder copies and computes based on cycle count. - */ -constexpr const char* kCopyComputeReorderingReorderByCycles = - "tir.contrib.ethos-u.copy_compute_reordering_reorder_by_cycles"; -TVM_REGISTER_PASS_CONFIG_OPTION(kCopyComputeReorderingReorderByCycles, Bool); - -namespace tir { -namespace contrib { -namespace ethosu { - -namespace { - -struct FlattenUnwrapResult { - std::vector seq; - std::vector rewrap_nest; -}; - -/*! \brief Utility function to flatten SeqStmt - * - * An AttrStmt or DeclBuffer may internally contain SeqStmt nodes that - * we want to flatten. Unlike SeqStmt::Flatten, this function unwraps - * these node types when encountered. 
- * - * \param stmt The tir::Stmt to be flattened. - * \return The flattened statements - */ -FlattenUnwrapResult FlattenUnwrap(const Stmt& stmt) { - std::vector seq_stmt; - std::vector rewrap_nest; - std::function flatten_unwrap = [&](const Stmt& stmt) { - if (auto* ptr = stmt.as()) { - rewrap_nest.push_back(DeclBuffer(ptr->buffer, Evaluate(0))); - flatten_unwrap(ptr->body); - } else if (auto* ptr = stmt.as()) { - for (const auto& sub_stmt : ptr->seq) { - flatten_unwrap(sub_stmt); - } - } else if (auto* ptr1 = stmt.as(); ptr1 && ptr1->value.as()) { - // Skip - } else { - seq_stmt.push_back(stmt); - } - }; - flatten_unwrap(stmt); - return FlattenUnwrapResult{seq_stmt, rewrap_nest}; -} - -/*! Returns the arguments of the given statement */ -Array GetStmtArgs(Stmt stmt) { - while (auto* ptr = stmt.as()) { - stmt = ptr->body; - } - - auto attr{stmt.as()}; - Stmt eval_stmt{attr ? attr->body : stmt}; - auto eval{eval_stmt.as()}; - ICHECK(eval) << "Expected statement to be an evaluate node, but was " << eval_stmt->GetTypeKey(); - auto call{eval->value.as()}; - ICHECK(call) << "Expected expression to be a call node, but was " << eval->value->GetTypeKey(); - return call->args; -} - -enum class StmtType { global_copy, local_copy, compute }; - -/*! Returns the type of the given statement */ -StmtType GetStmtType(const Stmt& stmt) { - Array args{GetStmtArgs(stmt)}; - if (args[0].as()->value == "ethosu_copy") { - if (args[3].as()->buffer.scope() == "global") { - return StmtType::global_copy; - } else { - return StmtType::local_copy; - } - } - return StmtType::compute; -} -/*! Returns the buffer read my the given copy statement */ -Buffer GetCopyReadBuffer(const Stmt& stmt) { - Array args{GetStmtArgs(stmt)}; - return args[1].as()->buffer; -} - -/*! Returns the buffer written my the given copy statement */ -Buffer GetCopyWriteBuffer(const Stmt& stmt) { - Array args{GetStmtArgs(stmt)}; - return args[3].as()->buffer; -} - -/*! Returns the length of the given copy statement */ -int64_t GetCopyLength(const Stmt& stmt) { - Array args{GetStmtArgs(stmt)}; - return args[2].as()->value; -} - -/*! Returns the cycles of the given statement */ -int64_t GetStmtCycles(const Stmt& stmt) { - auto attr{stmt.as()}; - if (attr && attr->attr_key == "pragma_compute_cycles_hint") { - int64_t cycles{Downcast(attr->value)->value}; - return cycles; - } - return 0; -} -} // namespace - -/*! - * \brief This mutator moves allocates to the top of the body of the main - * function. - * - * Note: This pass can currently only be run in conjunction with the - * LowerToTIR() pass as it expects a single primitive function called - * "main" that is being offloaded to the NPU. - * - * For example, - * Before: - * allocate { - * extern_call(...) - * allocate { - * extern_call(...) - * } - * } - * - * After: - * allocate { - * allocate { - * extern_call(...) - * extern_call(...) 
- * } - * } - */ -class HoistAllocatesMutator : public StmtExprMutator { - public: - HoistAllocatesMutator() {} - - PrimFunc operator()(PrimFunc main_func) { - Stmt new_main_func_body = SeqStmt::Flatten(this->VisitStmt(main_func->body)); - - // Write all allocates that were removed in reverse order - for (auto it = allocates_.rbegin(); it != allocates_.rend(); it++) { - Allocate current_alloc = *it; - if (it != allocates_.rbegin()) { - new_main_func_body = SeqStmt::Flatten(new_main_func_body); - } - new_main_func_body = - Allocate(current_alloc->buffer_var, current_alloc->dtype, current_alloc->extents, - current_alloc->condition, new_main_func_body, current_alloc->annotations, - current_alloc->span); - } - - PrimFunc new_main_func = PrimFunc(main_func->params, new_main_func_body, main_func->ret_type, - main_func->buffer_map, main_func->attrs); - return new_main_func; - } - - private: - Stmt VisitStmt_(const AllocateNode* op) override { - allocates_.push_back(GetRef(op)); - return VisitStmt(op->body); - } - - /*! A stack to store allocates as they are visited. */ - std::vector allocates_; -}; - -/*! - * \brief A pass to hoist allocate nodes to the top of the body of the main function. - * - * \return tvm::transform::Pass - */ -tvm::transform::Pass HoistAllocates() { - auto pass_func = [=](PrimFunc f, IRModule mod, tvm::transform::PassContext ctx) { - ICHECK(mod->GetGlobalVars().size() == 1 && mod->ContainGlobalVar("main")) - << "Expected a single primitive function called 'main'. Please run the HoistAllocates pass " - "in conjunction with the LowerToTIR() pass."; - return HoistAllocatesMutator()(f); - }; - return tvm::tir::transform::CreatePrimFuncPass(pass_func, 0, "tir.contrib.ethos-u.HoistAllocates", - {}); -} - -TVM_REGISTER_GLOBAL("tir.contrib.ethos-u.HoistAllocates").set_body_typed(HoistAllocates); - -/*! - * \brief Reorders copy and compute nodes in such a way that independent DMA copies - * and computes happen in parallel. - * Copies to buffers with local scope are not reordered since they copy LUT - * into the SHRAM and that already happens in parallel with copying weights into - * the weights encoder. - */ -class CopyComputeReorderingMutator : public StmtExprMutator { - public: - explicit CopyComputeReorderingMutator(int max_copy_movements, bool reorder_by_cycles) - : _max_copy_movements{max_copy_movements}, _reorder_by_cycles{reorder_by_cycles} {} - - PrimFunc operator()(PrimFunc main_func) { - if (_max_copy_movements > 0) { - auto prim_func_node{main_func.CopyOnWrite()}; - prim_func_node->body = this->VisitStmt(main_func->body); - return GetRef(prim_func_node); - } - return main_func; - } - - private: - // A structure to hold a compute op with the corresponding weights/bias copy and LUT copy - struct OpWithCopies { - Stmt compute_op{}; - Stmt global_copy{}; - Stmt local_copy{}; - }; - - Stmt VisitStmt_(const SeqStmtNode* op) override { - auto [seq, rewrap_nest] = FlattenUnwrap(GetRef(op)); - - if (seq.size() <= 1) { - return StmtExprMutator::VisitStmt_(op); - } - - std::vector new_seq(seq.begin(), seq.end()); - - // Reorder the copies and computes based on the cycle count - if (_reorder_by_cycles) { - // We can't hide the first copy, so ignore it for the purpose of hiding copies - Stmt first_copy{}; - if (stmt_is_global_copy(new_seq[0]) || - (stmt_is_local_copy(new_seq[0]) && stmt_is_global_copy(new_seq[1]))) { - auto copy_position = stmt_is_global_copy(new_seq[0]) ? 
0 : 1; - first_copy = new_seq[copy_position]; - new_seq.erase(new_seq.begin() + copy_position); - } - - // Build up a list of cells with the compute op and the copy ops that directly preceed it - std::vector ops{}; - for (size_t idx = 0; idx < new_seq.size(); ++idx) { - if (stmt_is_compute_op(new_seq[idx])) { - OpWithCopies new_op; - new_op.compute_op = new_seq[idx]; - if (idx > 0) { - auto prev_op = new_seq[idx - 1]; - if (!stmt_is_compute_op(prev_op)) { - if (stmt_is_local_copy(prev_op)) { - new_op.local_copy = prev_op; - } else { - new_op.global_copy = prev_op; - } - if (idx > 1) { - auto prev_prev_op = new_seq[idx - 2]; - if (!stmt_is_compute_op(prev_prev_op)) { - if (stmt_is_local_copy(prev_prev_op)) { - new_op.local_copy = prev_prev_op; - } else { - new_op.global_copy = prev_prev_op; - } - } - } - } - } - ops.push_back(new_op); - } - } - - // Move the global copies up by one. If in general the computes take longer than the copies, - // that should be good enough - for (size_t idx = 1; idx < ops.size(); ++idx) { - if (ops[idx].global_copy.as()) { - ops[idx - 1].global_copy = ops[idx].global_copy; - ops[idx].global_copy = {}; - } - } - - // If there are long copies, try to hide them further - for (size_t idx = ops.size() - 1; idx > 0; --idx) { - if (ops[idx].global_copy.as()) { - // Check whether the copy is hidden - int64_t copy_cycles{GetStmtCycles(ops[idx].global_copy)}; - int64_t compute_cycles{GetStmtCycles(ops[idx].compute_op)}; - bool is_hidden = compute_cycles >= copy_cycles; - - // If the previous compute op is not already hiding another copy, move the copy back, so - // that it would be hidden by multiple computes - while (!is_hidden && !ops[idx - 1].global_copy.as() && (idx > 0)) { - int64_t new_compute_cycles{GetStmtCycles(ops[idx - 1].compute_op)}; - ops[idx - 1].global_copy = ops[idx].global_copy; - ops[idx].global_copy = {}; - compute_cycles += new_compute_cycles; - is_hidden = compute_cycles >= copy_cycles; - --idx; - } - } - } - - // Reconstruct the op sequence from the vector of OpWithCopies - new_seq.clear(); - if (first_copy.as()) { - new_seq.push_back(first_copy); - } - for (auto& op : ops) { - if (op.global_copy.as()) { - new_seq.push_back(op.global_copy); - } - if (op.local_copy.as()) { - new_seq.push_back(op.local_copy); - } - if (op.compute_op.as()) { - new_seq.push_back(op.compute_op); - } - } - } else { - // Each copy statement to a buffer with global scope is moved up - // at most `_max_copy_movements` times. - for (size_t index = 0; index < new_seq.size(); ++index) { - if (GetStmtType(new_seq[index]) == StmtType::global_copy) { - int lower = std::max(0, static_cast(index) - _max_copy_movements); - for (int i = index; i > lower && (GetStmtType(new_seq[i - 1]) == StmtType::compute); - --i) { - std::swap(new_seq[i - 1], new_seq[i]); - } - } - } - } - - return MergeNest(rewrap_nest, SeqStmt::Flatten(new_seq)); - } - - bool stmt_is_global_copy(const Stmt& stmt) { return GetStmtType(stmt) == StmtType::global_copy; } - - bool stmt_is_local_copy(const Stmt& stmt) { return GetStmtType(stmt) == StmtType::local_copy; } - - bool stmt_is_compute_op(const Stmt& stmt) { return GetStmtType(stmt) == StmtType::compute; } - - /*! The maximum number of movements allowed for a copy. */ - int _max_copy_movements; - /*! Whether we use the cycle hint to determine the reordering. */ - bool _reorder_by_cycles; -}; - -/*! - * \brief A pass to reorder copy and compute nodes in such a way that independent DMA copies - * and computes happen in parallel. 
If reorder_by_cycles is set, we will ignore the - * max_copy_movements value. - * - * \param max_copy_movements: The maximum number of movements allowed for a copy. - * If None, the pass context option tir.contrib.ethos-u.copy_compute_reordering_max_copy_movements - * is used if provided, otherwise the default value will be 1. - * - * \param reorder_by_cycles: Whether to reorder copies and computes by cycles. - * If None, the pass context option tir.contrib.ethos-u.copy_compute_reordering_reorder_by_cycles - * is used if provided, otherwise the default value will be False. If the value is True, - * max_copy_movements will be ignored. - * \return tvm::transform::Pass - */ -tvm::transform::Pass CopyComputeReordering(Optional max_copy_movements, - Optional reorder_by_cycles) { - auto pass_func = [=](PrimFunc f, IRModule mod, tvm::transform::PassContext ctx) { - ICHECK(mod->GetGlobalVars().size() == 1 && mod->ContainGlobalVar("main")) - << "Expected a single primitive function called 'main'. Please run the " - "CopyComputeReordering " - "pass in conjunction with the LowerToTIR() pass."; - - auto copy_movements = max_copy_movements.value_or( - ctx->GetConfig(kCopyComputeReorderingMaxCopyMovements, Integer(1)).value()); - auto reorder = reorder_by_cycles.value_or( - ctx->GetConfig(kCopyComputeReorderingReorderByCycles, Bool(false)).value()); - return CopyComputeReorderingMutator(copy_movements.IntValue(), reorder)(f); - }; - return tvm::tir::transform::CreatePrimFuncPass(pass_func, 0, - "tir.contrib.ethos-u.CopyComputeReordering", {}); -} - -TVM_REGISTER_GLOBAL("tir.contrib.ethos-u.CopyComputeReordering") - .set_body_typed(CopyComputeReordering); - -/*! - * \brief This mutator removes all allocates. - */ -class RemoveAllocatesMutator : public StmtExprMutator { - public: - PrimFunc operator()(PrimFunc main_func) { - auto prim_func_node{main_func.CopyOnWrite()}; - prim_func_node->body = this->VisitStmt(main_func->body); - return GetRef(prim_func_node); - } - - private: - Stmt VisitStmt_(const AllocateNode* op) override { return VisitStmt(op->body); } -}; - -/*! - * \brief This extractor collects information used by the MergeConstantsMutator - */ -class MergeConstantsInfoExtractor : public StmtExprVisitor { - public: - class Info { - public: - /*! A stack to store allocates as they are visited. */ - std::vector allocates{}; - - /*! A list that contains in the i-th position the write buffer of the i-th statement - * if that statement is a copy to a buffer with global scope */ - std::vector> copy_write_buffers{}; - - /*! Maps a copy's write buffer to an index representing the - * new buffer and an offset in that buffer */ - std::unordered_map> - old_to_new_write_buffer{}; - - /*! Maps an index representing a new buffer to the length of that buffer */ - std::unordered_map new_buffers_length{}; - - /*! Maps an index representing a new buffer to the cycless needed to copy that buffer */ - std::unordered_map cycless{}; - }; - - Info operator()(PrimFunc main_func) { - this->VisitStmt(main_func->body); - return std::move(_info); - } - - private: - /*! 
The information collected by this extractor */ - Info _info{}; - - void VisitStmt_(const AllocateNode* op) override { - _info.allocates.push_back(GetRef(op)); - VisitStmt(op->body); - } - - void VisitStmt_(const SeqStmtNode* op) override { - std::vector seq_stmt = FlattenUnwrap(GetRef(op)).seq; - - if (seq_stmt.size() <= 1) { - StmtExprVisitor::VisitStmt_(op); - return; - } - - for (size_t i = 0; i < seq_stmt.size(); ++i) { - Stmt stmt{seq_stmt[i]}; - switch (GetStmtType(stmt)) { - case StmtType::global_copy: { - Buffer write_buffer{GetCopyWriteBuffer(stmt)}; - _info.copy_write_buffers.push_back(write_buffer); - _info.old_to_new_write_buffer[write_buffer.as()] = std::make_pair(-1, -1); - break; - } - case StmtType::local_copy: { - _info.copy_write_buffers.push_back(Optional{}); - break; - } - case StmtType::compute: { - _info.copy_write_buffers.push_back(Optional{}); - std::vector buffers{GetCopiedBuffersUsedByStmt(stmt)}; - if (buffers.empty()) { - continue; - } - _info.new_buffers_length[i] = 0; - for (Buffer buffer : buffers) { - for (size_t j{i - 1}; j >= 0; --j) { - if (_info.copy_write_buffers[j] == buffer) { - _info.old_to_new_write_buffer[buffer.as()] = - std::make_pair(i, _info.new_buffers_length[i]); - _info.new_buffers_length[i] += GetCopyLength(seq_stmt[j]); - _info.cycless[i] += GetStmtCycles(seq_stmt[j]); - break; - } - } - } - break; - } - } - } - } - - /*! Get all buffers written by copies and used by a given statement */ - std::vector GetCopiedBuffersUsedByStmt(const Stmt& stmt) { - std::vector buffers{}; - for (PrimExpr arg : GetStmtArgs(stmt)) { - if (auto buffer_load = arg.as()) { - Buffer buffer{buffer_load->buffer}; - // Check if the buffer has already been added - if (std::find(buffers.begin(), buffers.end(), buffer) == buffers.end()) { - // Check if the buffer is copied - if (_info.old_to_new_write_buffer.count(buffer.as())) { - buffers.push_back(buffer); - } - } - } - } - return buffers; - } -}; - -/*! - * \brief This mutator looks for the constants used by each compute operator - * and merges them into a single buffer. - * Constants written to a buffer with local scope are not merged. - */ -class MergeConstantsMutator : public StmtExprMutator { - public: - explicit MergeConstantsMutator(MergeConstantsInfoExtractor::Info info) : _info{std::move(info)} {} - - PrimFunc operator()(PrimFunc main_func, const Map& const_dict) { - // Rewrite - Stmt new_body = RewritePrimFuncBody(main_func->body); - std::unordered_set params_to_delete{}; - Map new_buffer_map{MakeNewBufferMap(main_func->buffer_map, ¶ms_to_delete)}; - Array new_params{MakeNewParams(main_func->params, params_to_delete)}; - - // Make the new const dict - Array> args_to_merge{GetArgsToMerge(main_func->buffer_map, main_func->params)}; - Map> buffers_to_merge{ - GetArgsToMergeWithoutArgsNotInConstDict(args_to_merge, const_dict)}; - Map new_const_dict{MakeNewConstDict(buffers_to_merge, const_dict)}; - - // Make the new prim func - auto prim_func_node{main_func.CopyOnWrite()}; - prim_func_node->body = std::move(new_body); - prim_func_node->buffer_map = std::move(new_buffer_map); - prim_func_node->params = std::move(new_params); - PrimFunc f{GetRef(prim_func_node)}; - - // Add the new const dict as an attribute - f = WithAttr(std::move(f), "ethos-u.const_dict", new_const_dict); - - return f; - } - - private: - /*! The information collected by the MergeConstantsInfoExtractor */ - MergeConstantsInfoExtractor::Info _info; - - /*! 
Maps an index representing a new buffer to the new buffer */ - std::unordered_map new_buffers{}; - - /*! Maps a copy's read buffer to the new copy's read buffer */ - std::unordered_map old_to_new_read_buffers{}; - - /*! Maps an index representing a new buffer to the list of buffers to be merged in the new buffer - */ - std::unordered_map> buffers_to_merge{}; - - /*! A set of buffers to delete */ - std::unordered_set buffers_to_delete{}; - - Stmt RewritePrimFuncBody(Stmt body) { - std::unordered_map var_to_allocate{}; - - // Rewrite old allocates - std::unordered_set buffer_vars{GetVarsForWrittenCopyBuffers()}; - for (auto it{_info.allocates.rbegin()}; it != _info.allocates.rend(); ++it) { - Allocate alloc{*it}; - var_to_allocate[alloc->buffer_var.get()] = alloc; - if (buffer_vars.count(alloc->buffer_var.as()) == 0) { - body = Allocate(alloc->buffer_var, alloc->dtype, alloc->extents, alloc->condition, body, - alloc->annotations, alloc->span); - } - } - - // Rewrite new allocates - for (auto it{_info.copy_write_buffers.rbegin()}; it != _info.copy_write_buffers.rend(); ++it) { - if (Optional buffer_opt = *it) { - Buffer old_write_buffer{buffer_opt.value()}; - int new_buffer_index{ - _info.old_to_new_write_buffer[old_write_buffer.as()].first}; - - // Check if the allocate has already been created - if (new_buffers.count(new_buffer_index) == 0) { - BufferNode* new_buffer{old_write_buffer.CopyOnWrite()}; - new_buffer->shape = {_info.new_buffers_length[new_buffer_index]}; - - new_buffers[new_buffer_index] = GetRef(new_buffer); - - Allocate old_allocate{var_to_allocate[old_write_buffer->data.get()]}; - body = Allocate(new_buffer->data, new_buffer->dtype, new_buffer->shape, tir::const_true(), - body, old_allocate->annotations, old_allocate->span); - } - } - } - - // Rewrite operators - return this->VisitStmt(body); - } - - Stmt VisitStmt_(const AllocateNode* op) override { - auto allocate{CopyOnWrite(op)}; - allocate->body = this->VisitStmt(op->body); - return Stmt(allocate); - } - - Stmt VisitStmt_(const SeqStmtNode* op) override { - std::vector seq_stmt = FlattenUnwrap(GetRef(op)).seq; - - if (seq_stmt.size() <= 1) { - return StmtExprMutator::VisitStmt_(op); - } - - Array new_seq{}; - for (size_t i{0}; i < seq_stmt.size(); ++i) { - Stmt stmt{seq_stmt[i]}; - - switch (GetStmtType(stmt)) { - case StmtType::global_copy: { - Buffer old_write_buffer{_info.copy_write_buffers[i].value()}; - std::pair pair{ - _info.old_to_new_write_buffer[old_write_buffer.as()]}; - int new_buffer_index{pair.first}; - int new_buffer_offset{pair.second}; - UpdateBuffersToMergeAndDelete(stmt, new_buffer_index, new_buffer_offset); - - if (!IsCopyToBeDeleted(new_buffer_offset)) { - Optional cycless{GetMergedCycles(new_buffer_index)}; - new_seq.push_back(MakeNewStmt( - stmt, MakeNewCopyArgs(stmt, old_write_buffer, new_buffer_index), cycless)); - } - break; - } - case StmtType::local_copy: { - new_seq.push_back(stmt); - break; - } - case StmtType::compute: { - new_seq.push_back(MakeNewStmt(stmt, MakeNewComputeArgs(stmt))); - break; - } - } - } - return SeqStmt::Flatten(new_seq); - } - - /*! Returns the variables of the buffers written by copies */ - std::unordered_set GetVarsForWrittenCopyBuffers() { - std::unordered_set buffer_vars{}; - std::transform(_info.old_to_new_write_buffer.begin(), _info.old_to_new_write_buffer.end(), - std::inserter(buffer_vars, buffer_vars.begin()), - [](std::pair> pair) -> const VarNode* { - return pair.first->data.as(); - }); - return buffer_vars; - } - - /*! 
Returns the cycles of the new buffer at the given index */ - Optional GetMergedCycles(int new_buffer_index) { - auto it = _info.cycless.find(new_buffer_index); - if (it != _info.cycless.end()) { - return Integer(it->second); - } - return Optional{}; - } - - /*! Returns true if a copy must be deleted, false otherwise */ - bool IsCopyToBeDeleted(int new_buffer_offset) { return new_buffer_offset > 0; } - - Array MakeNewCopyArgs(const Stmt& stmt, const Buffer& old_write_buffer, - int new_buffer_index) { - Array args{GetStmtArgs(stmt)}; - int new_length{_info.new_buffers_length[new_buffer_index]}; - - Array new_args{}; - for (size_t i = 0; i < args.size(); ++i) { - switch (i) { - case 1: /* read_address */ { - auto buffer_load = args[1].as(); - Buffer buffer{buffer_load->buffer}; - Buffer new_buffer{buffer->data, - buffer->dtype, - {new_length}, - buffer->strides, - buffer->elem_offset, - buffer->name, - buffer->data_alignment, - buffer->offset_factor, - buffer->buffer_type, - buffer->axis_separators, - buffer->span}; - old_to_new_read_buffers[buffer.as()] = new_buffer; - new_args.push_back(BufferLoad(new_buffer, buffer_load->indices, buffer_load->predicate, - buffer_load->span)); - break; - } - case 2: /* length */ { - new_args.push_back(new_length); - break; - } - case 3: /* write_address */ { - new_args.push_back(MakeNewBufferLoad(old_write_buffer, 0, true).value()); - break; - } - default: - new_args.push_back(args[i]); - break; - } - } - return new_args; - } - - Array MakeNewComputeArgs(const Stmt& stmt) { - Array args{GetStmtArgs(stmt)}; - Array new_args{}; - for (size_t i = 0; i < args.size(); ++i) { - if (auto buffer_load = args[i].as()) { - BufferLoad new_buffer_load{ - MakeNewBufferLoad(buffer_load->buffer, buffer_load->indices[0], false) - .value_or(GetRef(buffer_load))}; - new_args.push_back(new_buffer_load); - } else { - new_args.push_back(args[i]); - } - } - return new_args; - } - - Stmt MakeNewStmt(const Stmt& stmt, const Array& new_args, - Optional cycless = Optional{}) { - auto attr{stmt.as()}; - Stmt eval_stmt{attr ? attr->body : stmt}; - auto eval{eval_stmt.as()}; - ICHECK(eval) << "Expected statement to be an evaluate node, but was " - << eval_stmt->GetTypeKey(); - auto call{eval->value.as()}; - ICHECK(call) << "Expected expression to be a call node, but was " << eval->value->GetTypeKey(); - - Call new_call{call->dtype, call->op, new_args, call->span}; - Evaluate new_eval{new_call, eval->span}; - - if (attr) { - ICHECK(attr->attr_key == "pragma_compute_cycles_hint"); - PrimExpr value = cycless.value_or(attr->value); - return AttrStmt{attr->node, attr->attr_key, value, new_eval, attr->span}; - } else { - return std::move(new_eval); - } - } - - Optional MakeNewBufferLoad(const Buffer& write_buffer, const PrimExpr& old_index, - bool only_old_index) { - auto it = _info.old_to_new_write_buffer.find(write_buffer.as()); - if (it != _info.old_to_new_write_buffer.end()) { - std::pair pair{it->second}; - int new_buffer_index{pair.first}; - PrimExpr new_index{only_old_index ? 
old_index : (pair.second + old_index)}; - return BufferLoad{new_buffers[new_buffer_index], {new_index}}; - } - return Optional{}; - } - - Map MakeNewBufferMap(const Map& buffer_map, - std::unordered_set* params_to_delete) { - Map new_buffer_map{}; - for (std::pair pair : buffer_map) { - Var var{pair.first}; - Buffer buffer{pair.second}; - - if (buffers_to_delete.count(buffer.as()) == 1) { - params_to_delete->insert(var.as()); - } else if (old_to_new_read_buffers.count(buffer.as()) == 1) { - new_buffer_map.Set(var, old_to_new_read_buffers[buffer.as()]); - } else { - new_buffer_map.Set(var, buffer); - } - } - return new_buffer_map; - } - - Array MakeNewParams(const Array& params, - const std::unordered_set& params_to_delete) { - std::vector new_params{}; - for (Var var : params) { - if (params_to_delete.count(var.as()) == 0) { - new_params.push_back(var); - } - } - return new_params; - } - - void UpdateBuffersToMergeAndDelete(const Stmt& stmt, int new_buffer_index, - int new_buffer_offset) { - Array args{GetStmtArgs(stmt)}; - Buffer read_buffer{GetCopyReadBuffer(stmt)}; - - if (buffers_to_merge.count(new_buffer_index) == 0) { - buffers_to_merge[new_buffer_index] = std::vector{read_buffer}; - } else { - buffers_to_merge[new_buffer_index].push_back(read_buffer); - } - - if (new_buffer_offset > 0) { - buffers_to_delete.insert(read_buffer.as()); - } - } - - /*! Returns an array whose elements are the indices of the function arguments to be merged. - * Example: if a function has three arguments and the second and the third ones must - * be merged then the array is: [[0], [1, 2], [3]] */ - Array> GetArgsToMerge(const Map& buffer_map, - const Array& params) { - std::unordered_map buffer_to_var{}; - for (std::pair var_buffer : buffer_map) { - buffer_to_var[var_buffer.second.as()] = var_buffer.first; - } - - std::unordered_map var_to_index{}; - for (int i = 0; i < static_cast(params.size()); ++i) { - var_to_index[params[i].as()] = i; - } - - std::vector> vector{}; - for (std::pair> index_vector : buffers_to_merge) { - std::vector indices{}; - for (Buffer buffer : index_vector.second) { - const VarNode* var{buffer_to_var[buffer.as()].as()}; - IntImm index{DataType::Int(64), var_to_index[var]}; - var_to_index.erase(var); - auto it = std::find_if(indices.begin(), indices.end(), - [&](IntImm value) { return value->value == index->value; }); - if (it == indices.end()) { - indices.push_back(index); - } - } - vector.push_back(Array{indices}); - } - - for (std::pair var_index : var_to_index) { - vector.push_back(Array{IntImm(DataType::Int(64), var_index.second)}); - } - std::sort(vector.begin(), vector.end(), - [](Array a, Array b) { return a[0]->value < b[0]->value; }); - return vector; - } - - Map> GetArgsToMergeWithoutArgsNotInConstDict( - const Array>& args_to_merge, const Map& const_dict) { - Map> new_args_to_merge{}; - bool first_arg_found = false; - int64_t new_arg_key = 0; // the updated key of the merged const_dict - for (Array args : args_to_merge) { - IntImm key{args[0]}; - auto it = std::find_if(const_dict.begin(), const_dict.end(), - [&](std::pair pair) { - return pair.first->value == key->value; - }); - if (it != const_dict.end()) { - if (first_arg_found == false) { - first_arg_found = true; - new_arg_key = key->value; - } - new_args_to_merge.Set(IntImm(DataType::Int(64), new_arg_key), args); - } - if (first_arg_found) { - new_arg_key++; - } - } - return new_args_to_merge; - } - - Map MakeNewConstDict(const Map>& args_to_merge, - Map const_dict) { - Map new_const_dict{}; - if 
(args_to_merge.size() == 0) { - return new_const_dict; - } - - for (auto const& elem : args_to_merge) { - IntImm key = elem.first; - Array args = elem.second; - int64_t size = 0; - for (IntImm arg : args) { - auto it = std::find_if(const_dict.begin(), const_dict.end(), - [&](auto pair) { return pair.first->value == arg->value; }); - runtime::NDArray arg_constant{(*it).second}; - size += runtime::GetDataSize(*arg_constant.operator->()); - } - - runtime::NDArray constant = runtime::NDArray::Empty({size}, DataType::UInt(8), {kDLCPU, 0}); - - size_t offset = 0; - for (IntImm arg : args) { - auto it = std::find_if(const_dict.begin(), const_dict.end(), - [&](auto pair) { return pair.first->value == arg->value; }); - runtime::NDArray arg_constant{(*it).second}; - size_t nbytes = runtime::GetDataSize(*arg_constant.operator->()); - arg_constant.CopyToBytes(static_cast(constant->data) + offset, nbytes); - offset += nbytes; - } - new_const_dict.Set(key, constant); - } - return new_const_dict; - } -}; - -/*! - * \brief This pass looks for the constants used by each compute operator - * and merges them into a single buffer. - * Constants written to a buffer with local scope are not merged. - * \return tvm::transform::Pass - */ -tvm::transform::Pass MergeConstants() { - auto pass_func = [=](PrimFunc f, IRModule mod, tvm::transform::PassContext ctx) { - ICHECK(mod->GetGlobalVars().size() == 1 && mod->ContainGlobalVar("main")) - << "Expected a single primitive function called 'main'. Please run the " - "MergeConstants pass in conjunction with the LowerToTIR() pass."; - Optional> const_dict{ - f->attrs.GetAttr("ethos-u.const_dict", Optional>{})}; - ICHECK(const_dict) << "Expected a ethos-u.const_dict attribute"; - - MergeConstantsInfoExtractor::Info info{MergeConstantsInfoExtractor()(f)}; - f = RemoveAllocatesMutator()(f); - return MergeConstantsMutator(info)(f, const_dict.value()); - }; - return tvm::tir::transform::CreatePrimFuncPass(pass_func, 0, "tir.contrib.ethos-u.MergeConstants", - {}); -} - -TVM_REGISTER_GLOBAL("tir.contrib.ethos-u.MergeConstants").set_body_typed(MergeConstants); - -/*! - * \brief This pass removes the ethos-u.const_dict attribute - * \return tvm::transform::Pass - */ -class RemoveConstDictAttributeMutator : public StmtExprMutator { - public: - RemoveConstDictAttributeMutator() {} - - PrimFunc operator()(PrimFunc main_func) { - return WithoutAttr(std::move(main_func), "ethos-u.const_dict"); - } -}; - -tvm::transform::Pass RemoveConstDictAttribute() { - auto pass_func = [=](PrimFunc f, IRModule mod, tvm::transform::PassContext ctx) { - return RemoveConstDictAttributeMutator()(f); - }; - return tvm::tir::transform::CreatePrimFuncPass( - pass_func, 0, "tir.contrib.ethos-u.RemoveConstDictAttribute", {}); -} - -TVM_REGISTER_GLOBAL("tir.contrib.ethos-u.RemoveConstDictAttribute") - .set_body_typed(RemoveConstDictAttribute); - -} // namespace ethosu -} // namespace contrib -} // namespace tir -} // namespace tvm diff --git a/tests/cpp/microtvm_runtime_standalone_test.cc b/tests/cpp/microtvm_runtime_standalone_test.cc deleted file mode 100644 index 8a9ec1d4f85b..000000000000 --- a/tests/cpp/microtvm_runtime_standalone_test.cc +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include - -#include -#include -#include - -#ifdef USE_MICRO_STANDALONE_RUNTIME - -// Use system(..), `gcc -shared -fPIC`, thus restrict the test to OS X for now. -#if defined(__APPLE__) && defined(__MACH__) - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -TVM_REGISTER_GLOBAL("test.sch").set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { - *rv = ::tvm::topi::generic::schedule_injective(args[0], args[1]); -}); - -TEST(MicroStandaloneRuntime, BuildModule) { - using namespace tvm; - auto tensor_type = relay::TensorType({2, 3}, ::tvm::runtime::DataType::Float(32)); - auto a = relay::Var("a", tensor_type); - auto b = relay::Var("b", tensor_type); - auto add_op = relay::Op::Get("add"); - auto x = relay::Call(add_op, {a, b}, tvm::Attrs(), {}); - auto c = relay::Var("c", tensor_type); - auto y = relay::Call(add_op, {x, c}, tvm::Attrs(), {}); - auto func = relay::Function(relay::FreeVars(y), y, relay::Type(), {}); - auto A = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - auto B = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - auto C = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - - auto pA = static_cast(A->data); - auto pB = static_cast(B->data); - auto pC = static_cast(C->data); - - for (int i = 0; i < 6; ++i) { - pA[i] = i; - pB[i] = i + 1; - pC[i] = i + 2; - } - // get schedule - auto reg = tvm::runtime::Registry::Get("relay.op._Register"); - auto s_i = tvm::runtime::Registry::Get("test.sch"); - if (!reg) { - LOG(FATAL) << "no _Register"; - } - if (!s_i) { - LOG(FATAL) << "no test_sch"; - } - (*reg)("add", "FTVMSchedule", *s_i, 10); - // build - auto pfb = tvm::runtime::Registry::Get("relay.build_module._BuildModule"); - tvm::runtime::Module build_mod = (*pfb)(); - auto build_f = build_mod.GetFunction("build", false); - auto json_f = build_mod.GetFunction("get_graph_json", false); - auto mod_f = build_mod.GetFunction("get_module", false); - Map targets; - - Target llvm_tgt = Target("llvm"); - targets.Set(0, llvm_tgt); - build_f(func, targets, llvm_tgt, runtime::kTvmExecutorGraph, ""); - std::string json = json_f(); - tvm::runtime::Module mod = mod_f(); - std::string o_fname = std::tmpnam(nullptr); - std::string so_fname = std::tmpnam(nullptr); - mod->SaveToFile(o_fname, "o"); - const std::vector args = {"gcc", "-shared", "-fPIC", "-o", so_fname, o_fname}; - std::stringstream s; - for (auto& c : args) { - s << c << " "; - } - const auto ss = s.str(); - const auto ret = system(ss.c_str()); - ASSERT_EQ(ret, 0); - // Now, execute the minimal runtime. 
- auto* dsoModule = MicroTVMRuntimeDSOModuleCreate(so_fname.c_str(), so_fname.size()); - ASSERT_NE(dsoModule, nullptr); - auto* handle = MicroTVMRuntimeCreate(json.c_str(), json.size(), dsoModule); - ASSERT_NE(handle, nullptr); - - MicroTVMRuntimeSetInput(handle, 0, const_cast(A.operator->())); - MicroTVMRuntimeSetInput(handle, 1, const_cast(B.operator->())); - MicroTVMRuntimeSetInput(handle, 2, const_cast(C.operator->())); - MicroTVMRuntimeRun(handle); - auto Y = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0}); - MicroTVMRuntimeGetOutput(handle, 0, const_cast(Y.operator->())); - auto* pY = static_cast(Y->data); - for (int i = 0; i < 6; ++i) { - CHECK_LT(fabs(pY[i] - (i + (i + 1) + (i + 2))), 1e-4); - } - MicroTVMRuntimeDestroy(handle); - MicroTVMRuntimeDSOModuleDestroy(dsoModule); -} - -#endif -#endif diff --git a/tests/cpp/relay/backend/contrib/cmsisnn/buffer_size_test.cc b/tests/cpp/relay/backend/contrib/cmsisnn/buffer_size_test.cc deleted file mode 100644 index 2094b70eb872..000000000000 --- a/tests/cpp/relay/backend/contrib/cmsisnn/buffer_size_test.cc +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -#ifdef TVM_USE_CMSISNN - -#include "../../../../../../src/relay/backend/contrib/cmsisnn/buffer_size.h" - -#include -#include -#include - -#include -#include -#include - -#include "../../../../../../src/relay/backend/contrib/cmsisnn/compiler_attrs.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -static std::random_device rd; -static std::mt19937 gen(rd()); -static std::uniform_int_distribution<> fake_parameters(2, 100); - -static const Target kHasMVE("cmsis-nn -mcpu=cortex-m55"); -static const Target kHasDSP("cmsis-nn -mcpu=cortex-m55 -mattr=+nomve"); -static const Target kNoExt("cmsis-nn -mcpu=cortex-m55 -mattr=+nodsp,+nomve"); - -class CMSISNNCalculatedBufferSize : public testing::TestWithParam> {}; - -TEST(CMSISNNConv2dBufferSizeInt8, Conv1x1) { - int32_t any = fake_parameters(gen); - auto conv2d_1x1 = [=](Target target, int32_t input_c) { - return Conv2dBufferSizeInt8(target, 0, 0, any, any, input_c, any, any, 1, 1, 1, 1, 1, 1); - }; - - ASSERT_EQ(conv2d_1x1(kNoExt, 4), 0); - ASSERT_EQ(conv2d_1x1(kNoExt, 8), 0); - ASSERT_EQ(conv2d_1x1(kNoExt, 12), 0); - ASSERT_EQ(conv2d_1x1(kNoExt, 16), 0); - ASSERT_EQ(conv2d_1x1(kNoExt, 32), 0); - - ASSERT_EQ(conv2d_1x1(kHasDSP, 4), 0); - ASSERT_EQ(conv2d_1x1(kHasDSP, 8), 0); - ASSERT_EQ(conv2d_1x1(kHasDSP, 12), 0); - ASSERT_EQ(conv2d_1x1(kHasDSP, 16), 0); - ASSERT_EQ(conv2d_1x1(kHasDSP, 32), 0); - - ASSERT_EQ(conv2d_1x1(kHasMVE, 4), 0); - ASSERT_EQ(conv2d_1x1(kHasMVE, 8), 0); - ASSERT_EQ(conv2d_1x1(kHasMVE, 12), 0); - ASSERT_EQ(conv2d_1x1(kHasMVE, 16), 0); - ASSERT_EQ(conv2d_1x1(kHasMVE, 32), 0); -} - -TEST(CMSISNNConv2dBufferSizeInt8, Conv1xN) { - int32_t any = fake_parameters(gen); - int32_t input_c = fake_parameters(gen); - int32_t filter_w = fake_parameters(gen); - int32_t filter_h = 1; - int32_t calculated_buffer = (2 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); - - auto conv2d_1xn = [=](Target target, int32_t output_w) { - return Conv2dBufferSizeInt8(target, any, any, 1, 1, input_c, 1, output_w, any, any, 1, 1, - filter_w, filter_h); - }; - - ASSERT_EQ(conv2d_1xn(kNoExt, 4), calculated_buffer); - ASSERT_EQ(conv2d_1xn(kNoExt, 8), calculated_buffer); - ASSERT_EQ(conv2d_1xn(kNoExt, 12), calculated_buffer); - ASSERT_EQ(conv2d_1xn(kNoExt, 16), calculated_buffer); - ASSERT_EQ(conv2d_1xn(kNoExt, 32), calculated_buffer); - - ASSERT_EQ(conv2d_1xn(kHasDSP, 4), calculated_buffer); - ASSERT_EQ(conv2d_1xn(kHasDSP, 8), calculated_buffer); - ASSERT_EQ(conv2d_1xn(kHasDSP, 12), calculated_buffer); - ASSERT_EQ(conv2d_1xn(kHasDSP, 16), calculated_buffer); - ASSERT_EQ(conv2d_1xn(kHasDSP, 32), calculated_buffer); - - ASSERT_EQ(conv2d_1xn(kHasMVE, 4), 0); - ASSERT_EQ(conv2d_1xn(kHasMVE, 8), 0); - ASSERT_EQ(conv2d_1xn(kHasMVE, 12), 0); - ASSERT_EQ(conv2d_1xn(kHasMVE, 16), 0); - ASSERT_EQ(conv2d_1xn(kHasMVE, 32), 0); -} - -TEST(CMSISNNConv2dBufferSizeInt8, Default) { - int32_t any = fake_parameters(gen); - - int32_t input_c = fake_parameters(gen); - int32_t filter_w = fake_parameters(gen); - int32_t filter_h = fake_parameters(gen); - int32_t calculated_buffer = (2 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); - int32_t col_length = input_c * filter_w * filter_h; - col_length = (col_length + 7) / 8; - int32_t calculated_buffer_mve = 4 * col_length * 8 * (int32_t)sizeof(int8_t); - - auto conv2d = [=](Target target, int32_t output_w) { - return Conv2dBufferSizeInt8(target, any, any, 1, 1, input_c, 1, output_w, any, any, any, any, - filter_w, filter_h); - }; - - ASSERT_EQ(conv2d(kNoExt, 4), 
calculated_buffer); - ASSERT_EQ(conv2d(kNoExt, 8), calculated_buffer); - ASSERT_EQ(conv2d(kNoExt, 12), calculated_buffer); - ASSERT_EQ(conv2d(kNoExt, 16), calculated_buffer); - ASSERT_EQ(conv2d(kNoExt, 32), calculated_buffer); - - ASSERT_EQ(conv2d(kHasDSP, 4), calculated_buffer); - ASSERT_EQ(conv2d(kHasDSP, 8), calculated_buffer); - ASSERT_EQ(conv2d(kHasDSP, 12), calculated_buffer); - ASSERT_EQ(conv2d(kHasDSP, 16), calculated_buffer); - ASSERT_EQ(conv2d(kHasDSP, 32), calculated_buffer); - - ASSERT_EQ(conv2d(kHasMVE, 4), calculated_buffer_mve); - ASSERT_EQ(conv2d(kHasMVE, 8), calculated_buffer_mve); - ASSERT_EQ(conv2d(kHasMVE, 12), calculated_buffer_mve); - ASSERT_EQ(conv2d(kHasMVE, 16), calculated_buffer_mve); - ASSERT_EQ(conv2d(kHasMVE, 32), calculated_buffer_mve); -} - -TEST(CMSISNNConv2dBufferSizeInt16, Default) { - int32_t any = fake_parameters(gen); - - auto conv2d_int16_buffer = [=](Target target, int32_t input_c, int32_t filter_w, - int32_t filter_h) { - return Conv2dBufferSizeInt16(target, any, any, 1, 1, input_c, any, any, any, any, 1, 1, - filter_w, filter_h); - }; - - auto calculated_buffer = [=](int32_t input_c, int32_t filter_w, int32_t filter_h) { - return (2 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); - }; - - ASSERT_EQ(conv2d_int16_buffer(kNoExt, 3, 5, 5), 0); - ASSERT_EQ(conv2d_int16_buffer(kNoExt, 32, 3, 3), 0); - - ASSERT_EQ(conv2d_int16_buffer(kHasDSP, 3, 3, 3), calculated_buffer(3, 3, 3)); - ASSERT_EQ(conv2d_int16_buffer(kHasDSP, 12, 5, 5), calculated_buffer(12, 5, 5)); - ASSERT_EQ(conv2d_int16_buffer(kHasDSP, 24, 5, 5), 0); - - ASSERT_EQ(conv2d_int16_buffer(kHasMVE, 3, 3, 3), 0); - ASSERT_EQ(conv2d_int16_buffer(kHasMVE, 12, 5, 5), 0); - ASSERT_EQ(conv2d_int16_buffer(kHasMVE, 24, 5, 5), 0); -} - -TEST(CMSISNNDepthwiseConv2dBufferSizeInt8, UnEvenChannels) { - int32_t filter_w = fake_parameters(gen); - int32_t filter_h = fake_parameters(gen); - int32_t input_n = 1; - - auto depthwise_conv2d_with_channels = [=](Target target, int32_t input_c, int32_t output_c) { - return DepthwiseConv2dBufferSizeInt8(target, input_n, input_c, output_c, filter_w, filter_h, 1, - 1, 1); - }; - - ASSERT_EQ(depthwise_conv2d_with_channels(kNoExt, 4, 6), 0); - ASSERT_EQ(depthwise_conv2d_with_channels(kNoExt, 8, 7), 0); - ASSERT_EQ(depthwise_conv2d_with_channels(kHasDSP, 4, 6), 0); - ASSERT_EQ(depthwise_conv2d_with_channels(kHasDSP, 8, 7), 0); - ASSERT_EQ(depthwise_conv2d_with_channels(kHasMVE, 4, 6), 0); - ASSERT_EQ(depthwise_conv2d_with_channels(kHasMVE, 8, 7), 0); -} - -TEST(CMSISNNDepthwiseConv2dBufferSizeInt8, MultipleBatches) { - int32_t input_output_c = fake_parameters(gen); - int32_t filter_w = fake_parameters(gen); - int32_t filter_h = fake_parameters(gen); - - auto depthwise_conv2d_with_batch = [=](Target target, int32_t input_n) { - return DepthwiseConv2dBufferSizeInt8(target, input_n, input_output_c, input_output_c, filter_w, - filter_h, 1, 1, 1); - }; - - ASSERT_EQ(depthwise_conv2d_with_batch(kNoExt, 4), 0); - ASSERT_EQ(depthwise_conv2d_with_batch(kNoExt, 7), 0); - ASSERT_EQ(depthwise_conv2d_with_batch(kHasDSP, 4), 0); - ASSERT_EQ(depthwise_conv2d_with_batch(kHasDSP, 7), 0); - ASSERT_EQ(depthwise_conv2d_with_batch(kHasMVE, 4), 0); - ASSERT_EQ(depthwise_conv2d_with_batch(kHasMVE, 7), 0); -} - -TEST(CMSISNNDepthwiseConv2dBufferSizeInt8, Default) { - int32_t input_output_c = fake_parameters(gen); - int32_t filter_w = fake_parameters(gen); - int32_t filter_h = fake_parameters(gen); - int32_t input_n = 1; - - int32_t mve_calculated_buffer = - (4 * CH_IN_BLOCK_MVE * 
filter_w * filter_h) * (int32_t)sizeof(int8_t); - int32_t dsp_calculated_buffer = (input_output_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); - - auto depthwise_conv2d = [=](Target target) { - return DepthwiseConv2dBufferSizeInt8(target, input_n, input_output_c, input_output_c, filter_w, - filter_h, 1, 1, 1); - }; - - ASSERT_EQ(depthwise_conv2d(kNoExt), 0); - ASSERT_EQ(depthwise_conv2d(kNoExt), 0); - ASSERT_EQ(depthwise_conv2d(kHasDSP), dsp_calculated_buffer); - ASSERT_EQ(depthwise_conv2d(kHasDSP), dsp_calculated_buffer); - ASSERT_EQ(depthwise_conv2d(kHasMVE), mve_calculated_buffer); - ASSERT_EQ(depthwise_conv2d(kHasMVE), mve_calculated_buffer); -} - -TEST(CMSISNNDepthwiseConv2dBufferSizeInt16, Default) { - int32_t any = fake_parameters(gen); - - auto depthwise_int16_buffer = [=](Target target, int32_t input_c, int32_t filter_w, - int32_t filter_h) { - return DepthwiseConv2dBufferSizeInt16(target, any, input_c, any, filter_w, filter_h, 1, 1, 1); - }; - - auto dsp_only_buffer = [=](int32_t input_c, int32_t filter_w, int32_t filter_h) { - return (input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); - }; - - auto dsp_mve_buffer = [=](int32_t input_c, int32_t filter_w, int32_t filter_h) { - return (4 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t) + 8; - }; - - ASSERT_EQ(depthwise_int16_buffer(kNoExt, 3, 5, 5), 0); - ASSERT_EQ(depthwise_int16_buffer(kNoExt, 32, 3, 3), 0); - - ASSERT_EQ(depthwise_int16_buffer(kHasDSP, 3, 3, 3), dsp_only_buffer(3, 3, 3)); - ASSERT_EQ(depthwise_int16_buffer(kHasDSP, 12, 5, 5), dsp_only_buffer(12, 5, 5)); - ASSERT_EQ(depthwise_int16_buffer(kHasDSP, 24, 5, 5), 0); - - ASSERT_EQ(depthwise_int16_buffer(kHasMVE, 3, 3, 3), dsp_mve_buffer(3, 3, 3)); - ASSERT_EQ(depthwise_int16_buffer(kHasMVE, 12, 5, 5), dsp_mve_buffer(12, 5, 5)); - ASSERT_EQ(depthwise_int16_buffer(kHasMVE, 24, 5, 5), 0); -} - -TEST(CMSISNNAvgPoolBufferSize, Default) { - int32_t input_c = fake_parameters(gen); - int32_t calculated_buffer = (input_c * sizeof(int32_t)); - - auto avg_pool = [=](Target target) { return AvgPoolBufferSize(target, input_c); }; - - ASSERT_EQ(avg_pool(kNoExt), 0); - ASSERT_EQ(avg_pool(kHasDSP), calculated_buffer); - ASSERT_EQ(avg_pool(kHasMVE), 0); -} - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm - -#endif diff --git a/tests/cpp/relay/backend/contrib/cmsisnn/compiler_attrs_test.cc b/tests/cpp/relay/backend/contrib/cmsisnn/compiler_attrs_test.cc deleted file mode 100644 index 24dd3a6f5e2d..000000000000 --- a/tests/cpp/relay/backend/contrib/cmsisnn/compiler_attrs_test.cc +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -#ifdef TVM_USE_CMSISNN - -#include "../../../../../../src/relay/backend/contrib/cmsisnn/compiler_attrs.h" - -#include -#include -#include - -#include -#include - -namespace tvm { -namespace relay { -namespace contrib { -namespace cmsisnn { - -static Target GetTargetWithCompilerAttrs(String mcpu, String mattr) { - auto context_node = make_object(); - auto cmsisnn_config_node = make_object(); - cmsisnn_config_node->InitBySeq("mcpu", mcpu, "mattr", mattr); - - context_node->config = { - {"relay.ext.cmsisnn.options", CMSISNNCompilerConfig(cmsisnn_config_node)}}; - - tvm::transform::PassContext context = tvm::transform::PassContext(context_node); - return CreateTarget(context); -} - -TEST(CMSISNNTarget, CreateFromUndefined) { - auto context_node = make_object(); - tvm::transform::PassContext context = tvm::transform::PassContext(context_node); - Target target = CreateTarget(context); - ASSERT_EQ(target->GetFeature("has_mve").value_or(Bool(false)), Bool(false)); - ASSERT_EQ(target->GetFeature("has_dsp").value_or(Bool(false)), Bool(false)); -} - -TEST(CMSISNNTarget, CreateFromContextCortexM55) { - Target target = GetTargetWithCompilerAttrs("cortex-m55", ""); - ASSERT_EQ(target->GetFeature("has_mve").value_or(Bool(false)), Bool(true)); - ASSERT_EQ(target->GetFeature("has_dsp").value_or(Bool(false)), Bool(true)); -} - -TEST(CMSISNNTarget, CreateFromContextWithAttrsCortexM55) { - Target target = GetTargetWithCompilerAttrs("cortex-m55", "+nomve"); - ASSERT_EQ(target->GetFeature("has_mve").value_or(Bool(false)), Bool(false)); - ASSERT_EQ(target->GetFeature("has_dsp").value_or(Bool(false)), Bool(true)); -} - -TEST(CMSISNNTarget, CreateFromContextCortexM85) { - Target target = GetTargetWithCompilerAttrs("cortex-m85", ""); - ASSERT_EQ(target->GetFeature("has_mve").value_or(Bool(false)), Bool(true)); - ASSERT_EQ(target->GetFeature("has_dsp").value_or(Bool(false)), Bool(true)); -} - -TEST(CMSISNNTarget, CreateFromContextWithAttrsCortexM85) { - Target target = GetTargetWithCompilerAttrs("cortex-m85", "+nomve"); - ASSERT_EQ(target->GetFeature("has_mve").value_or(Bool(false)), Bool(false)); - ASSERT_EQ(target->GetFeature("has_dsp").value_or(Bool(false)), Bool(true)); -} - -} // namespace cmsisnn -} // namespace contrib -} // namespace relay -} // namespace tvm - -#endif diff --git a/tests/cpp/runtime/contrib/ethosn/inference_test.cc b/tests/cpp/runtime/contrib/ethosn/inference_test.cc deleted file mode 100644 index 45a6bd0997e9..000000000000 --- a/tests/cpp/runtime/contrib/ethosn/inference_test.cc +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file tests/cpp/runtime/contrib/ethosn/inference_test.cc - * \brief Tests to check Arm(R) Ethos(TM)-N runtime components used during inference. 
- */ - -#ifdef ETHOSN_HW - -#include - -#include "../../../../../src/runtime/contrib/ethosn/ethosn_device.cc" - -namespace tvm { -namespace runtime { -namespace ethosn { - -TEST(WaitForInference, InferenceScheduled) { - const int inference_result = 0 /* Scheduled */; - const int timeout = 0; - - dl::Inference inference = dl::Inference(inference_result); - InferenceWaitStatus result = WaitForInference(&inference, timeout); - - ASSERT_EQ(result.GetErrorCode(), InferenceWaitErrorCode::kTimeout); - ICHECK_EQ(result.GetErrorDescription(), "Timed out while waiting for the inference to complete."); -} - -TEST(WaitForInference, InferenceError) { - const int inference_result = 3 /* Error */; - const int timeout = 0; - - dl::Inference inference = dl::Inference(inference_result); - InferenceWaitStatus result = WaitForInference(&inference, timeout); - - ASSERT_EQ(result.GetErrorCode(), InferenceWaitErrorCode::kError); - ICHECK_EQ(result.GetErrorDescription(), - "Failed to read inference result status (No such file or directory)"); -} - -} // namespace ethosn -} // namespace runtime -} // namespace tvm - -#endif diff --git a/tests/crt/buffer_write_stream.h b/tests/crt/buffer_write_stream.h deleted file mode 100644 index 48a30ac4b273..000000000000 --- a/tests/crt/buffer_write_stream.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef TESTS_CRT_BUFFER_WRITE_STREAM_H_ -#define TESTS_CRT_BUFFER_WRITE_STREAM_H_ - -#include -#include -#include - -#include - -using ::tvm::runtime::micro_rpc::FrameBuffer; -using ::tvm::runtime::micro_rpc::WriteStream; - -template -class BufferWriteStream : public WriteStream { - public: - ssize_t Write(const uint8_t* data, size_t data_size_bytes) override { - return buffer_.Write(data, data_size_bytes); - } - - void Reset() { - buffer_.Clear(); - packet_done_ = false; - } - - inline bool packet_done() { return packet_done_; } - - inline bool is_valid() { return is_valid_; } - - void PacketDone(bool is_valid) override { - EXPECT_FALSE(packet_done_); - packet_done_ = true; - is_valid_ = is_valid; - } - - std::string BufferContents() { return std::string((const char*)buffer_data_, buffer_.Size()); } - - static constexpr unsigned int capacity() { return N; } - - private: - bool packet_done_{false}; - bool is_valid_{false}; - uint8_t buffer_data_[N]; - FrameBuffer buffer_{buffer_data_, N}; -}; - -#endif // TESTS_CRT_BUFFER_WRITE_STREAM_H_ diff --git a/tests/crt/contrib/stm32/Makefile b/tests/crt/contrib/stm32/Makefile deleted file mode 100644 index b2515b854d4f..000000000000 --- a/tests/crt/contrib/stm32/Makefile +++ /dev/null @@ -1,88 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ifndef TVM_PATH -$(error TVM_PATH must be set and point at your TVM installation) -endif - -ifndef MODEL_PATH -$(error MODEL_PATH must be set and point at your model implementation) -endif - -ifndef BUILD_PATH -$(error BUILD_PATH must be set and point at where your models are built) -endif - -ifndef IMAGE_PATH -$(error IMAGE_PATH must be set and point at where your images are stored) -endif - -SRC_PATH = $(TVM_PATH)/tests/crt/contrib/stm32/src -TVM_CRT_PATH = $(TVM_PATH)/src/runtime/crt/common -STM32_RUNTIME_PATH = $(TVM_PATH)/src/runtime/crt/contrib/stm32 - -# -# Model sources -# -C_SOURCES := $(wildcard ${MODEL_PATH}/*.c) - -# -# TVM sources -# -C_SOURCES += $(TVM_CRT_PATH)/crt_backend_api.c -C_SOURCES += $(STM32_RUNTIME_PATH)/runtime.c -C_SOURCES += $(STM32_RUNTIME_PATH)/ai_runtime_api.c - -# -# Application sources -# -C_SOURCES += $(SRC_PATH)/main.c - -vpath %.c $(sort $(dir $(C_SOURCES))) - -# -# Build -# - -BUILD_DIR = $(MODEL_PATH) - -TARGET = network.exe - -OBJECTS = $(addprefix $(BUILD_DIR)/,$(notdir $(C_SOURCES:.c=.o))) - -CXX = gcc -m32 -g - -DEFINES = -INCLUDES = -I$(TVM_PATH)/3rdparty/dlpack/include -I$(TVM_PATH)/include -I$(STM32_RUNTIME_PATH) - -CFLAGS = $(DEFINES) $(INCLUDES) -LDFLAGS = -lm - -all: $(BUILD_DIR)/$(TARGET) - -$(BUILD_DIR)/$(TARGET): $(OBJECTS) - $(CXX) $(CFLAGS) -o $@ $^ $(LDFLAGS) - -$(BUILD_DIR)/main.o: main.c - $(CXX) -DBUILD_PATH=\"$(BUILD_PATH)\" -DIMAGE_PATH=\"$(IMAGE_PATH)\"$(CFLAGS) -I$(MODEL_PATH) -c $< -o $@ - -$(BUILD_DIR)/%.o: %.c - $(CXX) $(CFLAGS) -c $< -o $@ - -clean: - rm $(BUILD_DIR)/*.o - rm $(BUILD_DIR)/$(TARGET) diff --git a/tests/crt/contrib/stm32/src/main.c b/tests/crt/contrib/stm32/src/main.c deleted file mode 100644 index a124ab4441ea..000000000000 --- a/tests/crt/contrib/stm32/src/main.c +++ /dev/null @@ -1,461 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include "ai_runtime_api.h" -#include "network.h" -#include "network_data.h" - -// -// Network that we are testing -// -extern ai_model_info network_network; - -// -// Dummy: for the runtime -// -uint32_t __models_section_start__ = (uint32_t)&network_network; -uint32_t __models_section_end__ = (uint32_t)&network_network + sizeof(ai_model_info); - -static ai_model_info* _model_p = &network_network; - -// -// Global handle to reference the instantiated NN -// -static ai_handle _network = AI_HANDLE_NULL; - -static uint8_t LoadInputImg(const char* filename, ai_tensor* input); -static int32_t quantize_val(float val, ai_quantization_info* quant); -static float dequantize_val(int32_t val, ai_quantization_info* quant); - -// ================================================================= -// Convert_Fixed_To_Float -// ================================================================= -static float Convert_Fixed_To_Float(uint8_t data, int8_t fl) { - uint8_t val = data; - float x; - if (fl >= 0) { - x = ((float)val) / (float)(1 << fl); // NOLINT - } else { - x = ((float)val) / (1 / (float)(1 << fl)); // NOLINT - } - return x; -} - -// ======================================================= -// error -// ======================================================= -static void error(const char* fmt, ...) { - va_list vp; - char emsg[512]; - int32_t loc = 0; - - // - // Prepare main error message: - // - va_start(vp, fmt); - loc += vsprintf(&emsg[loc], fmt, vp); - va_end(vp); - - // fputs (emsg, stderr); - // fflush (stderr); - - fprintf(stderr, " #### Error: %s.\n", emsg); - - exit(-1); -} - -// ================================================== -// aiLogErr -// ================================================== -static void aiLogErr(const char* fct, const char* msg) { - if (fct) { - printf("E: AI error: %s - %s\r\n", fct, msg); - } else { - printf("E: AI error - %s\r\n", msg); - } -} - -// ================================================== -// aiPrintLayoutBuffer -// ================================================== -static void aiPrintLayoutBuffer(const char* msg, int idx, ai_tensor* tensor) { - DLTensor* dltensor = get_dltensor(tensor); - DLDataType dtype = dltensor->dtype; - - printf("%s[%d] ", msg, idx); - printf(" (%u, %u, %u)", dtype.code, dtype.bits, dtype.lanes); - // - // Quantization info exists for input/output tensors - // - const ai_quantization_info* quant = ai_get_quantization(tensor); - if (quant != NULL) { - printf(" -- TODO: quantization info \n"); - } - - int32_t size = get_tensor_size(tensor); - printf(" %d bytes, shape=(", size); - for (int i = 0; i < dltensor->ndim; ++i) { - printf("%d,", (int32_t)dltensor->shape[i]); - } - printf("), address = 0x%08x\r\n", (unsigned int)dltensor->data); -} - -// ================================================== -// aiPrintNetworkInfo -// ================================================== -static void aiPrintNetworkInfo(ai_handle network) { - const char* name = ai_get_name(network); - const char* datetime = ai_get_datetime(network); - const char* revision = ai_get_revision(network); - const char* tool_version = ai_get_tool_version(network); - const char* api_version = ai_get_api_version(network); - - uint32_t n_nodes = ai_get_node_size(network); - uint32_t n_inputs = ai_get_input_size(network); - uint32_t n_outputs = ai_get_output_size(network); - - uint32_t activations_size = ai_get_activations_size(network); - uint32_t params_size = ai_get_params_size(network); - - 
printf("Network configuration...\r\n"); - printf(" Model name : %s\r\n", name); - printf(" Compile datetime : %s\r\n", datetime); - printf(" Tool revision : %s (%s)\r\n", revision, tool_version); - printf(" API version : %s\r\n", api_version); - printf("Network info...\r\n"); - printf(" nodes : %d\r\n", n_nodes); - printf(" activation : %d bytes\r\n", activations_size); - printf(" params : %d bytes\r\n", params_size); - printf(" inputs/outputs : %u/%u\r\n", n_inputs, n_outputs); -} - -// ====================================================== -// aiInit -// ====================================================== -static int aiInit(void) { - ai_status err = AI_STATUS_OK; - - const char* nn_name = AI_MODEL_name(_model_p); - ai_ptr built_in_activations = AI_MODEL_activations(_model_p); - - // - // Creating the network - // - printf("Creating the network \"%s\"..\r\n", nn_name); - - err = ai_create(_model_p, built_in_activations, &_network); - if (err != AI_STATUS_OK) { - const char* msg = ai_get_error(_network); - aiLogErr("ai_create", msg); - return -1; - } - - // - // Query the created network to get relevant info from it - // - aiPrintNetworkInfo(_network); - - uint32_t n_inputs = ai_get_input_size(_network); - uint32_t n_outputs = ai_get_output_size(_network); - uint32_t activations_size = ai_get_activations_size(_network); - uint32_t params_size = ai_get_params_size(_network); - - const ai_ptr params = ai_get_params(_network); - ai_ptr activations = ai_get_activations(_network); - - printf("Weights buffer : 0x%08x %d bytes)\r\n", (unsigned int)params, - (unsigned int)params_size); - printf("Activation buffer : 0x%08x (%d bytes) %s\r\n", (unsigned int)activations, - (unsigned int)activations_size, - ((uint32_t)activations & (uint32_t)0xFF000000) ? "internal" : "external"); - - printf("Inputs:\r\n"); - for (int i = 0; i < n_inputs; i++) { - ai_tensor* input = ai_get_input(_network, i); - aiPrintLayoutBuffer(" I", i, input); - } - - printf("Outputs:\r\n"); - for (int i = 0; i < n_outputs; i++) { - ai_tensor* output = ai_get_output(_network, i); - aiPrintLayoutBuffer(" O", i, output); - } - - return 0; -} - -// ====================================================== -// aiDeInit -// ====================================================== -static void aiDeInit(void) { - ai_status err = AI_STATUS_OK; - - printf("Releasing the network(s)...\r\n"); - - if (ai_destroy(_network) != AI_STATUS_OK) { - const char* err = ai_get_error(_network); - aiLogErr("ai_destroy", err); - } - _network = AI_HANDLE_NULL; - return; -} - -// ================================================================= -// argmax -// -// Description : return argument of table maximum value -// Argument : Vector_db *vec: table -// Return Value : int: index of max value -// ================================================================= -static uint8_t argmax(int8_t* vec, uint32_t num) { - uint32_t i; - uint8_t arg = 0; - int8_t imax = vec[0]; - for (i = 1; i < num; i++) { - imax = (imax > vec[i]) ? 
imax : vec[i]; - if (imax == vec[i]) { - arg = i; - } - } - return (arg); -} - -// ====================================================== -// aiRun -// ====================================================== -static int aiRun(void) { - ai_status err = AI_STATUS_OK; - - // - // Inputs - // - ai_tensor* input = ai_get_input(_network, 0); - if (input == NULL) { - const char* err = ai_get_error(_network); - aiLogErr("ai_run", err); - return -1; - } - - // - // Outputs - // - ai_tensor* output = ai_get_output(_network, 0); - if (output == NULL) { - const char* err = ai_get_error(_network); - aiLogErr("ai_run", err); - return -1; - } - - DLDataType out_dtype = output->dltensor.dtype; - if (out_dtype.lanes > 1) { - printf("E: vector outputs are not supported ...\r\n"); - return -1; - } - - uint32_t elts = get_tensor_elts(output); - - char outfile_name[128]; - sprintf(outfile_name, "%s/tvm_results.txt", BUILD_PATH); // NOLINT - FILE* outfile = fopen(outfile_name, "w"); - - for (int i = 0; i <= 9; i++) { - char image[128]; - - sprintf(image, "%s/0%d.raw", IMAGE_PATH, i); // NOLINT - printf("Loading input image %s ... \n", image); - if (LoadInputImg(image, input) != 0) { - error("Loading image %s\n", image); - } - - // - // Run the inference - // - printf("Running the network\r\n"); - - if (ai_run(_network) != AI_STATUS_OK) { - const char* err = ai_get_error(_network); - aiLogErr("ai_run", err); - return -1; - } - - const ai_quantization_info* output_quant = ai_get_quantization(output); - if (output_quant == NULL) { - // - // Floating point model - // - float* probabilities = (float*)output->dltensor.data; // NOLINT - for (int i = 0; i < elts; i++) { - float val = probabilities[i]; - // printf (" -- probability[%d] = %g \n", i, val); - fprintf(outfile, "%g ", val); - } - - } else { - // - // Quantized model - // - if (out_dtype.code == kDLInt) { - int8_t* probabilities = (int8_t*)output->dltensor.data; // NOLINT - for (int i = 0; i < elts; i++) { - int8_t qval = probabilities[i]; - // printf (" -- probability[%d] = %d \n", i, qval); - float val = dequantize_val(qval, output_quant); - fprintf(outfile, "%g ", val); - } - } else { - uint8_t* probabilities = (uint8_t*)output->dltensor.data; // NOLINT - for (int i = 0; i < elts; i++) { - uint8_t qval = probabilities[i]; - // printf (" -- probability[%d] = %d \n", i, qval); - float val = dequantize_val(qval, output_quant); - fprintf(outfile, "%g ", val); - } - } - } - fprintf(outfile, "\n"); - } - fclose(outfile); - - return 0; -} - -// ================================================================= -// quantize_val -// ================================================================= -static int32_t quantize_val(float val, ai_quantization_info* quant) { - float new_val; - float input_scale = quant->scale[0]; - int32_t input_zero_point = quant->zero_point[0]; - new_val = val / input_scale + input_zero_point; - return (int32_t)new_val; -} - -// ================================================================= -// dequantize_val -// ================================================================= -static float dequantize_val(int32_t val, ai_quantization_info* quant) { - float new_val; - float output_scale = quant->scale[0]; - int32_t output_zero_point = quant->zero_point[0]; - new_val = (val - output_zero_point) * output_scale; - return new_val; -} - -// ================================================================= -// LoadInputImg -// ================================================================= -uint8_t LoadInputImg(const char* filename, 
ai_tensor* input) { - DLDataType dtype = input->dltensor.dtype; - - const ai_quantization_info* input_quant = ai_get_quantization(input); - - if (dtype.lanes > 1) { - printf("E: vector inputs are not supported ...\r\n"); - return -1; - } - - if (dtype.code == kDLBfloat) { - printf("E: Double float inputs are not supported ...\r\n"); - return -1; - } - - FILE* file = fopen(filename, "r"); - if (file == NULL) { - printf("== File %s not found\n", filename); - return (-1); - } - - // - // Find file size - // - fseek(file, 0L, SEEK_END); - size_t img_size = ftell(file); - (void)fseek(file, 0L, SEEK_SET); - - // printf ("== Image size = %d\n", img_size); - - uint8_t* image = (uint8_t*)malloc(img_size); // NOLINT - size_t size = fread(image, 1, img_size, file); - if (size != img_size) { - perror("fread"); - printf("== Problem reading %s\n", filename); - return (-1); - } - - fclose(file); - - uint32_t x; - uint8_t* p = image; - uint8_t* pg = (uint8_t*)input->dltensor.data; // NOLINT - - for (x = 0; x < img_size; x++) { - uint8_t val = p[x]; - // - // Input image needs to be normalized into [0..1] interval - // - float nval = ((float)val) / 255.0; // NOLINT - if (input_quant != NULL) { - if (dtype.code == kDLInt) { - int8_t qval = quantize_val(nval, input_quant); - *pg = qval; - pg += sizeof(int8_t); - } else { - uint8_t qval = quantize_val(nval, input_quant); - *pg = qval; - pg += sizeof(uint8_t); - } - } else { - *(float*)pg = nval; // NOLINT - pg += sizeof(float); - } - } - - free(image); - - return 0; -} - -// ====================================================== -// main -// ====================================================== -int main(int argc, char* argv[]) { - int status; - - status = aiInit(); - if (status != 0) { - printf("Error initializing.\n"); - } - - status = aiRun(); - if (status != 0) { - printf("Error running.\n"); - } - - aiDeInit(); - - return (0); -} diff --git a/tests/crt/framing_test.cc b/tests/crt/framing_test.cc deleted file mode 100644 index a64631d67a0c..000000000000 --- a/tests/crt/framing_test.cc +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -#include -#include -#include -#include - -#include -#include - -#include "buffer_write_stream.h" -#include "crt_config.h" - -using ::tvm::runtime::micro_rpc::Escape; -using ::tvm::runtime::micro_rpc::FrameBuffer; -using ::tvm::runtime::micro_rpc::Framer; -using ::tvm::runtime::micro_rpc::Unframer; - -class FramerTest : public ::testing::Test { - protected: - BufferWriteStream<300> write_stream_; - Framer framer_{&write_stream_}; -}; - -class TestPacket { - public: - static std::vector instances; - - // NOTE: take payload and wire as arrays to avoid clipping at \0 - template - TestPacket(const std::string name, const char (&payload)[N], const char (&wire)[M]) - : name{name}, payload{payload, N - 1}, wire{wire, M - 1} { // omit trailing \0 - instances.emplace_back(this); - } - - inline const uint8_t* payload_data() const { - return reinterpret_cast(payload.data()); - } - - inline const uint8_t* wire_data() const { return reinterpret_cast(wire.data()); } - - std::string name; - std::string payload; - std::string wire; -}; - -std::vector TestPacket::instances; - -#define TEST_PACKET(name, payload, wire) \ - static const TestPacket k##name { \ -#name, payload, wire \ - } - -// NOTE: golden packet CRCs are generated with this python: -// import binascii -// import struct -// struct.pack('t@\"hr", - "\xff\xfd\x13\0\0\0es_\xff\xff_capeir/^>t@\"hr\xb4\xff\xff"); - -TEST_F(FramerTest, ValidPacketTrain) { - EXPECT_EQ(kTvmErrorNoError, framer_.Write(kPacket1.payload_data(), kPacket1.payload.size())); - EXPECT_EQ(kTvmErrorNoError, framer_.Write(kPacket2.payload_data(), kPacket2.payload.size())); - framer_.Reset(); - EXPECT_EQ(kTvmErrorNoError, framer_.Write(kPacket3.payload_data(), kPacket3.payload.size())); - - EXPECT_EQ("\xfe" + kPacket1.wire + // packet1 plus nop prefix. - kPacket2.wire + // packet2, no prefix. - "\xfe" + kPacket3.wire, // packet3 plus nop prefix. 
- write_stream_.BufferContents()); -} - -TEST_F(FramerTest, ZeroLengthPacket) { - EXPECT_EQ(kTvmErrorNoError, - framer_.Write(kZeroLengthPacket.payload_data(), kZeroLengthPacket.payload.size())); - EXPECT_EQ("\xfe" + kZeroLengthPacket.wire, write_stream_.BufferContents()); -} - -TEST_F(FramerTest, Escapes) { - EXPECT_EQ(kTvmErrorNoError, - framer_.Write(kEscapePacket.payload_data(), kEscapePacket.payload.size())); - EXPECT_EQ("\xfe" + kEscapePacket.wire, write_stream_.BufferContents()); -} - -class UnframerTest : public ::testing::Test { - protected: - BufferWriteStream<300> write_stream_; - Unframer unframer_{&write_stream_}; -}; - -TEST_F(UnframerTest, PacketTooLong) { - const uint8_t escape[2] = {uint8_t(Escape::kEscapeStart), uint8_t(Escape::kPacketStart)}; - uint16_t crc = tvm::runtime::micro_rpc::crc16_compute(escape, sizeof(escape), nullptr); - size_t bytes_consumed; - EXPECT_EQ(kTvmErrorNoError, unframer_.Write(escape, sizeof(escape), &bytes_consumed)); - EXPECT_EQ(sizeof(escape), bytes_consumed); - - uint32_t packet_length = write_stream_.capacity() + 1; - uint8_t* packet_length_bytes = reinterpret_cast(&packet_length); - for (size_t i = 0; i < sizeof(packet_length); i++) { - ASSERT_NE('\xff', packet_length_bytes[i]); - } - crc = tvm::runtime::micro_rpc::crc16_compute(packet_length_bytes, sizeof(packet_length), &crc); - EXPECT_EQ(kTvmErrorNoError, - unframer_.Write(packet_length_bytes, sizeof(packet_length), &bytes_consumed)); - EXPECT_EQ(sizeof(packet_length), bytes_consumed); - - unsigned int long_payload_len = decltype(write_stream_)::capacity() + 1; - auto long_payload = std::make_unique(long_payload_len); - for (size_t i = 0; i < long_payload_len; i++) { - long_payload[i] = i & 0xff; - if (long_payload[i] == uint8_t(Escape::kEscapeStart)) { - long_payload[i] = 0; - } - } - crc = tvm::runtime::micro_rpc::crc16_compute(long_payload.get(), long_payload_len, &crc); - EXPECT_EQ(kTvmErrorWriteStreamShortWrite, - unframer_.Write(long_payload.get(), long_payload_len, &bytes_consumed)); - EXPECT_EQ(write_stream_.capacity(), bytes_consumed); - - EXPECT_EQ(kTvmErrorNoError, - unframer_.Write(reinterpret_cast(&crc), sizeof(crc), &bytes_consumed)); - EXPECT_EQ(2UL, bytes_consumed); // 2, because framer is now in kFindPacketStart. - EXPECT_FALSE(write_stream_.packet_done()); - EXPECT_FALSE(write_stream_.is_valid()); - EXPECT_EQ(std::string(reinterpret_cast(long_payload.get()), write_stream_.capacity()), - write_stream_.BufferContents()); - - // Writing a smaller packet directly afterward should work. 
- write_stream_.Reset(); - EXPECT_EQ(kTvmErrorNoError, - unframer_.Write(kPacket1.wire_data(), kPacket1.wire.size(), &bytes_consumed)); - EXPECT_EQ(kPacket1.wire.size(), bytes_consumed); - EXPECT_TRUE(write_stream_.packet_done()); - EXPECT_TRUE(write_stream_.is_valid()); - EXPECT_EQ(kPacket1.payload, write_stream_.BufferContents()); -} - -class UnframerTestParameterized : public UnframerTest, - public ::testing::WithParamInterface {}; - -TEST_P(UnframerTestParameterized, TestFullPacket) { - size_t bytes_consumed; - EXPECT_EQ(kTvmErrorNoError, - unframer_.Write(GetParam()->wire_data(), GetParam()->wire.size(), &bytes_consumed)); - EXPECT_EQ(GetParam()->wire.size(), bytes_consumed); - EXPECT_TRUE(write_stream_.packet_done()); - EXPECT_TRUE(write_stream_.is_valid()); - EXPECT_EQ(GetParam()->payload, write_stream_.BufferContents()); -} - -TEST_P(UnframerTestParameterized, TestByteAtATime) { - size_t bytes_consumed; - size_t wire_size = GetParam()->wire.size(); - for (size_t i = 0; i < wire_size; i++) { - EXPECT_EQ(kTvmErrorNoError, - unframer_.Write(reinterpret_cast(&GetParam()->wire[i]), 1, - &bytes_consumed)); - EXPECT_EQ(1UL, bytes_consumed); - EXPECT_EQ(i == wire_size - 1, write_stream_.packet_done()); - } - EXPECT_TRUE(write_stream_.is_valid()); - EXPECT_EQ(GetParam()->payload, write_stream_.BufferContents()); -} - -TEST_P(UnframerTestParameterized, TestArbitraryBoundary) { - size_t bytes_consumed; - size_t wire_size = GetParam()->wire.size(); - for (size_t i = 1; i < wire_size; i++) { - unframer_.Reset(); - write_stream_.Reset(); - EXPECT_EQ(kTvmErrorNoError, unframer_.Write(GetParam()->wire_data(), i, &bytes_consumed)); - EXPECT_EQ(i, bytes_consumed); - EXPECT_FALSE(write_stream_.packet_done()); - EXPECT_EQ(kTvmErrorNoError, - unframer_.Write(&GetParam()->wire_data()[i], wire_size - i, &bytes_consumed)); - EXPECT_EQ(wire_size - i, bytes_consumed); - EXPECT_TRUE(write_stream_.packet_done()); - EXPECT_TRUE(write_stream_.is_valid()); - EXPECT_EQ(GetParam()->payload, write_stream_.BufferContents()); - } -} - -TEST_P(UnframerTestParameterized, TestArbitraryPacketReset) { - size_t bytes_consumed; - size_t wire_size = GetParam()->wire.size(); - - // This test interrupts packet transmission at an arbitrary point in the packet and restarts from - // the beginning. It simulates handling a device reset in the protocol. The behavior of the framer - // depends on how much of the packet had been transmitted, so the test is split into parts: - - // Part 1. Restarting during the initial escape sequence. - unframer_.Reset(); - write_stream_.Reset(); - EXPECT_EQ(kTvmErrorNoError, unframer_.Write(GetParam()->wire_data(), 1, &bytes_consumed)); - EXPECT_EQ(1UL, bytes_consumed); - EXPECT_EQ(kTvmErrorNoError, unframer_.Write(GetParam()->wire_data(), wire_size, &bytes_consumed)); - EXPECT_EQ(wire_size, bytes_consumed); - EXPECT_TRUE(write_stream_.packet_done()); - EXPECT_TRUE(write_stream_.is_valid()); - EXPECT_EQ(GetParam()->payload, write_stream_.BufferContents()); - - // Part 2. Restarting after the initial escape sequence. - for (size_t i = 2; i < wire_size; i++) { - unframer_.Reset(); - write_stream_.Reset(); - EXPECT_EQ(kTvmErrorNoError, unframer_.Write(GetParam()->wire_data(), i, &bytes_consumed)); - EXPECT_EQ(i, bytes_consumed); - - // First test byte-by-byte interruption. - // Interrupt the packet transmission. The first byte will return no error as it is the escape - // byte. 
- EXPECT_EQ(kTvmErrorNoError, unframer_.Write(GetParam()->wire_data(), 1, &bytes_consumed)); - EXPECT_EQ(1UL, bytes_consumed); - EXPECT_FALSE(write_stream_.packet_done()); - - // Secondt byte will return a short packet error. - EXPECT_EQ(kTvmErrorFramingShortPacket, - unframer_.Write(&GetParam()->wire_data()[1], 1, &bytes_consumed)); - EXPECT_EQ(0UL, bytes_consumed); - EXPECT_FALSE(write_stream_.packet_done()); - - EXPECT_EQ(kTvmErrorNoError, - unframer_.Write(&GetParam()->wire_data()[1], wire_size - 1, &bytes_consumed)); - EXPECT_EQ(wire_size - 1, bytes_consumed); - EXPECT_TRUE(write_stream_.packet_done()); - EXPECT_TRUE(write_stream_.is_valid()); - EXPECT_EQ(GetParam()->payload, write_stream_.BufferContents()); - - // Next, test interruption just by sending the whole payload at once. - unframer_.Reset(); - write_stream_.Reset(); - EXPECT_EQ(kTvmErrorNoError, unframer_.Write(GetParam()->wire_data(), i, &bytes_consumed)); - EXPECT_EQ(i, bytes_consumed); - - // Interrupt the packet transmission. The first Write() call will just consume 1 byte to reset - // the internal state. - EXPECT_EQ(kTvmErrorFramingShortPacket, - unframer_.Write(GetParam()->wire_data(), wire_size, &bytes_consumed)); - EXPECT_EQ(1UL, bytes_consumed); - EXPECT_FALSE(write_stream_.packet_done()); - EXPECT_EQ(kTvmErrorNoError, - unframer_.Write(&GetParam()->wire_data()[1], wire_size - 1, &bytes_consumed)); - EXPECT_EQ(wire_size - 1, bytes_consumed); - EXPECT_TRUE(write_stream_.packet_done()); - EXPECT_TRUE(write_stream_.is_valid()); - EXPECT_EQ(GetParam()->payload, write_stream_.BufferContents()); - - break; - } -} - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -INSTANTIATE_TEST_CASE_P(UnframerTests, UnframerTestParameterized, - ::testing::ValuesIn(TestPacket::instances)); -#pragma GCC diagnostic pop diff --git a/tests/crt/func_registry_test.cc b/tests/crt/func_registry_test.cc deleted file mode 100644 index 5962a3acee39..000000000000 --- a/tests/crt/func_registry_test.cc +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -#include -#include -#include -#include - -typedef struct { - const char* a; - const char* b; - int ret_val; -} strcmp_test_t; - -strcmp_test_t strcmp_tests[] = { - {"Foo", "Foo", 0}, {"Foo", "Bar", 'F' - 'B'}, {"Foo", "", 'F'}, - {"Fabulous", "Fab", 'u'}, {"Fab", "Fabulous", 0 - 'u'}, -}; - -std::ostream& operator<<(std::ostream& os, const strcmp_test_t& test) { - os << "strcmp_cursor(\"" << test.a << "\", \"" << test.b << "\") -> " << test.ret_val; - return os; -} - -class StrCmpTestFixture : public ::testing::TestWithParam {}; - -TEST_P(StrCmpTestFixture, Match) { - strcmp_test_t param = GetParam(); - const char* cursor = param.a; - EXPECT_EQ(param.ret_val, strcmp_cursor(&cursor, param.b)); - - EXPECT_EQ('\0', *cursor); - - size_t a_length = strlen(param.a); - EXPECT_EQ(param.a + a_length, cursor); -} - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -INSTANTIATE_TEST_CASE_P(StrCmpTests, StrCmpTestFixture, ::testing::ValuesIn(strcmp_tests)); -#pragma GCC diagnostic pop - -TEST(StrCmpScan, Test) { - const char* a = "Foo\0Bar\0Whoops\0"; - const char* cursor = a; - - EXPECT_EQ('o', strcmp_cursor(&cursor, "Fo")); - EXPECT_EQ(0, *cursor); - EXPECT_EQ(cursor, a + 3); - cursor++; - - EXPECT_EQ(0 - 'r', strcmp_cursor(&cursor, "Barr")); - EXPECT_EQ(0, *cursor); - EXPECT_EQ(cursor, a + 7); - cursor++; - - EXPECT_EQ('h' - 'B', strcmp_cursor(&cursor, "WB")); - EXPECT_EQ(0, *cursor); - EXPECT_EQ(cursor, a + 14); - cursor++; - - EXPECT_EQ(0, *cursor); - const char* before_cursor = cursor; - EXPECT_EQ(0, strcmp_cursor(&cursor, "")); - EXPECT_EQ(before_cursor, cursor); -} - -TEST(FuncRegistry, Empty) { - TVMFuncRegistry registry{"\000\000", NULL}; - - EXPECT_EQ(kTvmErrorFunctionNameNotFound, TVMFuncRegistry_Lookup(®istry, "foo", NULL)); - EXPECT_EQ(kTvmErrorFunctionIndexInvalid, - TVMFuncRegistry_GetByIndex(®istry, (tvm_function_index_t)0, NULL)); -} - -extern "C" { -static int Foo(TVMValue* args, int* type_codes, int num_args, TVMValue* out_ret_value, - int* out_ret_tcode, void* resource_handle) { - return 0; -} -static int Bar(TVMValue* args, int* type_codes, int num_args, TVMValue* out_ret_value, - int* out_ret_tcode, void* resource_handle) { - return 0; -} -} - -// Matches the style of registry defined in generated C modules. -const char* kBasicFuncNames = "\002\000Foo\0Bar\0"; // NOTE: final \0 -const TVMBackendPackedCFunc funcs[2] = {&Foo, &Bar}; -const TVMFuncRegistry kConstRegistry = {kBasicFuncNames, (const TVMBackendPackedCFunc*)funcs}; - -TEST(FuncRegistry, ConstGlobalRegistry) { - tvm_function_index_t func_index = -1; - TVMBackendPackedCFunc func = nullptr; - - // Foo - EXPECT_EQ(kBasicFuncNames[0], 2); - EXPECT_EQ(kBasicFuncNames[1], 0); - EXPECT_EQ(kBasicFuncNames[2], 'F'); - EXPECT_EQ(kTvmErrorNoError, TVMFuncRegistry_Lookup(&kConstRegistry, "Foo", &func_index)); - EXPECT_EQ(0, func_index); - - EXPECT_EQ(kTvmErrorNoError, TVMFuncRegistry_GetByIndex(&kConstRegistry, func_index, &func)); - EXPECT_EQ(func, &Foo); - - // Bar - EXPECT_EQ(kTvmErrorNoError, TVMFuncRegistry_Lookup(&kConstRegistry, "Bar", &func_index)); - EXPECT_EQ(1, func_index); - - EXPECT_EQ(kTvmErrorNoError, TVMFuncRegistry_GetByIndex(&kConstRegistry, func_index, &func)); - EXPECT_EQ(func, &Bar); - - // Expected not found. - tvm_function_index_t prev_func_index = func_index; - EXPECT_EQ(kTvmErrorFunctionNameNotFound, - TVMFuncRegistry_Lookup(&kConstRegistry, "Baz", &func_index)); - EXPECT_EQ(prev_func_index, func_index); - - // Expected index out of range. 
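The kBasicFuncNames table above packs the registry's function names into a single blob: judging from the assertions in ConstGlobalRegistry, byte 0 holds the function count, byte 1 is a terminator, and the NUL-terminated names follow, with the function pointers kept in a parallel array. A small Python sketch of the lookup those tests exercise; the header layout is inferred from the test data, not taken from the CRT headers.

```python
def registry_lookup(names_blob: bytes, funcs: list, name: str):
    """Return (index, func) for `name`, mirroring TVMFuncRegistry_Lookup/GetByIndex.

    Assumed layout: names_blob[0] is the function count, names_blob[1] is a
    terminator, then each name follows NUL-terminated.
    """
    count = names_blob[0]
    cursor = 2
    for index in range(count):
        end = names_blob.index(b"\0", cursor)
        if names_blob[cursor:end] == name.encode():
            return index, funcs[index]
        cursor = end + 1
    raise KeyError(name)  # analogous to kTvmErrorFunctionNameNotFound


# Same table as kBasicFuncNames / funcs above, with placeholder function objects.
names = b"\x02\x00Foo\x00Bar\x00"
assert registry_lookup(names, ["Foo_impl", "Bar_impl"], "Bar")[0] == 1
```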
- func = nullptr; - EXPECT_EQ(kTvmErrorFunctionIndexInvalid, TVMFuncRegistry_GetByIndex(&kConstRegistry, 2, &func)); - EXPECT_EQ(func, nullptr); -} - -/*! \brief Return a test function handle, with number repeating for all bytes in a void*. */ -static TVMBackendPackedCFunc TestFunctionHandle(uint8_t number) { - uintptr_t handle = 0; - for (size_t i = 0; i < sizeof(TVMBackendPackedCFunc); i++) { - handle |= ((uintptr_t)handle) << (8 * i); - } - - return (TVMBackendPackedCFunc)handle; -} - -static void snprintf_truncate(char* target, size_t bytes, const char* str) { -#ifdef __GNUC__ -#if __GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ >= 1) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wformat-truncation" -#endif -#endif - EXPECT_GT(snprintf(target, bytes, "%s", str), 0); -#ifdef __GNUC__ -#if __GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ >= 1) -#pragma GCC diagnostic pop -#endif -#endif -} - -TEST(MutableFuncRegistry, Create) { - uint8_t mem_buffer[kTvmAverageFuncEntrySizeBytes * 3]; - // A substring used to create function names for testing. - const char* function_name_chars = "abcdefghijklmnopqrstuvwxyzyxw"; - - // function_name_chars is used to produce 2 function names. The second one is expected to - // overfill `names`; assert there are at least enough data in function_name_chars to do this. - EXPECT_LE(kTvmAverageFuncEntrySizeBytes + kTvmAverageFunctionNameStrlenBytes, - strlen(function_name_chars)); - - for (unsigned int buf_size = 0; buf_size < kTvmAverageFuncEntrySizeBytes; buf_size++) { - EXPECT_EQ(kTvmErrorBufferTooSmall, TVMMutableFuncRegistry_Create(NULL, mem_buffer, buf_size)); - } - - for (unsigned int rem = 0; rem < kTvmAverageFuncEntrySizeBytes; rem++) { - // test_function name will be used to test overfilling. - auto test_function_name = - std::make_unique(kTvmAverageFunctionNameStrlenBytes + 2 + rem); - TVMMutableFuncRegistry reg; - memset(mem_buffer, 0, sizeof(mem_buffer)); - EXPECT_EQ(kTvmErrorNoError, TVMMutableFuncRegistry_Create( - ®, mem_buffer, kTvmAverageFuncEntrySizeBytes * 2 + rem)); - - snprintf_truncate(test_function_name.get(), kTvmAverageFunctionNameStrlenBytes + 1, - function_name_chars); - - // Add function #1, and verify it can be retrieved. - EXPECT_EQ(kTvmErrorNoError, TVMMutableFuncRegistry_Set(®, test_function_name.get(), - TestFunctionHandle(0x01), 0)); - - tvm_function_index_t func_index = 100; - EXPECT_EQ(kTvmErrorNoError, - TVMFuncRegistry_Lookup(®.registry, test_function_name.get(), &func_index)); - EXPECT_EQ(func_index, 0); - - TVMBackendPackedCFunc func = NULL; - EXPECT_EQ(kTvmErrorNoError, TVMFuncRegistry_GetByIndex(®.registry, func_index, &func)); - EXPECT_EQ(func, TestFunctionHandle(0x01)); - - // Ensure that overfilling `names` by 1 char is not allowed. - snprintf_truncate(test_function_name.get(), kTvmAverageFunctionNameStrlenBytes + rem + 2, - function_name_chars + 1); - - EXPECT_EQ( - kTvmErrorFunctionRegistryFull, - TVMMutableFuncRegistry_Set(®, test_function_name.get(), TestFunctionHandle(0x02), 0)); - EXPECT_EQ(kTvmErrorFunctionNameNotFound, - TVMFuncRegistry_Lookup(®.registry, test_function_name.get(), &func_index)); - - // Add function #2, with intentionally short (by 2 char) name. Verify it can be retrieved. 
- snprintf_truncate(test_function_name.get(), kTvmAverageFunctionNameStrlenBytes - 2 + 1, - function_name_chars + 1); - EXPECT_EQ(kTvmErrorNoError, TVMMutableFuncRegistry_Set(®, test_function_name.get(), - TestFunctionHandle(0x02), 0)); - - EXPECT_EQ(kTvmErrorNoError, - TVMFuncRegistry_Lookup(®.registry, test_function_name.get(), &func_index)); - EXPECT_EQ(func_index, 1); - - func = NULL; - EXPECT_EQ(kTvmErrorNoError, TVMFuncRegistry_GetByIndex(®.registry, func_index, &func)); - EXPECT_EQ(func, TestFunctionHandle(0x01)); - - // Try adding another function, which should fail due to lack of function pointers. - test_function_name[0] = 'a'; - test_function_name[1] = 0; - EXPECT_EQ( - kTvmErrorFunctionRegistryFull, - TVMMutableFuncRegistry_Set(®, test_function_name.get(), TestFunctionHandle(0x03), 0)); - } -} diff --git a/tests/crt/graph_executor_test.cc b/tests/crt/graph_executor_test.cc deleted file mode 100644 index d07c9586c4da..000000000000 --- a/tests/crt/graph_executor_test.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "../../src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h" - -#include - -#include "../../src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h" - -namespace { - -constexpr const char* kJson = R"( -{ - "nodes": [ - { - "op": "null", - "name": "x", - "inputs": [] - }, - { - "op": "null", - "name": "p0", - "inputs": [] - }, - { - "op": "tvm_op", - "name": "tvmgen_default_fused_add", - "attrs": { - "num_outputs": "1", - "num_inputs": "2", - "flatten_data": "0", - "func_name": "tvmgen_default_fused_add", - "hash": "a2b7e0a88031366c" - }, - "inputs": [ - [ - 0, - 0, - 0 - ], - [ - 1, - 0, - 0 - ] - ] - } - ], - "arg_nodes": [0, 1], - "heads": [ - [ - 2, - 0, - 0 - ] - ], - "attrs": { - "dltype": [ - "list_str", - [ - "float32", - "float32", - "float32" - ] - ], - "device_index": [ - "list_int", - [1, 1, 1] - ], - "storage_id": [ - "list_int", - [0, 1, 2] - ], - "shape": [ - "list_shape", - [ - [10, 5], - [1, 5], - [10, 5] - ] - ] - }, - "node_row_ptr": [0, 1, 2, 3] -} -)"; - -// Check a JSON graph can be loaded. 
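The kJson literal above is a minimal graph-executor graph: `nodes` holds two placeholders plus one fused `tvm_op`, `arg_nodes` marks which nodes are inputs, `heads` marks the outputs, and `node_row_ptr` maps nodes to data entries. The TVMGraphExecutor_Load test below only asserts that it parses and that three nodes are found; the same structural facts can be checked with ordinary JSON tooling, for example against a condensed copy of the same graph:

```python
import json

# Condensed copy of the kJson graph above (same content, whitespace reduced).
GRAPH_JSON = """
{"nodes": [
   {"op": "null", "name": "x", "inputs": []},
   {"op": "null", "name": "p0", "inputs": []},
   {"op": "tvm_op", "name": "tvmgen_default_fused_add",
    "attrs": {"num_outputs": "1", "num_inputs": "2", "flatten_data": "0",
              "func_name": "tvmgen_default_fused_add", "hash": "a2b7e0a88031366c"},
    "inputs": [[0, 0, 0], [1, 0, 0]]}],
 "arg_nodes": [0, 1],
 "heads": [[2, 0, 0]],
 "attrs": {"dltype": ["list_str", ["float32", "float32", "float32"]],
           "device_index": ["list_int", [1, 1, 1]],
           "storage_id": ["list_int", [0, 1, 2]],
           "shape": ["list_shape", [[10, 5], [1, 5], [10, 5]]]},
 "node_row_ptr": [0, 1, 2, 3]}
"""

graph = json.loads(GRAPH_JSON)
assert len(graph["nodes"]) == 3          # matches executor.nodes_count == 3 in the test below
assert graph["arg_nodes"] == [0, 1]      # x and p0 are graph inputs, not operators
assert graph["heads"] == [[2, 0, 0]]     # the single output is node 2, output slot 0
```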
-TEST(TVMGraphExecutor_Load, Parse) { - JSONReader reader; - tvm_crt_error_t err = JSONReader_Create(kJson, &reader); - EXPECT_EQ(err, kTvmErrorNoError); - TVMGraphExecutor executor; - memset(&executor, 0, sizeof(executor)); - int status = TVMGraphExecutor_Load(&executor, &reader); - EXPECT_EQ(status, 0); - EXPECT_EQ(executor.nodes_count, 3); -} - -} // namespace diff --git a/tests/crt/page_allocator_test.cc b/tests/crt/page_allocator_test.cc deleted file mode 100644 index 527dbecd0e07..000000000000 --- a/tests/crt/page_allocator_test.cc +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include -#include - -#include "crt_config.h" - -#define ROUND_UP(qty, modulo) (((qty) + ((modulo)-1)) / (modulo) * (modulo)) - -static constexpr const unsigned int kTotalPages = 128; -static constexpr const unsigned int kNumUsablePages = - (sizeof(void*) == 8 ? 94 : (sizeof(void*) == 4 ? 99 : 0)); -static constexpr const unsigned int kPageSizeBytesLog = 8; // 256 byte pages. 
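The constants above describe a pool of 128 pages of 256 bytes each; the ROUND_UP macro aligns the raw buffer to a page boundary, and the AddressToPageNumber helper in the fixture below turns a pointer back into a page index. The same arithmetic in Python, using a made-up base address purely for illustration:

```python
PAGE_SIZE_BYTES_LOG = 8                   # 256-byte pages, as in kPageSizeBytesLog above
PAGE_SIZE = 1 << PAGE_SIZE_BYTES_LOG


def round_up(qty: int, modulo: int) -> int:
    """Same arithmetic as the ROUND_UP macro above."""
    return (qty + modulo - 1) // modulo * modulo


def address_to_page_number(addr: int, pool_base: int) -> int:
    """Mirrors PageAllocatorTest::AddressToPageNumber in the fixture below."""
    return (addr - pool_base) >> PAGE_SIZE_BYTES_LOG


pool_base = round_up(0x2000_0001, PAGE_SIZE)      # hypothetical unaligned buffer address
assert pool_base == 0x2000_0100                   # rounded up to the next 256-byte boundary
assert address_to_page_number(pool_base + 3 * PAGE_SIZE, pool_base) == 3
```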
-static constexpr const unsigned int kMemoryPoolSizeBytes = kTotalPages * (1 << kPageSizeBytesLog); - -class PageAllocatorTest : public ::testing::Test { - protected: - void SetUp() override { - memset(raw_memory_pool, 0, sizeof(raw_memory_pool)); - memory_pool = reinterpret_cast( - ROUND_UP(((uintptr_t)raw_memory_pool), (1 << kPageSizeBytesLog))); - PageMemoryManagerCreate(&interface, memory_pool, kMemoryPoolSizeBytes, kPageSizeBytesLog); - mgr = reinterpret_cast(interface); - ASSERT_EQ(kNumUsablePages, mgr->ptable.max_pages); - dev_ = {kDLCPU, 0}; - } - - unsigned int AddressToPageNumber(void* a) { - return (reinterpret_cast(a) - reinterpret_cast(memory_pool)) >> - kPageSizeBytesLog; - } - - uint8_t raw_memory_pool[kMemoryPoolSizeBytes + (1 << kPageSizeBytesLog)]; - uint8_t* memory_pool; - MemoryManagerInterface* interface; - MemoryManager* mgr; - DLDevice dev_; -}; - -#define EXPECT_PAGE(expected, actual) EXPECT_EQ(expected, AddressToPageNumber(actual)) - -TEST_F(PageAllocatorTest, AllocFreeFifo) { - EXPECT_EQ(interface->vleak_size, 0); - - for (int i = 0; i < 2; i++) { - void* ptrs[kNumUsablePages]; - for (size_t idx = 0; idx < kNumUsablePages; idx++) { - void* a; - EXPECT_EQ(interface->Allocate(interface, 1, dev_, &a), kTvmErrorNoError); - if (i == 0) { - EXPECT_PAGE(idx, a); - } else { - EXPECT_PAGE(kNumUsablePages - 1 - idx, a); - } - EXPECT_EQ(static_cast(interface->vleak_size), idx + 1); - ptrs[idx] = a; - } - - for (int idx = kNumUsablePages - 1; idx >= 0; idx--) { - interface->Free(interface, ptrs[idx], dev_); - EXPECT_EQ(interface->vleak_size, idx); - } - } -} diff --git a/tests/crt/platform.cc b/tests/crt/platform.cc deleted file mode 100644 index 5f8da6732a97..000000000000 --- a/tests/crt/platform.cc +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include -#include -#include - -// Provide dummy implementations for TVM runtime functions for use by the tests. - -extern "C" { - -void InternalTVMPlatformAbort(tvm_crt_error_t error_code) { - FAIL() << "TVMPlatformAbort(" << error_code << ")"; -} - -void TVMPlatformAbort(tvm_crt_error_t error_code) { - InternalTVMPlatformAbort(error_code); - exit(2); // for __attribute__((noreturn)) -} - -struct TVMModule; -const TVMModule* TVMSystemLibEntryPoint(void) { return NULL; } - -void TVMLogf(const char* fmt, ...) 
{ - va_list args; - char log_buf[1024]; - va_start(args, fmt); - int ret = vsnprintf(log_buf, sizeof(log_buf), fmt, args); - va_end(args); - - if (ret < 0) { - LOG(ERROR) << "TVMLogf: error formatting: " << fmt; - } else { - LOG(INFO) << "TVMLogf: " << std::string(log_buf, ret); - } -} - -tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { - *out_ptr = malloc(num_bytes); - return *out_ptr ? kTvmErrorNoError : kTvmErrorPlatformNoMemory; -} - -tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { - if (ptr) { - free(ptr); - } - return kTvmErrorNoError; -} - -tvm_crt_error_t TVMPlatformTimerStart() { return kTvmErrorFunctionCallNotImplemented; } - -tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) { - return kTvmErrorFunctionCallNotImplemented; -} -} diff --git a/tests/crt/session_test.cc b/tests/crt/session_test.cc deleted file mode 100644 index b6b58e819700..000000000000 --- a/tests/crt/session_test.cc +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -#include -#include -#include -#include - -#include -#include - -#include "buffer_write_stream.h" -#include "crt_config.h" - -using ::tvm::runtime::micro_rpc::Framer; -using ::tvm::runtime::micro_rpc::MessageType; -using ::tvm::runtime::micro_rpc::Session; -using ::tvm::runtime::micro_rpc::Unframer; - -extern "C" { -void TestSessionMessageReceivedThunk(void* context, MessageType message_type, FrameBuffer* buf); -} - -class ReceivedMessage { - public: - ReceivedMessage(MessageType type, std::string message) : type{type}, message{message} {} - - bool operator==(const ReceivedMessage& other) const { - return other.type == type && other.message == message; - } - - MessageType type; - std::string message; -}; - -class TestSession { - public: - explicit TestSession(uint8_t initial_nonce) - : framer{&framer_write_stream}, - receive_buffer{receive_buffer_array, sizeof(receive_buffer_array)}, - sess{&framer, &receive_buffer, TestSessionMessageReceivedThunk, this}, - unframer{sess.Receiver()}, - initial_nonce{initial_nonce} {} - - void WriteTo(TestSession* other) { - auto framer_buffer = framer_write_stream.BufferContents(); - size_t bytes_to_write = framer_buffer.size(); - const uint8_t* write_cursor = reinterpret_cast(framer_buffer.data()); - while (bytes_to_write > 0) { - size_t bytes_consumed; - auto to_return = other->unframer.Write(write_cursor, bytes_to_write, &bytes_consumed); - EXPECT_EQ(to_return, kTvmErrorNoError); - bytes_to_write -= bytes_consumed; - write_cursor += bytes_consumed; - } - } - - void ClearBuffers() { - framer_write_stream.Reset(); - messages_received.clear(); - sess.ClearReceiveBuffer(); - } - - std::vector messages_received; - BufferWriteStream<300> framer_write_stream; - Framer framer; - uint8_t receive_buffer_array[300]; - FrameBuffer receive_buffer; - Session sess; - Unframer unframer; - uint8_t initial_nonce; -}; - -#define EXPECT_FRAMED_PACKET(session, expected) \ - EXPECT_EQ(std::string(expected, sizeof(expected) - 1), \ - (session).framer_write_stream.BufferContents()); - -extern "C" { -void TestSessionMessageReceivedThunk(void* context, MessageType message_type, FrameBuffer* buf) { - std::string message; - if (message_type != MessageType::kStartSessionReply) { - uint8_t message_buf[300]; - EXPECT_LE(buf->ReadAvailable(), sizeof(message_buf)); - size_t message_size_bytes = buf->Read(message_buf, sizeof(message_buf)); - message = std::string(reinterpret_cast(message_buf), message_size_bytes); - } - - static_cast(context)->messages_received.emplace_back( - ReceivedMessage(message_type, message)); -} -} - -class SessionTest : public ::testing::Test { - public: - static constexpr const uint8_t kAliceNonce = 0x3c; - static constexpr const uint8_t kBobNonce = 0xab; - - TestSession alice_{kAliceNonce}; - TestSession bob_{kBobNonce}; -}; - -TEST_F(SessionTest, NormalExchange) { - tvm_crt_error_t err; - err = alice_.sess.Initialize(alice_.initial_nonce); - EXPECT_EQ(kTvmErrorNoError, err); - EXPECT_FRAMED_PACKET(alice_, - "\xfe\xff\xfd\x03\0\0\0\0\0\x02" - "fw"); - alice_.WriteTo(&bob_); - - err = bob_.sess.Initialize(bob_.initial_nonce); - EXPECT_EQ(kTvmErrorNoError, err); - EXPECT_FRAMED_PACKET(bob_, - "\xfe\xff\xfd\x03\0\0\0\0\0\x02" - "fw"); - alice_.WriteTo(&alice_); - - bob_.ClearBuffers(); - alice_.ClearBuffers(); - - err = alice_.sess.StartSession(); - EXPECT_EQ(err, kTvmErrorNoError); - EXPECT_FRAMED_PACKET(alice_, "\xff\xfd\x04\0\0\0\x82\0\0\x01{\xE9"); - - bob_.ClearBuffers(); - alice_.WriteTo(&bob_); - EXPECT_FRAMED_PACKET(bob_, - "\xff\xfd\x4\0\0\0\x82" 
- "f\x01\x01\x81\xf3"); - EXPECT_TRUE(bob_.sess.IsEstablished()); - - bob_.WriteTo(&alice_); - EXPECT_TRUE(alice_.sess.IsEstablished()); - ASSERT_EQ(alice_.messages_received.size(), 1UL); - EXPECT_EQ(alice_.messages_received[0], ReceivedMessage(MessageType::kStartSessionReply, "")); - - alice_.ClearBuffers(); - alice_.sess.SendMessage(MessageType::kNormal, reinterpret_cast("hello"), 5); - EXPECT_FRAMED_PACKET(alice_, - "\xFF\xFD\b\0\0\0\x82" - "f\x10hello\x90("); - alice_.WriteTo(&bob_); - ASSERT_EQ(bob_.messages_received.size(), 2UL); - EXPECT_EQ(bob_.messages_received[0], ReceivedMessage(MessageType::kStartSessionReply, "")); - EXPECT_EQ(bob_.messages_received[1], ReceivedMessage(MessageType::kNormal, "hello")); - - bob_.ClearBuffers(); - bob_.sess.SendMessage(MessageType::kNormal, reinterpret_cast("olleh"), 5); - EXPECT_FRAMED_PACKET(bob_, - "\xff\xfd\b\0\0\0\x82" - "f\x10ollehLv"); - bob_.WriteTo(&alice_); - ASSERT_EQ(alice_.messages_received.size(), 1UL); - EXPECT_EQ(alice_.messages_received[0], ReceivedMessage(MessageType::kNormal, "olleh")); - - alice_.ClearBuffers(); - bob_.ClearBuffers(); - - alice_.sess.SendMessage(MessageType::kLog, reinterpret_cast("log1"), 4); - EXPECT_FRAMED_PACKET(alice_, "\xff\xfd\a\0\0\0\0\0\x03log1\xf0\xd4"); - alice_.WriteTo(&bob_); - ASSERT_EQ(bob_.messages_received.size(), 1UL); - EXPECT_EQ(bob_.messages_received[0], ReceivedMessage(MessageType::kLog, "log1")); - - bob_.sess.SendMessage(MessageType::kLog, reinterpret_cast("zero"), 4); - EXPECT_FRAMED_PACKET(bob_, "\xff\xfd\a\0\0\0\0\0\x03zero\xb2h"); - bob_.WriteTo(&alice_); - ASSERT_EQ(alice_.messages_received.size(), 1UL); - EXPECT_EQ(alice_.messages_received[0], ReceivedMessage(MessageType::kLog, "zero")); -} - -TEST_F(SessionTest, LogBeforeSessionStart) { - alice_.sess.SendMessage(MessageType::kLog, reinterpret_cast("log1"), 4); - EXPECT_FRAMED_PACKET(alice_, "\xfe\xff\xfd\a\0\0\0\0\0\x03log1\xf0\xd4"); - alice_.WriteTo(&bob_); - ASSERT_EQ(bob_.messages_received.size(), 1UL); - EXPECT_EQ(bob_.messages_received[0], ReceivedMessage(MessageType::kLog, "log1")); - - bob_.sess.SendMessage(MessageType::kLog, reinterpret_cast("zero"), 4); - EXPECT_FRAMED_PACKET(bob_, "\xfe\xff\xfd\a\0\0\0\0\0\x03zero\xb2h"); - bob_.WriteTo(&alice_); - ASSERT_EQ(alice_.messages_received.size(), 1UL); - EXPECT_EQ(alice_.messages_received[0], ReceivedMessage(MessageType::kLog, "zero")); -} - -static constexpr const char kBobStartPacket[] = "\xff\xfd\x04\0\0\0f\0\0\x01`\xa7"; - -TEST_F(SessionTest, DoubleStart) { - tvm_crt_error_t err; - err = alice_.sess.Initialize(alice_.initial_nonce); - EXPECT_EQ(kTvmErrorNoError, err); - EXPECT_FRAMED_PACKET(alice_, - "\xfe\xff\xfd\x03\0\0\0\0\0\x02" - "fw"); - alice_.WriteTo(&bob_); - - err = bob_.sess.Initialize(bob_.initial_nonce); - EXPECT_EQ(kTvmErrorNoError, err); - EXPECT_FRAMED_PACKET(bob_, - "\xfe\xff\xfd\x03\0\0\0\0\0\x02" - "fw"); - alice_.WriteTo(&alice_); - - bob_.ClearBuffers(); - alice_.ClearBuffers(); - - EXPECT_EQ(kTvmErrorNoError, alice_.sess.StartSession()); - EXPECT_FRAMED_PACKET(alice_, "\xff\xfd\x04\0\0\0\x82\0\0\x01{\xe9"); - EXPECT_FALSE(alice_.sess.IsEstablished()); - - EXPECT_EQ(kTvmErrorNoError, bob_.sess.StartSession()); - EXPECT_FRAMED_PACKET(bob_, kBobStartPacket); - EXPECT_FALSE(bob_.sess.IsEstablished()); - - // Sending Alice -> Bob should have no effect (regenerated Bob nonce > regenerated Alice nonce). 
- bob_.framer_write_stream.Reset(); - alice_.WriteTo(&bob_); - EXPECT_FRAMED_PACKET(bob_, ""); - EXPECT_FALSE(bob_.sess.IsEstablished()); - - // Sending Bob -> Alice should start the session. - alice_.ClearBuffers(); - size_t bytes_consumed; - EXPECT_EQ(kTvmErrorNoError, - alice_.unframer.Write(reinterpret_cast(kBobStartPacket), - sizeof(kBobStartPacket), &bytes_consumed)); - EXPECT_EQ(bytes_consumed, sizeof(kBobStartPacket)); - EXPECT_FRAMED_PACKET(alice_, "\xFF\xFD\x4\0\0\0fE\x01\x01\fb"); - EXPECT_TRUE(alice_.sess.IsEstablished()); - - bob_.ClearBuffers(); - alice_.WriteTo(&bob_); - EXPECT_TRUE(bob_.sess.IsEstablished()); -} diff --git a/tests/crt/stack_allocator_test.cc b/tests/crt/stack_allocator_test.cc deleted file mode 100644 index cd0c4a8b65e2..000000000000 --- a/tests/crt/stack_allocator_test.cc +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "../../src/runtime/crt/memory/stack_allocator.c" - -#include -#include - -// Check with LIFO checks enabled for stack allocator -#define TVM_CRT_STACK_ALLOCATOR_ENABLE_LIFO_CHECK - -// Number of memory misalignment in bytes -#define NUM_MEMORY_MISALIGNMENT_BYTES 1 - -/*! - * Align memory pointer. - * This function modifies memory_ptr to adjust alignment. - * \return Number of memory offset. - */ -static uint32_t align_pointer(uint8_t** memory_ptr) { - uint32_t extra = (uintptr_t)(*memory_ptr) % TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES; - uint32_t offset = - (TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES - extra) & (TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES - 1); - *memory_ptr += offset; - return offset; -} - -/*! - * Add misalignment to memory pointer. - * This function modifies memory_ptr. - * \return Number of memory offset. - */ -static uint32_t misalign_pointer(uint8_t** memory_ptr) { - uint32_t extra = (uintptr_t)(*memory_ptr) % TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES; - if (extra == 0) { - *memory_ptr += NUM_MEMORY_MISALIGNMENT_BYTES; - return 1; - } - return 0; -} - -/* - * Tests allocations are properly aligned when allocated. 
- */ -TEST(StackAllocatorTest, Allocate) { - static uint8_t model_memory[128]; - tvm_workspace_t tvm_runtime_workspace; - uint8_t* model_memory_ptr = model_memory; - uint32_t offset = align_pointer(&model_memory_ptr); - ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr, - sizeof(model_memory) - offset), - kTvmErrorNoError); - - void* block_one = NULL; - ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_one, 1), - kTvmErrorNoError); - ASSERT_EQ(block_one, &model_memory_ptr[0]); - - void* block_two = NULL; - ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 2, &block_two, 1), - kTvmErrorNoError); - ASSERT_EQ(block_two, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]); - - void* two_blocks = NULL; - ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 24, &two_blocks, 1), - kTvmErrorNoError); - ASSERT_EQ(two_blocks, &model_memory_ptr[32 + 2 * STACK_ALLOCATOR_TAG_SIZE_BYTES]); - - void* block_three = NULL; - ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_three, 1), - kTvmErrorNoError); - ASSERT_EQ(block_three, &model_memory_ptr[64 + 3 * STACK_ALLOCATOR_TAG_SIZE_BYTES]); -} - -/* - * Tests resetting the stack after dealloc. - */ -TEST(StackAllocatorTest, Free) { - static uint8_t model_memory[80]; - tvm_workspace_t tvm_runtime_workspace; - uint8_t* model_memory_ptr = model_memory; - uint32_t offset = align_pointer(&model_memory_ptr); - ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr, - sizeof(model_memory) - offset), - kTvmErrorNoError); - - void* block_one = NULL; - ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_one, 1), - kTvmErrorNoError); - ASSERT_EQ(block_one, &model_memory_ptr[0]); - - void* block_two = NULL; - ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_two, 1), - kTvmErrorNoError); - ASSERT_EQ(block_two, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]); - ASSERT_EQ(kTvmErrorNoError, StackMemoryManager_Free_Body(&tvm_runtime_workspace, block_two, 1)); - - void* two_blocks = NULL; - ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 2, &two_blocks, 1), - kTvmErrorNoError); - ASSERT_EQ(two_blocks, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]); - ASSERT_EQ(kTvmErrorNoError, StackMemoryManager_Free_Body(&tvm_runtime_workspace, two_blocks, 1)); - - void* block_three = NULL; - ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_three, 1), - kTvmErrorNoError); - ASSERT_EQ(block_three, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]); -} - -/* - * Tests we return NULL if we over allocate. 
- */ -TEST(StackAllocatorTest, OverAllocate) { - static uint8_t model_memory[80]; - tvm_workspace_t tvm_runtime_workspace; - uint8_t* model_memory_ptr = model_memory; - uint32_t offset = align_pointer(&model_memory_ptr); - ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr, - sizeof(model_memory) - offset), - kTvmErrorNoError); - - void* block_one = NULL; - ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_one, 1), - kTvmErrorNoError); - ASSERT_EQ(block_one, &model_memory_ptr[0]); - - void* block_two = NULL; - ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_two, 1), - kTvmErrorNoError); - ASSERT_EQ(block_two, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]); - - void* two_blocks = NULL; - ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 64, &two_blocks, 1), - kTvmErrorPlatformNoMemory); - ASSERT_EQ(two_blocks, (void*)NULL); -} - -/* - * Test for out-of-order memory deallocation. - */ -TEST(StackAllocatorTest, FreeOutOfOrder) { - static uint8_t model_memory[80]; - tvm_workspace_t tvm_runtime_workspace; - uint8_t* model_memory_ptr = model_memory; - uint32_t offset = align_pointer(&model_memory_ptr); - ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr, - sizeof(model_memory) - offset), - kTvmErrorNoError); - - void* block_one = NULL; - ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_one, 1), - kTvmErrorNoError); - ASSERT_EQ(block_one, &model_memory_ptr[0]); - - void* block_two = NULL; - ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_two, 1), - kTvmErrorNoError); - ASSERT_EQ(block_two, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]); - - ASSERT_EQ(StackMemoryManager_Free_Body(&tvm_runtime_workspace, block_one, 1), - kTvmErrorPlatformStackAllocBadFree); -} - -/* - * Test for initial memory misalignment. 
- */ -TEST(StackAllocatorTest, InitialMemoryMisAlignment) { - static uint8_t model_memory[80]; - tvm_workspace_t tvm_runtime_workspace; - uint8_t* model_memory_ptr = model_memory; - - // Add misaslignment to memory pointer - uint32_t offset = misalign_pointer(&model_memory_ptr); - - // Calculate expected offset - uint8_t* misaligned_ptr = model_memory_ptr; - uint32_t alignment_offset = align_pointer(&misaligned_ptr); - - ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr, - sizeof(model_memory) - offset), - kTvmErrorNoError); - - ASSERT_EQ(tvm_runtime_workspace.next_alloc, &model_memory_ptr[alignment_offset]); - ASSERT_EQ(tvm_runtime_workspace.workspace_size, sizeof(model_memory) - offset - alignment_offset); -} diff --git a/tests/lint/check_file_type.py b/tests/lint/check_file_type.py index 757c00e0e344..c27bd410f454 100644 --- a/tests/lint/check_file_type.py +++ b/tests/lint/check_file_type.py @@ -145,28 +145,6 @@ "docs/_static/img/tvm-logo-square.png", # pytest config "pytest.ini", - # microTVM tests - "tests/micro/testdata/mnist/digit-2.jpg", - "tests/micro/testdata/mnist/digit-9.jpg", - "tests/micro/testdata/mnist/mnist-8.onnx", - # microTVM Zephyr runtime - "apps/microtvm/zephyr/template_project/CMakeLists.txt.template", - "apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-arm", - "apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-xilinx-aarch64", - "apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-i386", - "apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-riscv32", - "apps/microtvm/zephyr/template_project/qemu-hack/qemu-system-riscv64", - "apps/microtvm/zephyr/template_project/fvp-hack/FVP_Corstone_SSE-300_Ethos-U55", - "apps/microtvm/zephyr/template_project/app-overlay/nucleo_l4r5zi.overlay", - # microTVM Arduino runtime - "apps/microtvm/arduino/template_project/Makefile.template", - # microTVM CRT - "src/runtime/crt/crt_config.h.template", - "src/runtime/crt/host/CMakeLists.txt.template", - # microTVM Virtual Machines - "apps/microtvm/poetry.lock", - "apps/microtvm/reference-vm/Vagrantfile", - "apps/microtvm/reference-vm/base-box/Vagrantfile.packer-template", # Hexagon "src/runtime/hexagon/rpc/android_bash.sh.template", "src/runtime/hexagon/profiler/lwp_handler.S", diff --git a/tests/lint/pylint.sh b/tests/lint/pylint.sh index 2e0e0afa145b..90e50dfa9433 100755 --- a/tests/lint/pylint.sh +++ b/tests/lint/pylint.sh @@ -18,11 +18,7 @@ set -euxo pipefail python3 -m pylint python/tvm --rcfile="$(dirname "$0")"/pylintrc -python3 -m pylint tests/python/micro/test_crt.py --rcfile="$(dirname "$0")"/pylintrc python3 -m pylint tests/python/tvmscript/test_tvmscript_type.py --rcfile="$(dirname "$0")"/pylintrc -python3 -m pylint tests/python/contrib/test_cmsisnn --rcfile="$(dirname "$0")"/pylintrc -python3 -m pylint tests/python/contrib/test_ethosn --rcfile="$(dirname "$0")"/pylintrc -python3 -m pylint tests/python/relay/aot/*.py --rcfile="$(dirname "$0")"/pylintrc python3 -m pylint tests/python/ci --rcfile="$(dirname "$0")"/pylintrc python3 -m pylint tests/python/integration/ --rcfile="$(dirname "$0")"/pylintrc python3 -m pylint tests/python/conftest.py --rcfile="$(dirname "$0")"/pylintrc diff --git a/tests/lint/rat-excludes b/tests/lint/rat-excludes index 93478df8dde0..08250b9fb069 100644 --- a/tests/lint/rat-excludes +++ b/tests/lint/rat-excludes @@ -20,9 +20,6 @@ .*\.interp .*\.tokens -# microTVM test data files -testdata - # Generated modules .*\.egg-info .*gen_modules diff --git a/tests/micro/.gitignore 
b/tests/micro/.gitignore deleted file mode 100644 index 88d8d834ed2c..000000000000 --- a/tests/micro/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# test workspaces -*/workspace_*/ diff --git a/tests/micro/__init__.py b/tests/micro/__init__.py deleted file mode 100644 index 13a83393a912..000000000000 --- a/tests/micro/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/tests/micro/arduino/.gitignore b/tests/micro/arduino/.gitignore deleted file mode 100644 index 30bf0f9bc376..000000000000 --- a/tests/micro/arduino/.gitignore +++ /dev/null @@ -1 +0,0 @@ -workspace* diff --git a/tests/micro/arduino/README.md b/tests/micro/arduino/README.md deleted file mode 100644 index 36cd7d5f46d7..000000000000 --- a/tests/micro/arduino/README.md +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - - - - - - - - - - -This directory contains tests for MicroTVM's integration with Arduino. - -To run the test, you first need to be running in a Python environment with -all of the appropriate TVM dependencies installed. You can run the test with: - -``` -$ cd tvm/tests/micro/arduino -$ pytest --board=spresense -``` - -Most of these tests require a supported Arduino board to be connected. -If you don't want to run these tests, you can pass the flag -`--test-build-only` to only test project generation and compilation. - -To see the list of supported values for `--board`, run: -``` -$ pytest --help -``` - -If you would like to test with a real hardware and need to target one of many -identical devices, you have the option to pass the serial number for your -development board. -``` -$ pytest --board=due --serial-number="4873ce" -``` diff --git a/tests/micro/arduino/conftest.py b/tests/micro/arduino/conftest.py deleted file mode 100644 index ffa1376efe12..000000000000 --- a/tests/micro/arduino/conftest.py +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -pytest_plugins = [ - "tvm.micro.testing.pytest_plugin", -] - -import pytest - - -def pytest_configure(config): - config.addinivalue_line( - "markers", "requires_hardware: mark test to run only when an Arduino board is connected" - ) diff --git a/tests/micro/arduino/test_arduino_error_detection.py b/tests/micro/arduino/test_arduino_error_detection.py deleted file mode 100644 index 75b97fa86ca3..000000000000 --- a/tests/micro/arduino/test_arduino_error_detection.py +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pytest - -from tvm.micro.project_api.server import ServerError - -import test_utils -import tvm.testing - - -@pytest.fixture -def project(board, microtvm_debug, workspace_dir, serial_number): - return test_utils.make_kws_project(board, microtvm_debug, workspace_dir, serial_number) - - -def test_blank_project_compiles(workspace_dir, project): - project.build() - - -# Add a bug (an extra curly brace) and make sure the project doesn't compile -def test_bugged_project_compile_fails(workspace_dir, project): - with open(workspace_dir / "project" / "project.ino", "a") as main_file: - main_file.write("}\n") - with pytest.raises(ServerError): - project.build() - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/micro/arduino/test_arduino_rpc_server.py b/tests/micro/arduino/test_arduino_rpc_server.py deleted file mode 100644 index bc31ceb60570..000000000000 --- a/tests/micro/arduino/test_arduino_rpc_server.py +++ /dev/null @@ -1,398 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -This unit test simulates an autotuning workflow, where we: -1. Instantiate the Arduino RPC server project -2. 
Build and flash that project onto our target board - -""" - -import pathlib -import numpy as np -import onnx -import pytest - -import tvm -import tvm.testing -from PIL import Image -from tvm import relay -from tvm.relay.testing import byoc -from tvm.relay.backend import Executor, Runtime - -import test_utils - - -def _make_session( - model, - arduino_board, - workspace_dir, - mod, - build_config, - serial_number: str = None, -): - project = tvm.micro.generate_project( - str(test_utils.TEMPLATE_PROJECT_DIR), - mod, - workspace_dir / "project", - { - "board": arduino_board, - "project_type": "host_driven", - "verbose": bool(build_config.get("debug")), - "serial_number": serial_number, - }, - ) - project.build() - project.flash() - return tvm.micro.Session(project.transport()) - - -def _make_sess_from_op( - model, - arduino_board, - workspace_dir, - op_name, - sched, - arg_bufs, - build_config, - serial_number: str = None, -): - target = tvm.target.target.micro(model) - runtime = Runtime("crt", {"system-lib": True}) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.build(sched, arg_bufs, target=target, runtime=runtime, name=op_name) - - return _make_session(model, arduino_board, workspace_dir, mod, build_config, serial_number) - - -def _make_add_sess(model, arduino_board, workspace_dir, build_config, serial_number: str = None): - A = tvm.te.placeholder((2,), dtype="int8") - B = tvm.te.placeholder((1,), dtype="int8") - C = tvm.te.compute(A.shape, lambda i: A[i] + B[0], name="C") - sched = tvm.te.create_schedule(C.op) - return _make_sess_from_op( - model, - arduino_board, - workspace_dir, - "add", - sched, - [A, B, C], - build_config, - serial_number, - ) - - -# The same test code can be executed on both the QEMU simulation and on real hardware. -@tvm.testing.requires_micro -@pytest.mark.requires_hardware -def test_compile_runtime(board, microtvm_debug, workspace_dir, serial_number): - """Test compiling the on-device runtime.""" - - model = test_utils.ARDUINO_BOARDS[board] - build_config = {"debug": microtvm_debug} - - # NOTE: run test in a nested function so cPython will delete arrays before closing the session. - def test_basic_add(sess): - A_data = tvm.nd.array(np.array([2, 3], dtype="int8"), device=sess.device) - assert (A_data.numpy() == np.array([2, 3])).all() - B_data = tvm.nd.array(np.array([4], dtype="int8"), device=sess.device) - assert (B_data.numpy() == np.array([4])).all() - C_data = tvm.nd.array(np.array([0, 0], dtype="int8"), device=sess.device) - assert (C_data.numpy() == np.array([0, 0])).all() - - system_lib = sess.get_system_lib() - system_lib.get_function("add")(A_data, B_data, C_data) - assert (C_data.numpy() == np.array([6, 7])).all() - - with _make_add_sess(model, board, workspace_dir, build_config, serial_number) as sess: - test_basic_add(sess) - - -@tvm.testing.requires_micro -@pytest.mark.requires_hardware -def test_platform_timer(board, microtvm_debug, workspace_dir, serial_number): - """Test compiling the on-device runtime.""" - - model = test_utils.ARDUINO_BOARDS[board] - build_config = {"debug": microtvm_debug} - - # NOTE: run test in a nested function so cPython will delete arrays before closing the session. 
- def test_basic_add(sess): - A_data = tvm.nd.array(np.array([2, 3], dtype="int8"), device=sess.device) - assert (A_data.numpy() == np.array([2, 3])).all() - B_data = tvm.nd.array(np.array([4], dtype="int8"), device=sess.device) - assert (B_data.numpy() == np.array([4])).all() - C_data = tvm.nd.array(np.array([0, 0], dtype="int8"), device=sess.device) - assert (C_data.numpy() == np.array([0, 0])).all() - - system_lib = sess.get_system_lib() - time_eval_f = system_lib.time_evaluator( - "add", sess.device, number=20, repeat=3, min_repeat_ms=40 - ) - result = time_eval_f(A_data, B_data, C_data) - assert (C_data.numpy() == np.array([6, 7])).all() - assert result.mean > 0 - assert len(result.results) == 3 - - with _make_add_sess(model, board, workspace_dir, build_config, serial_number) as sess: - test_basic_add(sess) - - -@tvm.testing.requires_micro -@pytest.mark.requires_hardware -def test_relay(board, microtvm_debug, workspace_dir, serial_number): - """Testing a simple relay graph""" - model = test_utils.ARDUINO_BOARDS[board] - build_config = {"debug": microtvm_debug} - - shape = (10,) - dtype = "int8" - - # Construct Relay program. - x = relay.var("x", relay.TensorType(shape=shape, dtype=dtype)) - xx = relay.multiply(x, x) - z = relay.add(xx, relay.const(np.ones(shape=shape, dtype=dtype))) - func = relay.Function([x], z) - - target = tvm.target.target.micro(model) - runtime = Runtime("crt", {"system-lib": True}) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.relay.build(func, target=target, runtime=runtime) - - with _make_session(model, board, workspace_dir, mod, build_config, serial_number) as session: - graph_mod = tvm.micro.create_local_graph_executor( - mod.get_graph_json(), session.get_system_lib(), session.device - ) - graph_mod.set_input(**mod.get_params()) - x_in = np.random.randint(10, size=shape[0], dtype=dtype) - graph_mod.run(x=x_in) - result = graph_mod.get_output(0).numpy() - tvm.testing.assert_allclose(graph_mod.get_input(0).numpy(), x_in) - tvm.testing.assert_allclose(result, x_in * x_in + 1) - - -@tvm.testing.requires_micro -@pytest.mark.requires_hardware -def test_onnx(board, microtvm_debug, workspace_dir, serial_number): - """Testing a simple ONNX model.""" - model = test_utils.ARDUINO_BOARDS[board] - build_config = {"debug": microtvm_debug} - - # Load test images. - this_dir = pathlib.Path(__file__).parent - mnist_testdata = this_dir.parent / "testdata" / "mnist" - digit_2 = Image.open(mnist_testdata / "digit-2.jpg").resize((28, 28)) - digit_2 = np.asarray(digit_2).astype("float32") - digit_2 = np.expand_dims(digit_2, axis=0) - - digit_9 = Image.open(mnist_testdata / "digit-9.jpg").resize((28, 28)) - digit_9 = np.asarray(digit_9).astype("float32") - digit_9 = np.expand_dims(digit_9, axis=0) - - # Load ONNX model and convert to Relay. 
- onnx_model = onnx.load(mnist_testdata / "mnist-8.onnx") - shape = {"Input3": (1, 1, 28, 28)} - relay_mod, params = relay.frontend.from_onnx(onnx_model, shape=shape, freeze_params=True) - relay_mod = relay.transform.DynamicToStatic()(relay_mod) - - target = tvm.target.target.micro(model) - runtime = Runtime("crt", {"system-lib": True}) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - executor = Executor("graph", {"link-params": True}) - lowered = relay.build(relay_mod, target, params=params, executor=executor, runtime=runtime) - graph = lowered.get_graph_json() - - with _make_session( - model, board, workspace_dir, lowered, build_config, serial_number - ) as session: - graph_mod = tvm.micro.create_local_graph_executor( - graph, session.get_system_lib(), session.device - ) - - # Send the digit-2 image and confirm that the correct result is returned. - graph_mod.set_input("Input3", tvm.nd.array(digit_2)) - graph_mod.run() - result = graph_mod.get_output(0).numpy() - print(result) - assert np.argmax(result) == 2 - - # Send the digit-9 image and confirm that the correct result is returned. - graph_mod.set_input("Input3", tvm.nd.array(digit_9)) - graph_mod.run() - result = graph_mod.get_output(0).numpy() - assert np.argmax(result) == 9 - - -def check_result( - relay_mod, - model, - arduino_board, - workspace_dir, - map_inputs, - out_shape, - result, - build_config, - serial_number, -): - """Helper function to verify results""" - TOL = 1e-5 - target = tvm.target.target.micro(model) - runtime = Runtime("crt", {"system-lib": True}) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.relay.build(relay_mod, target=target, runtime=runtime) - - with _make_session( - model, arduino_board, workspace_dir, mod, build_config, serial_number - ) as session: - rt_mod = tvm.micro.create_local_graph_executor( - mod.get_graph_json(), session.get_system_lib(), session.device - ) - rt_mod.set_input(**mod.get_params()) - for name, data in map_inputs.items(): - rt_mod.set_input(name, data) - rt_mod.set_input(**mod.get_params()) - rt_mod.run() - - out_shapes = out_shape if isinstance(out_shape, list) else [out_shape] - results = result if isinstance(result, list) else [result] - - for idx, shape in enumerate(out_shapes): - out = tvm.nd.empty(shape, device=session.device) - out = rt_mod.get_output(idx, out) - tvm.testing.assert_allclose(out.numpy(), results[idx], rtol=TOL, atol=TOL) - - -@tvm.testing.requires_micro -@pytest.mark.requires_hardware -def test_byoc_microtvm(board, microtvm_debug, workspace_dir, serial_number): - """This is a simple test case to check BYOC capabilities of microTVM""" - model = test_utils.ARDUINO_BOARDS[board] - build_config = {"debug": microtvm_debug} - - x = relay.var("x", shape=(10, 10)) - w0 = relay.var("w0", shape=(10, 10)) - w1 = relay.var("w1", shape=(10, 10)) - w2 = relay.var("w2", shape=(10, 10)) - w3 = relay.var("w3", shape=(10, 10)) - w4 = relay.var("w4", shape=(10, 10)) - w5 = relay.var("w5", shape=(10, 10)) - w6 = relay.var("w6", shape=(10, 10)) - w7 = relay.var("w7", shape=(10, 10)) - - # C compiler - z0 = relay.add(x, w0) - p0 = relay.subtract(z0, w1) - q0 = relay.multiply(p0, w2) - - z1 = relay.add(x, w3) - p1 = relay.subtract(z1, w4) - q1 = relay.multiply(p1, w5) - - # Other parts on TVM - z2 = relay.add(x, w6) - q2 = relay.subtract(z2, w7) - - r = relay.concatenate((q0, q1, q2), axis=0) - f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r) - mod = tvm.IRModule() - ann = 
byoc.CcompilerAnnotator() - mod["main"] = ann.visit(f) - mod = tvm.relay.transform.PartitionGraph()(mod) - mod = tvm.relay.transform.InferType()(mod) - - x_data = np.random.rand(10, 10).astype("float32") - w_data = [] - for _ in range(8): - w_data.append(np.random.rand(10, 10).astype("float32")) - - map_inputs = {"w{}".format(i): w_data[i] for i in range(8)} - map_inputs["x"] = x_data - check_result( - relay_mod=mod, - map_inputs=map_inputs, - out_shape=(30, 10), - result=np.concatenate( - ( - ((x_data + w_data[0]) - w_data[1]) * w_data[2], - ((x_data + w_data[3]) - w_data[4]) * w_data[5], - x_data + w_data[6] - w_data[7], - ), - axis=0, - ), - model=model, - build_config=build_config, - arduino_board=board, - workspace_dir=workspace_dir, - serial_number=serial_number, - ) - - -def _make_add_sess_with_shape( - model, - arduino_board, - workspace_dir, - shape, - build_config, - serial_number: str = None, -): - A = tvm.te.placeholder(shape, dtype="int8") - C = tvm.te.compute(A.shape, lambda i: A[i] + A[i], name="C") - sched = tvm.te.create_schedule(C.op) - return _make_sess_from_op( - model, - arduino_board, - workspace_dir, - "add", - sched, - [A, C], - build_config, - serial_number, - ) - - -@pytest.mark.parametrize( - "shape,", - [ - pytest.param((1 * 1024,), id="(1*1024)"), - pytest.param((4 * 1024,), id="(4*1024)"), - pytest.param((16 * 1024,), id="(16*1024)"), - ], -) -@tvm.testing.requires_micro -@pytest.mark.requires_hardware -def test_rpc_large_array(board, microtvm_debug, workspace_dir, shape, serial_number): - """Test large RPC array transfer.""" - model = test_utils.ARDUINO_BOARDS[board] - build_config = {"debug": microtvm_debug} - - # NOTE: run test in a nested function so cPython will delete arrays before closing the session. - def test_tensors(sess): - a_np = np.random.randint(low=-128, high=127, size=shape, dtype="int8") - - A_data = tvm.nd.array(a_np, device=sess.device) - assert (A_data.numpy() == a_np).all() - C_data = tvm.nd.array(np.zeros(shape, dtype="int8"), device=sess.device) - assert (C_data.numpy() == np.zeros(shape)).all() - - with _make_add_sess_with_shape( - model, board, workspace_dir, shape, build_config, serial_number - ) as sess: - test_tensors(sess) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/micro/arduino/test_arduino_workflow.py b/tests/micro/arduino/test_arduino_workflow.py deleted file mode 100644 index a9b7e48c452a..000000000000 --- a/tests/micro/arduino/test_arduino_workflow.py +++ /dev/null @@ -1,227 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pathlib -import re -import shutil -import pytest - -import tvm.testing - -import test_utils - -""" -This unit test simulates a simple user workflow, where we: -1. Generate a base sketch using a simple audio model -2. 
Modify the .ino file, much like a user would -3. Compile the sketch for the target board --- If physical hardware is present -- -4. Upload the sketch to a connected board -5. Open a serial connection to the board -6. Use serial connection to ensure model behaves correctly -""" - -# Since these tests are sequential, we'll use the same project/workspace -# directory for all tests in this file. Note that --board can't be loaded -# from the fixture, since the fixture is function scoped (it has to be -# for the tests to be named correctly via parameterization). -@pytest.fixture(scope="module") -def workflow_workspace_dir(request): - board = request.config.getoption("--board") - return test_utils.make_workspace_dir("arduino_workflow", board) - - -@pytest.fixture(scope="module") -def project_dir(workflow_workspace_dir): - return workflow_workspace_dir / "project" - - -# We MUST pass workspace_dir, not project_dir, or the workspace will be dereferenced -# too soon. We can't use the board fixture either for the reason mentioned above. -@pytest.fixture(scope="module") -def project(request, microtvm_debug, workflow_workspace_dir): - board = request.config.getoption("--board") - serial_number = request.config.getoption("--serial-number") - return test_utils.make_kws_project(board, microtvm_debug, workflow_workspace_dir, serial_number) - - -def _get_directory_elements(directory): - return set(f.name for f in directory.iterdir()) - - -def test_project_folder_structure(project_dir, project): - assert set(["microtvm_api_server.py", "project.ino", "src"]).issubset( - _get_directory_elements(project_dir) - ) - - source_dir = project_dir / "src" - assert _get_directory_elements(source_dir) == set( - ["model", "standalone_crt", "platform.c", "platform.h"] - ) - - -def test_project_model_integrity(project_dir, project): - model_dir = project_dir / "src" / "model" - assert _get_directory_elements(model_dir) == set( - ["default_lib0.c", "default_lib1.c", "default_lib2.c", "model.tar"] - ) - - -def test_model_platform_templating(project_dir, project): - # Ensure platform.c was templated with correct TVM_WORKSPACE_SIZE_BYTES - with (project_dir / "src" / "platform.c").open() as f: - platform_c = f.read() - workspace_size_defs = re.findall(r"\#define TVM_WORKSPACE_SIZE_BYTES ([0-9]*)", platform_c) - assert workspace_size_defs - assert len(workspace_size_defs) == 1 - - # Make sure the TVM_WORKSPACE_SIZE_BYTES we define is a reasonable size. We don't want - # to set an exact value, as this test shouldn't break if an improvement to - # TVM causes the amount of memory needed to decrease. 
- workspace_size = int(workspace_size_defs[0]) - assert workspace_size < 30000 - assert workspace_size > 9000 - - -def test_import_rerouting(project_dir, project): - # Check one file to ensure imports were rerouted - runtime_path = project_dir / "src" / "standalone_crt" / "src" / "runtime" - c_backend_api_path = runtime_path / "crt" / "common" / "crt_backend_api.c" - assert c_backend_api_path.exists() - - with c_backend_api_path.open() as f: - c_backend_api_c = f.read() - assert '#include "inttypes.h"' in c_backend_api_c - assert "include/tvm/runtime/crt/platform.h" in c_backend_api_c - - -# Build on top of the generated project by replacing the -# top-level .ino fileand adding data input files, much -# like a user would -@pytest.fixture(scope="module") -def modified_project(project_dir, project): - this_dir = pathlib.Path(__file__).parent - kws_testdata_dir = this_dir.parent / "testdata" / "kws" - arduino_testdata_dir = this_dir / "testdata" - - shutil.copy2(arduino_testdata_dir / "project.ino", project_dir / "project.ino") - - project_data_dir = project_dir / "src" / "data" - project_data_dir.mkdir() - for sample in ["yes.c", "no.c", "silence.c", "unknown.c"]: - shutil.copy2(kws_testdata_dir / sample, project_data_dir / sample) - - return project - - -@pytest.fixture(scope="module") -def compiled_project(modified_project): - modified_project.build() - return modified_project - - -def test_compile_yes_no_project(project_dir, project, compiled_project): - build_dir = project_dir / "build" - assert build_dir.exists() - first_build_file = next(build_dir.iterdir(), None) - assert first_build_file is not None - - -"""------------------------------------------------------------ -If we're not running on real hardware, no further tests are run -------------------------------------------------------------""" - - -@pytest.fixture(scope="module") -def uploaded_project(compiled_project): - compiled_project.flash() - return compiled_project - - -""" Sample serial output: - -category,runtime,yes,no,silence,unknown -yes,56762,115,-123,-125,-123, -no,56762,-128,4,-123,-9, -silence,56792,-128,-118,107,-117, -unknown,56792,-128,-125,-128,125, -""" -SERIAL_OUTPUT_HEADERS = "category,runtime,yes,no,silence,unknown" - - -@pytest.fixture(scope="module") -def serial_output(uploaded_project): - transport = uploaded_project.transport() - transport.open() - out = transport.read(2048, 60) - out_str = out.decode("utf-8") - out_lines = out_str.split("\r\n") - - assert SERIAL_OUTPUT_HEADERS in out_lines - headers_index = out_lines.index(SERIAL_OUTPUT_HEADERS) - data_lines = out_lines[headers_index + 1 : headers_index + 5] - split_lines = [line.split(",") for line in data_lines] - - return [[line[0]] + list(map(int, line[1:6])) for line in split_lines] - - -TENSORFLOW_EVALUATIONS = { - "yes": [115, -123, -125, -123], - "no": [-128, 4, -123, -9], - "silence": [-128, -118, 107, -117], - "unknown": [-128, -125, -128, 125], -} -MAX_PREDICTION_DIFFERENCE = 2 - - -@pytest.mark.requires_hardware -def test_project_inference_correctness(serial_output): - predictions = {line[0]: line[2:] for line in serial_output} - - for sample, prediction in predictions.items(): - # Due to rounding issues, we don't get the *exact* same - # values as Tensorflow gives, but they're pretty close - - reference_prediction = TENSORFLOW_EVALUATIONS[sample] - deltas = [prediction[i] - reference_prediction[i] for i in range(4)] - assert max(deltas) < MAX_PREDICTION_DIFFERENCE - - -MAX_INFERENCE_TIME_US = 200 * 1000 -MAX_INFERENCE_TIME_RANGE_US = 1000 - 
- -@pytest.mark.requires_hardware -def test_project_inference_runtime(serial_output): - runtimes_us = [line[1] for line in serial_output] - - # Inference time will vary based on architecture - # and clock speed. However, anything more than 200 ms - # is way too long. Each inference takes ~60 ms on the - # Sony spresense, running at 156 MHz - assert max(runtimes_us) < MAX_INFERENCE_TIME_US - - # Clock speeds should be consistent for each input. On - # the Sony spresense, they vary by <100 us. Note that - # running with other attached hardware (like the - # Spresense extension board) may cause this check to fail - range_runtimes_us = max(runtimes_us) - min(runtimes_us) - assert range_runtimes_us < MAX_INFERENCE_TIME_RANGE_US - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/micro/arduino/test_utils.py b/tests/micro/arduino/test_utils.py deleted file mode 100644 index e50e91280bbc..000000000000 --- a/tests/micro/arduino/test_utils.py +++ /dev/null @@ -1,92 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import json -import pathlib -import requests -import datetime - -import tvm.micro -import tvm.target.target -from tvm.micro import project -from tvm import relay -from tvm.relay.backend import Executor, Runtime -from tvm.testing.utils import fetch_model_from_url - -TEMPLATE_PROJECT_DIR = pathlib.Path(tvm.micro.get_microtvm_template_projects("arduino")) - -BOARDS = TEMPLATE_PROJECT_DIR / "boards.json" - - -def arduino_boards() -> dict: - """Returns a dict mapping board to target model""" - with open(BOARDS) as f: - board_properties = json.load(f) - - boards_model = {board: info["model"] for board, info in board_properties.items()} - return boards_model - - -ARDUINO_BOARDS = arduino_boards() - - -def make_workspace_dir(test_name, board): - filepath = pathlib.Path(__file__) - board_workspace = ( - filepath.parent - / f"workspace_{test_name}_{board}" - / datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") - ) - - number = 0 - while board_workspace.exists(): - number += 1 - board_workspace = pathlib.Path(str(board_workspace) + f"-{number}") - board_workspace.parent.mkdir(exist_ok=True, parents=True) - t = tvm.contrib.utils.tempdir(board_workspace) - return t - - -def make_kws_project(board, microtvm_debug, workspace_dir, serial_number: str): - this_dir = pathlib.Path(__file__).parent - model = ARDUINO_BOARDS[board] - build_config = {"debug": microtvm_debug} - - mod, params = fetch_model_from_url( - url="https://github.com/tensorflow/tflite-micro/raw/a56087ffa2703b4d5632f024a8a4c899815c31bb/tensorflow/lite/micro/examples/micro_speech/micro_speech.tflite", - model_format="tflite", - sha256="09e5e2a9dfb2d8ed78802bf18ce297bff54281a66ca18e0c23d69ca14f822a83", - ) - - target = tvm.target.target.micro(model) - runtime = Runtime("crt") - executor = Executor("aot", {"unpacked-api": True}) - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = relay.build(mod, target, runtime=runtime, executor=executor, params=params) - - return tvm.micro.generate_project( - str(TEMPLATE_PROJECT_DIR), - mod, - workspace_dir / "project", - { - "board": board, - "project_type": "example_project", - "verbose": bool(build_config.get("debug")), - "serial_number": serial_number, - }, - ) diff --git a/tests/micro/arduino/testdata/project.ino b/tests/micro/arduino/testdata/project.ino deleted file mode 100644 index d7ef155b33f6..000000000000 --- a/tests/micro/arduino/testdata/project.ino +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -#include "src/platform.h" -#include "src/data/yes.c" -#include "src/data/no.c" -#include "src/data/unknown.c" -#include "src/data/silence.c" -#include "src/standalone_crt/include/tvm/runtime/crt/platform.h" - -void performInference(int8_t input_data[1960], char *data_name) { - int8_t output_data[4]; - unsigned long start_time = micros(); - TVMExecute(input_data, output_data); - unsigned long end_time = micros(); - - Serial.print(data_name); - Serial.print(","); - Serial.print(end_time - start_time); - Serial.print(","); - for (int i = 0; i < 4; i++) { - Serial.print(output_data[i]); - Serial.print(","); - } - Serial.println(); -} - -void setup() { - TVMPlatformInitialize(); - Serial.begin(115200); -} - -void loop() { - Serial.println(); - Serial.println("category,runtime,yes,no,silence,unknown"); - performInference((int8_t*) input_yes, "yes"); - performInference((int8_t*) input_no, "no"); - performInference((int8_t*) input_silence, "silence"); - performInference((int8_t*) input_unknown, "unknown"); -} diff --git a/tests/micro/common/__init__.py b/tests/micro/common/__init__.py deleted file mode 100644 index 13a83393a912..000000000000 --- a/tests/micro/common/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/tests/micro/common/conftest.py b/tests/micro/common/conftest.py deleted file mode 100644 index d86fd41bd8bf..000000000000 --- a/tests/micro/common/conftest.py +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations - -pytest_plugins = [ - "tvm.micro.testing.pytest_plugin", -] diff --git a/tests/micro/common/test_autotune.py b/tests/micro/common/test_autotune.py deleted file mode 100644 index 9f22b9c68f22..000000000000 --- a/tests/micro/common/test_autotune.py +++ /dev/null @@ -1,98 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from io import StringIO -import json - -import numpy as np -import pytest - -import tvm -import tvm.testing -import tvm.micro.testing -from tvm.testing.utils import fetch_model_from_url - -TUNING_RUNS_PER_OPERATOR = 2 - - -@pytest.mark.requires_hardware -@tvm.testing.requires_micro -@pytest.mark.skip_boards( - ["nucleo_l4r5zi", "", "nucleo_f746zg", "stm32f746g_disco", "nrf5340dk_nrf5340_cpuapp"] -) -def test_kws_autotune_workflow(platform, board, tmp_path): - mod, params = fetch_model_from_url( - url="https://github.com/tensorflow/tflite-micro/raw/a56087ffa2703b4d5632f024a8a4c899815c31bb/tensorflow/lite/micro/examples/micro_speech/micro_speech.tflite", - model_format="tflite", - sha256="09e5e2a9dfb2d8ed78802bf18ce297bff54281a66ca18e0c23d69ca14f822a83", - ) - target = tvm.micro.testing.get_target(platform, board) - - str_io_logs = tvm.micro.testing.tune_model( - platform, board, target, mod, params, TUNING_RUNS_PER_OPERATOR - ) - assert isinstance(str_io_logs, StringIO) - - str_logs = str_io_logs.getvalue().rstrip().split("\n") - logs = list(map(json.loads, str_logs)) - - # Some tuning tasks don't have any config space, and will only be run once - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - tasks = tvm.autotvm.task.extract_from_program(mod["main"], {}, target) - assert len(tasks) <= len(logs) <= len(tasks) * TUNING_RUNS_PER_OPERATOR - - # Check we tested both operators - op_names = list(map(lambda x: x["input"][1], logs)) - assert op_names[0] == op_names[1] == "conv2d_nhwc_spatial_pack.arm_cpu" - - # Make sure we tested different code. != does deep comparison in Python 3 - assert logs[0]["config"]["index"] != logs[1]["config"]["index"] - assert logs[0]["config"]["entity"] != logs[1]["config"]["entity"] - - # Compile the best model with AOT and connect to it - str_io_logs.seek(0) - with tvm.micro.testing.create_aot_session( - platform, - board, - target, - mod, - params, - build_dir=tmp_path, - tune_logs=str_io_logs, - ) as session: - aot_executor = tvm.runtime.executor.aot_executor.AotModule(session.create_aot_executor()) - - samples = ( - np.random.randint(low=-127, high=128, size=(1, 1960), dtype=np.int8) for x in range(3) - ) - - # Validate perforance across random runs - runtimes = [ - runtime - for _, runtime in tvm.micro.testing.predict_labels_aot( - session, aot_executor, samples, runs_per_sample=20 - ) - ] - # `time` is the average time taken to execute model inference on the - # device, measured in seconds. It does not include the time to upload - # the input data via RPC. On slow boards like the Arduino Due, time - # is around 0.12 (120 ms), so this gives us plenty of buffer. 
- assert np.median(runtimes) < 1 - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/micro/common/test_mlperftiny.py b/tests/micro/common/test_mlperftiny.py deleted file mode 100644 index 318d68184379..000000000000 --- a/tests/micro/common/test_mlperftiny.py +++ /dev/null @@ -1,423 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import os -import re -import logging -from urllib.parse import urlparse -import struct - -import pytest -import tensorflow as tf -import numpy as np -import tarfile -import tempfile -import pathlib - -import tvm -import tvm.testing -from tvm import relay -from tvm.relay.backend import Executor, Runtime -from tvm.micro.project_api import server -from tvm.contrib.download import download_testdata -from tvm.micro import export_model_library_format -from tvm.micro.model_library_format import generate_c_interface_header -from tvm.micro.testing import create_aot_session, predict_labels_aot -from tvm.micro.testing.utils import ( - create_header_file, - mlf_extract_workspace_size_bytes, -) -from tvm.micro.testing.utils import aot_transport_find_message - -MLPERF_TINY_MODELS = { - "kws": { - "name": "Keyword Spotting", - "index": 1, - "url": "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/keyword_spotting/trained_models/kws_ref_model.tflite", - "sample": "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/keyword_spotting_int8_6.pyc.npy", - "sample_label": 6, - }, - "vww": { - "name": "Visual Wake Words", - "index": 2, - "url": "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/visual_wake_words/trained_models/vww_96_int8.tflite", - "sample": "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/visual_wake_word_int8_1.npy", - "sample_label": 1, - }, - # Note: The reason we use quantized model with float32 I/O is - # that TVM does not handle the int8 I/O correctly and accuracy - # would drop significantly. - "ad": { - "name": "Anomaly Detection", - "index": 3, - "url": "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/anomaly_detection/trained_models/ToyCar/baseline_tf23/model/model_ToyCar_quant_fullint_micro.tflite", - "sample": "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/anomaly_detection_normal_id_01.npy", - # This model takes in a (1, 640) vector, so it must be called 40 times - - # once for each time slice. 
- }, - "ic": { - "name": "Image Classification", - "index": 4, - "url": "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/image_classification/trained_models/pretrainedResnet_quant.tflite", - "sample": "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/image_classification_int8_0.npy", - "sample_label": 0, - }, -} - -MLPERFTINY_READY_MSG = "m-ready" -MLPERFTINY_RESULT_MSG = "m-results" -MLPERFTINY_NAME_MSG = "m-name" - - -def mlperftiny_get_module(model_name: str): - model_url = MLPERF_TINY_MODELS[model_name]["url"] - url = urlparse(model_url) - model_path = download_testdata(model_url, os.path.basename(url.path), module="model") - - tflite_model_buf = open(model_path, "rb").read() - try: - import tflite - - tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0) - except AttributeError: - import tflite.Model - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0) - - interpreter = tf.lite.Interpreter(model_path=str(model_path)) - interpreter.allocate_tensors() - input_details = interpreter.get_input_details() - output_details = interpreter.get_output_details() - - model_info = { - "input_name": input_details[0]["name"], - "input_shape": tuple(input_details[0]["shape"]), - "input_dtype": np.dtype(input_details[0]["dtype"]).name, - "output_name": output_details[0]["name"], - "output_shape": tuple(output_details[0]["shape"]), - "output_dtype": np.dtype(output_details[0]["dtype"]).name, - } - - if model_name != "ad": - model_info["quant_output_scale"] = output_details[0]["quantization_parameters"]["scales"][0] - model_info["quant_output_zero_point"] = output_details[0]["quantization_parameters"][ - "zero_points" - ][0] - - relay_mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={model_info["input_name"]: model_info["input_shape"]}, - dtype_dict={model_info["input_name"]: model_info["input_dtype"]}, - ) - return relay_mod, params, model_info - - -def get_test_data(model_name: str, project_type: str) -> list: - sample_url = MLPERF_TINY_MODELS[model_name]["sample"] - url = urlparse(sample_url) - sample_path = download_testdata(sample_url, os.path.basename(url.path), module="data") - sample = np.load(sample_path) - if project_type == "mlperftiny" and model_name != "ad": - sample = sample.astype(np.uint8) - sample_label = None - if "sample_label" in MLPERF_TINY_MODELS[model_name].keys(): - sample_label = MLPERF_TINY_MODELS[model_name]["sample_label"] - return [sample], [sample_label] - - -def predict_ad_labels_aot(session, aot_executor, input_data, runs_per_sample=1): - """A special version of tvm/micro/testing/evaluation.py's predict_labels_aot. 
- The runtime returned for each sample is the median of the runtimes for all slices - in that sample.""" - - assert runs_per_sample > 0 - assert aot_executor.get_num_inputs() == 1 - assert aot_executor.get_num_outputs() == 1 - - sample_counter = 0 - for sample in input_data: - output_fp32 = np.empty_like(sample) - slice_runtimes = [] - - for i, time_slice in enumerate(sample): - aot_executor.get_input(0).copyfrom(time_slice.reshape((1, 640))) - result = aot_executor.module.time_evaluator( - "run", session.device, number=runs_per_sample - )() - slice_runtimes.append(result.mean) - output_fp32[i, :] = aot_executor.get_output(0).numpy() - - sample_counter += 1 - errors = np.mean(np.square(sample - output_fp32), axis=1) - yield np.mean(errors), np.median(slice_runtimes) - - -def _mlperftiny_get_name(device_transport) -> str: - """Get device name.""" - device_transport.write(b"name%", timeout_sec=5) - name_message = aot_transport_find_message(device_transport, MLPERFTINY_NAME_MSG, timeout_sec=5) - m = re.search(r"\[([A-Za-z0-9_]+)\]", name_message) - return m.group(1) - - -def _mlperftiny_infer(transport, warmup: int, infer: int, timeout: int): - """Send MLPerfTiny infer command.""" - cmd = f"infer {warmup} {infer}%".encode("UTF-8") - transport.write(cmd, timeout_sec=timeout) - - -def _mlperftiny_write_sample(device_transport, data: list, timeout: int): - """Write a sample with MLPerfTiny compatible format.""" - cmd = f"db load {len(data)}%".encode("UTF-8") - logging.debug(f"transport write: {cmd}") - device_transport.write(cmd, timeout) - aot_transport_find_message(device_transport, MLPERFTINY_READY_MSG, timeout_sec=timeout) - for item in data: - if isinstance(item, float): - ba = bytearray(struct.pack(" 2: - raise ValueError(f"Hex value not in HH format: {hex_val}") - hex_array = [hex_val] - - for hex_val in hex_array: - cmd = f"db {hex_val}%".encode("UTF-8") - logging.debug(f"transport write: {cmd}") - device_transport.write(cmd, timeout) - aot_transport_find_message(device_transport, MLPERFTINY_READY_MSG, timeout_sec=timeout) - - -def _mlperftiny_test_dataset(device_transport, dataset, timeout): - """Run test dataset compatible with MLPerfTiny format.""" - num_correct = 0 - total = 0 - samples, labels = dataset - i_counter = 0 - for sample in samples: - label = labels[i_counter] - logging.info(f"Writing Sample {i_counter}") - _mlperftiny_write_sample(device_transport, sample.flatten().tolist(), timeout) - _mlperftiny_infer(device_transport, 1, 0, timeout) - results = aot_transport_find_message( - device_transport, MLPERFTINY_RESULT_MSG, timeout_sec=timeout - ) - - m = re.search(r"m\-results\-\[([A-Za-z0-9_,.]+)\]", results) - results = m.group(1).split(",") - results_val = [float(x) for x in results] - results_val = np.array(results_val) - - if np.argmax(results_val) == label: - num_correct += 1 - total += 1 - i_counter += 1 - return float(num_correct / total) - - -def _mlperftiny_test_dataset_ad(device_transport, dataset, timeout): - """Run test dataset compatible with MLPerfTiny format for AD model.""" - samples, _ = dataset - result_output = np.zeros(samples[0].shape[0]) - - for slice in range(0, 40): - _mlperftiny_write_sample(device_transport, samples[0][slice, :].flatten().tolist(), timeout) - _mlperftiny_infer(device_transport, 1, 0, timeout) - results = aot_transport_find_message( - device_transport, MLPERFTINY_RESULT_MSG, timeout_sec=timeout - ) - m = re.search(r"m\-results\-\[([A-Za-z0-9_,.]+)\]", results) - results = m.group(1).split(",") - results_val = [float(x) for x in 
results] - result_output[slice] = np.array(results_val) - return np.average(result_output) - - -@pytest.mark.parametrize("model_name", ["kws", "vww", "ad", "ic"]) -@pytest.mark.parametrize("project_type", ["host_driven", "mlperftiny"]) -@tvm.testing.requires_micro -@pytest.mark.skip_boards( - ["mps2_an521", "mps3_an547", "stm32f746g_disco", "nucleo_f746zg", "nrf5340dk_nrf5340_cpuapp"] -) -def test_mlperftiny_models(platform, board, workspace_dir, serial_number, model_name, project_type): - """MLPerfTiny models test. - Testing MLPerfTiny models using host_driven project. In this case one input sample is used - to verify the end to end execution. Accuracy is not checked in this test. - - Also, this test builds each model in standalone mode that can be used for MLPerfTiny submissions. - """ - if platform != "zephyr": - pytest.skip(reason="Other platforms are not supported yet.") - - use_cmsis_nn = False - relay_mod, params, model_info = mlperftiny_get_module(model_name) - target = tvm.micro.testing.get_target(platform, board) - project_options = {"config_main_stack_size": 4000, "serial_number": serial_number} - - if use_cmsis_nn: - project_options["cmsis_path"] = os.getenv("CMSIS_PATH") - - if model_name == "ad": - predictor = predict_ad_labels_aot - else: - predictor = predict_labels_aot - - samples, labels = get_test_data(model_name, project_type) - if project_type == "host_driven": - with create_aot_session( - platform, - board, - target, - relay_mod, - params, - build_dir=workspace_dir, - # The longest models take ~5 seconds to infer, but running them - # ten times (with NUM_TESTING_RUNS_PER_SAMPLE) makes that 50 - timeout_override=server.TransportTimeouts( - session_start_retry_timeout_sec=300, - session_start_timeout_sec=150, - session_established_timeout_sec=150, - ), - project_options=project_options, - use_cmsis_nn=use_cmsis_nn, - ) as session: - aot_executor = tvm.runtime.executor.aot_executor.AotModule( - session.create_aot_executor() - ) - args = { - "session": session, - "aot_executor": aot_executor, - "input_data": samples, - "runs_per_sample": 10, - } - predicted_labels, runtimes = zip(*predictor(**args)) - - avg_runtime = float(np.mean(runtimes) * 1000) - print(f"Model {model_name} average runtime: {avg_runtime}") - - elif project_type == "mlperftiny": - runtime = Runtime("crt") - executor = Executor( - "aot", {"unpacked-api": True, "interface-api": "c", "workspace-byte-alignment": 8} - ) - - config = {"tir.disable_vectorize": True} - if use_cmsis_nn: - from tvm.relay.op.contrib import cmsisnn - - config["relay.ext.cmsisnn.options"] = {"mcpu": target.mcpu} - relay_mod = cmsisnn.partition_for_cmsisnn(relay_mod, params, mcpu=target.mcpu) - - with tvm.transform.PassContext(opt_level=3, config=config): - module = tvm.relay.build( - relay_mod, target=target, params=params, runtime=runtime, executor=executor - ) - - temp_dir = tvm.contrib.utils.tempdir() - model_tar_path = temp_dir / "model.tar" - export_model_library_format(module, model_tar_path) - workspace_size = mlf_extract_workspace_size_bytes(model_tar_path) - - extra_tar_dir = tvm.contrib.utils.tempdir() - extra_tar_file = extra_tar_dir / "extra.tar" - with tarfile.open(extra_tar_file, "w:gz") as tf: - with tempfile.TemporaryDirectory() as tar_temp_dir: - model_files_path = os.path.join(tar_temp_dir, "include") - os.mkdir(model_files_path) - header_path = generate_c_interface_header( - module.libmod_name, - [model_info["input_name"]], - [model_info["output_name"]], - [], - {}, - [], - 0, - model_files_path, - {}, - {}, - ) - 
tf.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir)) - - create_header_file( - "output_data", - np.zeros( - shape=model_info["output_shape"], - dtype=model_info["output_dtype"], - ), - "include/tvm", - tf, - ) - - input_total_size = 1 - input_shape = model_info["input_shape"] - for i in range(len(input_shape)): - input_total_size *= input_shape[i] - - # float input - if model_name == "ad": - input_total_size *= 4 - - template_project_path = pathlib.Path(tvm.micro.get_microtvm_template_projects(platform)) - project_options.update( - { - "extra_files_tar": str(extra_tar_file), - "project_type": project_type, - "board": board, - "compile_definitions": [ - f"-DWORKSPACE_SIZE={workspace_size + 512}", # Memory workspace size, 512 is a temporary offset - # since the memory calculation is not accurate. - f'-DTARGET_MODEL={MLPERF_TINY_MODELS[model_name]["index"]}', # Sets the model index for project compilation. - f"-DTH_MODEL_VERSION=EE_MODEL_VERSION_{model_name.upper()}01", # Sets model version. This is required by MLPerfTiny API. - f"-DMAX_DB_INPUT_SIZE={input_total_size}", # Max size of the input data array. - ], - } - ) - - if model_name != "ad": - project_options["compile_definitions"].append( - f'-DOUT_QUANT_SCALE={model_info["quant_output_scale"]}' - ) - project_options["compile_definitions"].append( - f'-DOUT_QUANT_ZERO={model_info["quant_output_zero_point"]}' - ) - - project = tvm.micro.project.generate_project_from_mlf( - template_project_path, workspace_dir / "project", model_tar_path, project_options - ) - project.build() - project.flash() - with project.transport() as transport: - aot_transport_find_message(transport, MLPERFTINY_READY_MSG, timeout_sec=200) - print(f"Testing {model_name} on {_mlperftiny_get_name(transport)}.") - assert _mlperftiny_get_name(transport) == "microTVM" - if model_name != "ad": - accuracy = _mlperftiny_test_dataset(transport, [samples, labels], 100) - print(f"Model {model_name} accuracy: {accuracy}") - else: - mean_error = _mlperftiny_test_dataset_ad(transport, [samples, None], 100) - print( - f"""Model {model_name} mean error: {mean_error}. - Note that this is not the final accuracy number. - To calculate that, you need to use sklearn.metrics.roc_auc_score function.""" - ) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/micro/common/test_tvmc.py b/tests/micro/common/test_tvmc.py deleted file mode 100644 index 97d179ed2a9a..000000000000 --- a/tests/micro/common/test_tvmc.py +++ /dev/null @@ -1,194 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest -import subprocess -import shlex -import sys -import logging -import tempfile -import pathlib -import sys -import os -import shutil - -import tvm -import tvm.testing -from tvm.contrib.download import download_testdata - -TVMC_COMMAND = [sys.executable, "-m", "tvm.driver.tvmc"] - -MODEL_URL = "https://github.com/tensorflow/tflite-micro/raw/a56087ffa2703b4d5632f024a8a4c899815c31bb/tensorflow/lite/micro/examples/micro_speech/micro_speech.tflite" -MODEL_FILE = "micro_speech.tflite" - -executor = tvm.testing.parameter("aot", "graph") -use_local_path = tvm.testing.parameter(True, False) - -# TODO(mehrdadh): replace this with _main from tvm.driver.tvmc.main -# Issue: https://github.com/apache/tvm/issues/9612 -def _run_tvmc(cmd_args: list, *args, **kwargs): - """Run a tvmc command and return the results""" - cmd_args_list = TVMC_COMMAND + cmd_args - cwd_str = "" if "cwd" not in kwargs else f" (in cwd: {kwargs['cwd']})" - logging.debug("run%s: %s", cwd_str, " ".join(shlex.quote(a) for a in cmd_args_list)) - return subprocess.check_call(cmd_args_list, *args, **kwargs) - - -def create_project_command(project_path: str, mlf_path: str, platform: str, board: str) -> list: - """Returns create project command with tvmc micro.""" - cmd = [ - "micro", - "create-project", - project_path, - mlf_path, - platform, - "--project-option", - "project_type=host_driven", - f"board={board}", - ] - - if platform == "zephyr": - # TODO: 4096 is driven by experiment on nucleo_l4r5zi. We should cleanup this after we have - # better memory management. - cmd.append("config_main_stack_size=4096") - return cmd - - -def compile_command( - model_path: str, target: tvm.target.Target, tar_path: pathlib.Path, executor: str -): - runtime = "crt" - - cmd = [ - "compile", - model_path, - f"--target={target}", - f"--runtime={runtime}", - f"--runtime-crt-system-lib", - str(1), - f"--executor={executor}", - ] - - if executor == "graph": - cmd += [ - "--executor-graph-link-params", - str(0), - ] - - cmd += [ - "--output", - str(tar_path), - "--output-format", - "mlf", - "--pass-config", - "tir.disable_vectorize=1", - ] - if executor == "graph": - cmd += ["--disabled-pass=AlterOpLayout"] - - cmd_str = "" - for item in cmd: - cmd_str += item - cmd_str += " " - return cmd - - -def get_workspace_dir(use_local_path: bool) -> pathlib.Path: - if use_local_path: - out_dir_temp = pathlib.Path(os.path.abspath("./tvmc_relative_path_test")) - if os.path.isdir(out_dir_temp): - shutil.rmtree(out_dir_temp) - os.mkdir(out_dir_temp) - else: - out_dir_temp = tvm.contrib.utils.tempdir() - - return out_dir_temp - - -@tvm.testing.requires_micro -def test_tvmc_exist(platform, board): - cmd_result = _run_tvmc(["micro", "-h"]) - assert cmd_result == 0 - - -@tvm.testing.requires_micro -def test_tvmc_model_build_only(platform, board, executor, use_local_path): - target = tvm.micro.testing.get_target(platform, board) - output_dir = get_workspace_dir(use_local_path) - - model_path = download_testdata(MODEL_URL, MODEL_FILE, module="model") - tar_path = str(output_dir / "model.tar") - project_dir = str(output_dir / "project") - - cmd_result = _run_tvmc(compile_command(model_path, target, tar_path, executor)) - - assert cmd_result == 0, "tvmc failed in step: compile" - - cmd_result = _run_tvmc(create_project_command(project_dir, tar_path, platform, board)) - assert cmd_result == 0, "tvmc micro failed in step: create-project" - - build_cmd = ["micro", "build", project_dir, platform] - cmd_result = _run_tvmc(build_cmd) - assert cmd_result 
== 0, "tvmc micro failed in step: build" - if use_local_path: - shutil.rmtree(output_dir) - - -@pytest.mark.skip("Flaky, https://github.com/apache/tvm/issues/14004") -@pytest.mark.requires_hardware -@tvm.testing.requires_micro -@pytest.mark.skip_boards( - ["nucleo_l4r5zi", "nucleo_f746zg", "stm32f746g_disco", "nrf5340dk_nrf5340_cpuapp"] -) -def test_tvmc_model_run(platform, board, executor, use_local_path): - target = tvm.micro.testing.get_target(platform, board) - - output_dir = get_workspace_dir(use_local_path) - - model_path = model_path = download_testdata(MODEL_URL, MODEL_FILE, module="data") - tar_path = str(output_dir / "model.tar") - project_dir = str(output_dir / "project") - - cmd_result = _run_tvmc(compile_command(model_path, target, tar_path, executor)) - assert cmd_result == 0, "tvmc failed in step: compile" - - cmd_result = _run_tvmc(create_project_command(project_dir, tar_path, platform, board)) - assert cmd_result == 0, "tvmc micro failed in step: create-project" - - build_cmd = ["micro", "build", project_dir, platform] - cmd_result = _run_tvmc(build_cmd) - - assert cmd_result == 0, "tvmc micro failed in step: build" - - flash_cmd = ["micro", "flash", project_dir, platform] - cmd_result = _run_tvmc(flash_cmd) - assert cmd_result == 0, "tvmc micro failed in step: flash" - - run_cmd = [ - "run", - "--device", - "micro", - project_dir, - ] - run_cmd += ["--fill-mode", "random"] - cmd_result = _run_tvmc(run_cmd) - assert cmd_result == 0, "tvmc micro failed in step: run" - if use_local_path: - shutil.rmtree(output_dir) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/micro/project_api/__init__.py b/tests/micro/project_api/__init__.py deleted file mode 100644 index 09ce2f87f44a..000000000000 --- a/tests/micro/project_api/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Project API in different platforms infrastructure.""" diff --git a/tests/micro/project_api/test_project_api.py b/tests/micro/project_api/test_project_api.py deleted file mode 100644 index 209ab59b4b75..000000000000 --- a/tests/micro/project_api/test_project_api.py +++ /dev/null @@ -1,87 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import sys -import numpy as np - -import tvm -from tvm import relay -from tvm.micro.project_api import server -from tvm.relay.backend import Runtime -from tvm.micro.testing import get_target -from tvm.relay.backend import Runtime -import tvm.micro.testing - -from .utils import build_project_api - -API_GENERATE_PROJECT = "generate_project" -API_BUILD = "build" -API_FLASH = "flash" -API_OPEN_TRANSPORT = "open_transport" - -PLATFORM_ARDUINO = "arduino" -PLATFORM_ZEPHYR = "zephyr" - - -platform = tvm.testing.parameter(PLATFORM_ARDUINO, PLATFORM_ZEPHYR) - - -@tvm.testing.requires_micro -def test_default_options_exist(platform): - board = "qemu_x86" if platform == "zephyr" else "due" - - x = relay.var("x", relay.TensorType(shape=(10,), dtype="int8")) - xx = relay.multiply(x, x) - z = relay.add(xx, relay.const(np.ones(shape=(10,), dtype="int8"))) - func = relay.Function([x], z) - ir_mod = tvm.IRModule.from_expr(func) - - with tvm.transform.PassContext(opt_level=3): - mod = tvm.relay.build( - ir_mod, target=tvm.micro.testing.get_target("crt"), runtime=Runtime("crt") - ) - - temp_dir = tvm.contrib.utils.tempdir() - project = tvm.micro.generate_project( - str(tvm.micro.get_microtvm_template_projects(platform)), - mod, - temp_dir / "project", - { - "board": board, - "project_type": "host_driven", - }, - ) - - platform_options = project._info["project_options"] - default_options = server.default_project_options() - - option_names = [] - for option in platform_options: - option_names.append(option["name"]) - - for option in default_options: - assert option.name in option_names - - -@tvm.testing.requires_micro -def test_project_minimal_options(platform): - """Test template project with minimum projectOptions""" - build_project_api(platform) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/micro/project_api/test_zephyr_microtvm_api_server.py b/tests/micro/project_api/test_zephyr_microtvm_api_server.py deleted file mode 100644 index 68b98f2fa527..000000000000 --- a/tests/micro/project_api/test_zephyr_microtvm_api_server.py +++ /dev/null @@ -1,31 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import os - -import tvm - -from .utils import build_project_api - - -@tvm.testing.requires_micro -def test_option_cmsis_path(): - """Test project API without CMSIS_PATH environment variable.""" - cmsis_path = os.environ.get("CMSIS_PATH", None) - del os.environ["CMSIS_PATH"] - build_project_api("zephyr") - os.environ["CMSIS_PATH"] = cmsis_path diff --git a/tests/micro/project_api/utils.py b/tests/micro/project_api/utils.py deleted file mode 100644 index 6f1b41877d3d..000000000000 --- a/tests/micro/project_api/utils.py +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import numpy as np - -import tvm -from tvm import relay -from tvm.relay.backend import Runtime -from tvm.micro.testing import get_target - - -def build_project_api(platform: str): - """Build a relay module with Project API.""" - shape = (10,) - dtype = "int8" - x = relay.var("x", relay.TensorType(shape=shape, dtype=dtype)) - xx = relay.multiply(x, x) - z = relay.add(xx, relay.const(np.ones(shape=shape, dtype=dtype))) - func = relay.Function([x], z) - ir_mod = tvm.IRModule.from_expr(func) - - if platform == "arduino": - board = "due" - elif platform == "zephyr": - board = "qemu_x86" - - runtime = Runtime("crt", {"system-lib": True}) - target = get_target(platform, board) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.relay.build(ir_mod, target=target, runtime=runtime) - - project_options = { - "project_type": "host_driven", - "board": board, - } - - temp_dir = tvm.contrib.utils.tempdir() - project = tvm.micro.generate_project( - tvm.micro.get_microtvm_template_projects(platform), - mod, - temp_dir / "project", - project_options, - ) - project.build() diff --git a/tests/micro/stm32/.clang-format b/tests/micro/stm32/.clang-format deleted file mode 100644 index 9d159247d518..000000000000 --- a/tests/micro/stm32/.clang-format +++ /dev/null @@ -1,2 +0,0 @@ -DisableFormat: true -SortIncludes: false diff --git a/tests/micro/stm32/conftest.py b/tests/micro/stm32/conftest.py deleted file mode 100644 index 66a53625fbe7..000000000000 --- a/tests/micro/stm32/conftest.py +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest - -import tvm.target.target diff --git a/tests/micro/stm32/test_code_emitter.py b/tests/micro/stm32/test_code_emitter.py deleted file mode 100644 index 29feb37b890a..000000000000 --- a/tests/micro/stm32/test_code_emitter.py +++ /dev/null @@ -1,394 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import os -import shutil -import struct -import sys - -import numpy as np - -import tensorflow as tf - -import tvm -import tvm.relay as relay -from tvm.micro.contrib import stm32 -from tvm.contrib.download import download_testdata -from tvm import testing - -import conftest - -NUM_ITERATIONS = 10 - -# ========================================================= -# get_data -# ========================================================= -def get_data(in_data_shapes, in_data_dtypes): - """Generate a uint8 image.""" - assert len(in_data_shapes) == 1, "Only single input models are supported." - in_data = OrderedDict() - for shape_name, shape in in_data_shapes.items(): - for dtype_name, dtype in in_data_dtypes.items(): - if dtype_name == shape_name: - in_data[shape_name] = np.random.uniform(size=shape).astype(dtype) - in_data = np.random.uniform(size=shape).astype("uint8") - break - if shape_name not in in_data.keys(): - raise ValueError("Shape and dtype dictionaries do not fit.") - - return in_data - - -# ================================================================== -# dump_image -# ================================================================== -def dump_image(filename, image): - # Flatten image - image_data = image.flatten() - outputRaw = [] - # Raw binary format - for i in range(0, len(image_data)): - outputRaw.append(struct.pack(" - - - - - - - - - - - - - - - - -This directory contains tests for MicroTVM's integration with Zephyr. - -To run the test, you first need to be running in a Python environment with -all of the appropriate TVM dependencies installed. 
If you have [Poetry](https://python-poetry.org/) -installed, you can do the following to get an appropriately-configured Python -environment: - -``` -$ cd tvm/apps/microtvm/ -$ poetry lock && poetry install && poetry shell -``` - -You can then run this test (either on real hardware or on a QEMU-emulated -device) using: - -``` -$ cd tvm/tests/micro/zephyr -$ pytest test_zephyr.py --board=qemu_x86 # For QEMU emulation -$ pytest test_zephyr.py --board=nrf5340dk_nrf5340_cpuapp # For nRF5340DK -``` - -To see the list of supported values for `--board`, run: -``` -$ pytest test_zephyr.py --help -``` - -If you like to test with a real hardware, you have the option to pass the serial number -for your development board. -``` -$ pytest test_zephyr.py --board=nrf5340dk_nrf5340_cpuapp --serial-number="0672FF5" -``` diff --git a/tests/micro/zephyr/__init__.py b/tests/micro/zephyr/__init__.py deleted file mode 100644 index 15b94a9f78d0..000000000000 --- a/tests/micro/zephyr/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" Testing infrastructure for microTVM Zephyr """ diff --git a/tests/micro/zephyr/conftest.py b/tests/micro/zephyr/conftest.py deleted file mode 100644 index aa1759d770fd..000000000000 --- a/tests/micro/zephyr/conftest.py +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -pytest_plugins = [ - "tvm.micro.testing.pytest_plugin", -] - -import pytest - - -def pytest_addoption(parser): - parser.addoption( - "--use-fvp", - action="store_true", - default=False, - help="If set true, use the FVP emulator to run the test", - ) - - -@pytest.fixture -def use_fvp(request): - return request.config.getoption("--use-fvp") - - -@pytest.fixture(autouse=True) -def xfail_on_fvp(request, use_fvp): - """mark the tests as xfail if running on fvp.""" - if request.node.get_closest_marker("xfail_on_fvp"): - if use_fvp: - request.node.add_marker( - pytest.mark.xfail(reason="checking corstone300 reliability on CI") - ) - - -def pytest_configure(config): - config.addinivalue_line( - "markers", - "xfail_on_fvp(): mark test as xfail on fvp", - ) diff --git a/tests/micro/zephyr/test_ms_tuning.py b/tests/micro/zephyr/test_ms_tuning.py deleted file mode 100644 index 3adc9ce2c8fc..000000000000 --- a/tests/micro/zephyr/test_ms_tuning.py +++ /dev/null @@ -1,183 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np -import pytest -from types import MappingProxyType -import pathlib -import json - -import tvm -from tvm import relay -import tvm.micro.testing -from tvm.relay.backend import Executor -from tvm.contrib import graph_executor -from tvm import meta_schedule as ms -from tvm.contrib.micro.meta_schedule.local_builder_micro import get_local_builder_micro -from tvm.contrib.micro.meta_schedule.rpc_runner_micro import get_rpc_runner_micro - - -def create_relay_module(): - data_shape = (1, 3, 16, 16) - weight_shape = (8, 3, 5, 5) - data = relay.var("data", relay.TensorType(data_shape, "float32")) - weight = relay.var("weight", relay.TensorType(weight_shape, "float32")) - y = relay.nn.conv2d( - data, - weight, - padding=(2, 2), - kernel_size=(5, 5), - kernel_layout="OIHW", - out_dtype="float32", - ) - f = relay.Function([data, weight], y) - mod = tvm.IRModule.from_expr(f) - mod = relay.transform.InferType()(mod) - - weight_sample = np.random.rand( - weight_shape[0], weight_shape[1], weight_shape[2], weight_shape[3] - ).astype("float32") - params = {mod["main"].params[1].name_hint: weight_sample} - - model_info = { - "in_tensor": "data", - "in_shape": data_shape, - "in_dtype": "float32", - } - - return mod, params, model_info - - -@tvm.testing.requires_micro -@pytest.mark.skip_boards(["mps2_an521", "mps3_an547", "nucleo_f746zg", "stm32f746g_disco"]) -def test_ms_tuning_conv2d(workspace_dir, board, microtvm_debug, use_fvp, serial_number): - """Test meta-schedule tuning for microTVM Zephyr""" - - mod, params, model_info = create_relay_module() - input_name = model_info["in_tensor"] - input_shape = model_info["in_shape"] - input_dtype = model_info["in_dtype"] - data_sample = np.random.rand(*input_shape).astype(input_dtype) - - platform = 
"zephyr" - project_options = { - "board": board, - "verbose": microtvm_debug, - "project_type": "host_driven", - "use_fvp": bool(use_fvp), - "serial_number": serial_number, - "config_main_stack_size": 4096, - } - if isinstance(serial_number, list): - project_options["serial_number"] = serial_number[0] # project_api expects an string. - serial_numbers = serial_number - else: - if serial_number is not None: # use a single device in tuning - serial_numbers = [serial_number] - else: # use two dummy serial numbers (for testing with QEMU) - serial_numbers = [str(i) for i in range(2)] - - boards_file = pathlib.Path(tvm.micro.get_microtvm_template_projects("zephyr")) / "boards.json" - with open(boards_file) as f: - boards = json.load(f) - target = tvm.micro.testing.get_target("zephyr", board) - - runtime = relay.backend.Runtime("crt", {"system-lib": True}) - executor = Executor("aot", {"link-params": True}) - # This line is necessary for link-params to take effect during - # task extraction and relay.build(...). - mod = mod.with_attr("executor", executor) - - builder = get_local_builder_micro() - with ms.Profiler() as profiler: - with get_rpc_runner_micro( - platform=platform, - options=project_options, - session_timeout_sec=120, - serial_numbers=serial_numbers, - ) as runner: - - db: ms.Database = ms.relay_integration.tune_relay( - mod=mod, - params=params, - target=target, - builder=builder, - runner=runner, - strategy="evolutionary", - num_trials_per_iter=2, - max_trials_per_task=10, - max_trials_global=100, - work_dir=str(workspace_dir), - module_equality="ignore-ndarray", - ) - - # Build model using meta_schedule logs - opt_mod, opt_params = relay.optimize(mod, target) - ms_mod: tvm.runtime.Module = ms.relay_integration.compile_relay( - database=db, - mod=opt_mod, - target=target, - params=opt_params, - pass_config=MappingProxyType( - { - "relay.backend.use_meta_schedule": True, - "relay.backend.tir_converter": "default", - "tir.disable_vectorize": True, - } - ), - executor=executor, - runtime=runtime, - ) - print(profiler.table()) - - project = tvm.micro.generate_project( - str(tvm.micro.get_microtvm_template_projects(platform)), - ms_mod, - str(workspace_dir / "project"), - options=project_options, - ) - project.build() - project.flash() - with tvm.micro.Session(project.transport()) as session: - aot_executor = tvm.runtime.executor.aot_executor.AotModule(session.create_aot_executor()) - aot_executor.get_input(0).copyfrom(data_sample) - result = aot_executor.module.time_evaluator("run", session.device, number=3)() - output = aot_executor.get_output(0).numpy() - - # Build reference model (without tuning) - dev = tvm.cpu() - target = tvm.micro.testing.get_target("crt") - with tvm.transform.PassContext( - opt_level=3, config={"tir.disable_vectorize": True}, disabled_pass=["AlterOpLayout"] - ): - ref_mod = relay.build( - mod, - target=target, - params=params, - runtime=runtime, - ) - ref_mod.export_library(workspace_dir / "compiled_lib2.so") - mod2: tvm.runtime.Module = tvm.runtime.load_module(workspace_dir / "compiled_lib2.so") - graph_mod = graph_executor.GraphModule(mod2["default"](dev)) - graph_mod.set_input(input_name, data_sample) - graph_mod.run() - ref_output = graph_mod.get_output(0).numpy() - - assert np.allclose(output, ref_output, rtol=1e-4, atol=2e-4), "FAILED" - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/micro/zephyr/test_zephyr.py b/tests/micro/zephyr/test_zephyr.py deleted file mode 100644 index d247e2187bff..000000000000 --- 
a/tests/micro/zephyr/test_zephyr.py +++ /dev/null @@ -1,699 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import logging -import os -import pathlib -import logging - -import pytest -import numpy as np - -import onnx -from PIL import Image - -import tvm -import tvm.testing -import tvm.relay as relay -from tvm.relay.backend import Executor, Runtime -from tvm.relay.testing import byoc -from tvm.micro.project_api import server -from tvm.contrib import utils -from tvm.micro.testing.utils import check_tune_log - -from . import utils - -_LOG = logging.getLogger(__name__) - - -def _make_sess_from_op( - temp_dir, - board, - op_name, - sched, - arg_bufs, - build_config, - use_fvp, - serial_number, -): - runtime = Runtime("crt", {"system-lib": True}) - target = tvm.micro.testing.get_target("zephyr", board) - target = tvm.target.Target(target=target, host=target) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.build(sched, arg_bufs, target=target, runtime=runtime, name=op_name) - - return _make_session(temp_dir, board, mod, build_config, use_fvp, serial_number) - - -def _make_session(temp_dir, board, mod, build_config, use_fvp, serial_number): - config_main_stack_size = None - if utils.ZEPHYR_BOARDS[board]["is_qemu"]: - config_main_stack_size = 1536 - - project_options = { - "project_type": "host_driven", - "verbose": bool(build_config.get("debug")), - "board": board, - "arm_fvp_path": "/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4/FVP_Corstone_SSE-300_Ethos-U55", - "use_fvp": bool(use_fvp), - "serial_number": serial_number, - } - if config_main_stack_size is not None: - project_options["config_main_stack_size"] = config_main_stack_size - - project = tvm.micro.generate_project( - str(utils.TEMPLATE_PROJECT_DIR), - mod, - temp_dir / "project", - project_options, - ) - project.build() - project.flash() - return tvm.micro.Session(project.transport()) - - -def _make_add_sess(temp_dir, board, build_config, use_fvp, serial_number, dtype="int8"): - A = tvm.te.placeholder((2,), dtype=dtype) - B = tvm.te.placeholder((1,), dtype=dtype) - C = tvm.te.compute(A.shape, lambda i: A[i] + B[0], name="C") - sched = tvm.te.create_schedule(C.op) - return _make_sess_from_op( - temp_dir, - board, - "add", - sched, - [A, B, C], - build_config, - use_fvp, - serial_number, - ) - - -# The same test code can be executed on both the QEMU simulation and on real hardware. 
-@tvm.testing.requires_micro -@pytest.mark.skip_boards(["mps2_an521"]) -@pytest.mark.xfail_on_fvp() -def test_add_uint(workspace_dir, board, microtvm_debug, use_fvp, serial_number): - """Test compiling the on-device runtime.""" - build_config = {"debug": microtvm_debug} - - # NOTE: run test in a nested function so cPython will delete arrays before closing the session. - def test_basic_add(sess): - A_data = tvm.nd.array(np.array([2, 3], dtype="int8"), device=sess.device) - assert (A_data.numpy() == np.array([2, 3])).all() - B_data = tvm.nd.array(np.array([4], dtype="int8"), device=sess.device) - assert (B_data.numpy() == np.array([4])).all() - C_data = tvm.nd.array(np.array([0, 0], dtype="int8"), device=sess.device) - assert (C_data.numpy() == np.array([0, 0])).all() - - system_lib = sess.get_system_lib() - system_lib.get_function("add")(A_data, B_data, C_data) - assert (C_data.numpy() == np.array([6, 7])).all() - - with _make_add_sess(workspace_dir, board, build_config, use_fvp, serial_number) as sess: - test_basic_add(sess) - - -# The same test code can be executed on both the QEMU simulation and on real hardware. -@tvm.testing.requires_micro -@pytest.mark.skip_boards(["mps2_an521"]) -@pytest.mark.xfail_on_fvp() -def test_add_float(workspace_dir, board, microtvm_debug, use_fvp, serial_number): - """Test compiling the on-device runtime.""" - if not utils.ZEPHYR_BOARDS[board]["fpu"]: - pytest.skip(f"FPU not enabled for {board}") - - build_config = {"debug": microtvm_debug} - - # NOTE: run test in a nested function so cPython will delete arrays before closing the session. - def test_basic_add(sess): - A_data = tvm.nd.array(np.array([2.5, 3.5], dtype="float32"), device=sess.device) - assert (A_data.numpy() == np.array([2.5, 3.5])).all() - B_data = tvm.nd.array(np.array([4.5], dtype="float32"), device=sess.device) - assert (B_data.numpy() == np.array([4.5])).all() - C_data = tvm.nd.array(np.array([0, 0], dtype="float32"), device=sess.device) - assert (C_data.numpy() == np.array([0, 0])).all() - - system_lib = sess.get_system_lib() - system_lib.get_function("add")(A_data, B_data, C_data) - assert (C_data.numpy() == np.array([7, 8])).all() - - with _make_add_sess( - workspace_dir, - board, - build_config, - use_fvp, - serial_number, - dtype="float32", - ) as sess: - test_basic_add(sess) - - -@tvm.testing.requires_micro -@pytest.mark.skip_boards(["mps2_an521"]) -@pytest.mark.xfail_on_fvp() -def test_platform_timer(workspace_dir, board, microtvm_debug, use_fvp, serial_number): - """Test compiling the on-device runtime.""" - build_config = {"debug": microtvm_debug} - - # NOTE: run test in a nested function so cPython will delete arrays before closing the session. 
- def test_basic_add(sess): - A_data = tvm.nd.array(np.array([2, 3], dtype="int8"), device=sess.device) - assert (A_data.numpy() == np.array([2, 3])).all() - B_data = tvm.nd.array(np.array([4], dtype="int8"), device=sess.device) - assert (B_data.numpy() == np.array([4])).all() - C_data = tvm.nd.array(np.array([0, 0], dtype="int8"), device=sess.device) - assert (C_data.numpy() == np.array([0, 0])).all() - - system_lib = sess.get_system_lib() - time_eval_f = system_lib.time_evaluator( - "add", sess.device, number=20, repeat=3, min_repeat_ms=40 - ) - result = time_eval_f(A_data, B_data, C_data) - assert (C_data.numpy() == np.array([6, 7])).all() - assert result.mean > 0 - assert len(result.results) == 3 - - with _make_add_sess(workspace_dir, board, build_config, use_fvp, serial_number) as sess: - test_basic_add(sess) - - -@tvm.testing.requires_micro -@pytest.mark.skip_boards(["mps2_an521"]) -@pytest.mark.xfail_on_fvp() -def test_relay(workspace_dir, board, microtvm_debug, use_fvp, serial_number): - """Testing a simple relay graph""" - build_config = {"debug": microtvm_debug} - shape = (10,) - dtype = "int8" - - # Construct Relay program. - x = relay.var("x", relay.TensorType(shape=shape, dtype=dtype)) - xx = relay.multiply(x, x) - z = relay.add(xx, relay.const(np.ones(shape=shape, dtype=dtype))) - func = relay.Function([x], z) - ir_mod = tvm.IRModule.from_expr(func) - - runtime = Runtime("crt", {"system-lib": True}) - target = tvm.micro.testing.get_target("zephyr", board) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.relay.build(ir_mod, target=target, runtime=runtime) - - with _make_session(workspace_dir, board, mod, build_config, use_fvp, serial_number) as session: - graph_mod = tvm.micro.create_local_graph_executor( - mod.get_graph_json(), session.get_system_lib(), session.device - ) - graph_mod.set_input(**mod.get_params()) - x_in = np.random.randint(10, size=shape[0], dtype=dtype) - graph_mod.run(x=x_in) - result = graph_mod.get_output(0).numpy() - tvm.testing.assert_allclose(graph_mod.get_input(0).numpy(), x_in) - tvm.testing.assert_allclose(result, x_in * x_in + 1) - - -@tvm.testing.requires_micro -@pytest.mark.skip_boards(["mps2_an521"]) -@pytest.mark.xfail_on_fvp() -def test_onnx(workspace_dir, board, microtvm_debug, use_fvp, serial_number): - """Testing a simple ONNX model.""" - build_config = {"debug": microtvm_debug} - - this_dir = pathlib.Path(os.path.dirname(__file__)) - mnist_testdata = this_dir.parent / "testdata" / "mnist" - digit_2 = Image.open(mnist_testdata / "digit-2.jpg").resize((28, 28)) - digit_2 = np.asarray(digit_2).astype("float32") - digit_2 = np.expand_dims(digit_2, axis=0) - - digit_9 = Image.open(mnist_testdata / "digit-9.jpg").resize((28, 28)) - digit_9 = np.asarray(digit_9).astype("float32") - digit_9 = np.expand_dims(digit_9, axis=0) - - # Load ONNX model and convert to Relay. - onnx_model = onnx.load(mnist_testdata / "mnist-8.onnx") - shape = {"Input3": (1, 1, 28, 28)} - relay_mod, params = relay.frontend.from_onnx(onnx_model, shape=shape, freeze_params=True) - relay_mod = relay.transform.DynamicToStatic()(relay_mod) - - # We add the link-params=True option to ensure the model parameters are compiled in. - # There is currently a bug preventing the host_driven environment from receiving - # the model weights when set using graph_mod.set_input(). 
- # See: https://github.com/apache/tvm/issues/7567 - target = tvm.micro.testing.get_target("zephyr", board) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - executor = Executor("graph", {"link-params": True}) - runtime = Runtime("crt", {"system-lib": True}) - lowered = relay.build(relay_mod, target, params=params, executor=executor, runtime=runtime) - graph = lowered.get_graph_json() - - with _make_session( - workspace_dir, board, lowered, build_config, use_fvp, serial_number - ) as session: - graph_mod = tvm.micro.create_local_graph_executor( - graph, session.get_system_lib(), session.device - ) - - # Send the digit-2 image and confirm that the correct result is returned. - graph_mod.set_input("Input3", tvm.nd.array(digit_2)) - graph_mod.run() - result = graph_mod.get_output(0).numpy() - assert np.argmax(result) == 2 - - # Send the digit-9 image and confirm that the correct result is returned. - graph_mod.set_input("Input3", tvm.nd.array(digit_9)) - graph_mod.run() - result = graph_mod.get_output(0).numpy() - assert np.argmax(result) == 9 - - -def check_result( - temp_dir, - relay_mod, - board, - map_inputs, - out_shape, - result, - build_config, - use_fvp, - serial_number, -): - """Helper function to verify results""" - TOL = 1e-5 - runtime = Runtime("crt", {"system-lib": True}) - target = tvm.micro.testing.get_target("zephyr", board) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.relay.build(relay_mod, target=target, runtime=runtime) - - with _make_session(temp_dir, board, mod, build_config, use_fvp, serial_number) as session: - rt_mod = tvm.micro.create_local_graph_executor( - mod.get_graph_json(), session.get_system_lib(), session.device - ) - rt_mod.set_input(**mod.get_params()) - for name, data in map_inputs.items(): - rt_mod.set_input(name, data) - rt_mod.set_input(**mod.get_params()) - rt_mod.run() - - out_shapes = out_shape if isinstance(out_shape, list) else [out_shape] - results = result if isinstance(result, list) else [result] - - for idx, shape in enumerate(out_shapes): - out = tvm.nd.empty(shape, device=session.device) - out = rt_mod.get_output(idx, out) - tvm.testing.assert_allclose(out.numpy(), results[idx], rtol=TOL, atol=TOL) - - -@tvm.testing.requires_micro -@pytest.mark.skip_boards(["mps2_an521"]) -@pytest.mark.xfail_on_fvp() -def test_byoc_microtvm(workspace_dir, board, microtvm_debug, use_fvp, serial_number): - """This is a simple test case to check BYOC capabilities of microTVM""" - build_config = {"debug": microtvm_debug} - x = relay.var("x", shape=(10, 10)) - w0 = relay.var("w0", shape=(10, 10)) - w1 = relay.var("w1", shape=(10, 10)) - w2 = relay.var("w2", shape=(10, 10)) - w3 = relay.var("w3", shape=(10, 10)) - w4 = relay.var("w4", shape=(10, 10)) - w5 = relay.var("w5", shape=(10, 10)) - w6 = relay.var("w6", shape=(10, 10)) - w7 = relay.var("w7", shape=(10, 10)) - - # C compiler - z0 = relay.add(x, w0) - p0 = relay.subtract(z0, w1) - q0 = relay.multiply(p0, w2) - - z1 = relay.add(x, w3) - p1 = relay.subtract(z1, w4) - q1 = relay.multiply(p1, w5) - - # Other parts on TVM - z2 = relay.add(x, w6) - q2 = relay.subtract(z2, w7) - - r = relay.concatenate((q0, q1, q2), axis=0) - f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r) - mod = tvm.IRModule() - ann = byoc.CcompilerAnnotator() - mod["main"] = ann.visit(f) - mod = tvm.relay.transform.PartitionGraph()(mod) - mod = tvm.relay.transform.InferType()(mod) - - x_data = np.random.rand(10, 10).astype("float32") - 
w_data = [] - for _ in range(8): - w_data.append(np.random.rand(10, 10).astype("float32")) - - map_inputs = {"w{}".format(i): w_data[i] for i in range(8)} - map_inputs["x"] = x_data - check_result( - temp_dir=workspace_dir, - relay_mod=mod, - map_inputs=map_inputs, - out_shape=(30, 10), - result=np.concatenate( - ( - ((x_data + w_data[0]) - w_data[1]) * w_data[2], - ((x_data + w_data[3]) - w_data[4]) * w_data[5], - x_data + w_data[6] - w_data[7], - ), - axis=0, - ), - board=board, - build_config=build_config, - use_fvp=use_fvp, - serial_number=serial_number, - ) - - -def _make_add_sess_with_shape(temp_dir, board, shape, build_config, use_fvp, serial_number): - A = tvm.te.placeholder(shape, dtype="int8") - C = tvm.te.compute(A.shape, lambda i: A[i] + A[i], name="C") - sched = tvm.te.create_schedule(C.op) - return _make_sess_from_op( - temp_dir, board, "add", sched, [A, C], build_config, use_fvp, serial_number - ) - - -@pytest.mark.parametrize( - "shape,", - [ - pytest.param((1 * 1024,), id="(1*1024)"), - pytest.param((4 * 1024,), id="(4*1024)"), - pytest.param((16 * 1024,), id="(16*1024)"), - ], -) -@tvm.testing.requires_micro -@pytest.mark.skip_boards(["mps2_an521"]) -@pytest.mark.xfail_on_fvp() -def test_rpc_large_array(workspace_dir, board, microtvm_debug, shape, use_fvp, serial_number): - """Test large RPC array transfer.""" - build_config = {"debug": microtvm_debug} - - # NOTE: run test in a nested function so cPython will delete arrays before closing the session. - def test_tensors(sess): - a_np = np.random.randint(low=-128, high=127, size=shape, dtype="int8") - - A_data = tvm.nd.array(a_np, device=sess.device) - assert (A_data.numpy() == a_np).all() - C_data = tvm.nd.array(np.zeros(shape, dtype="int8"), device=sess.device) - assert (C_data.numpy() == np.zeros(shape)).all() - - with _make_add_sess_with_shape( - workspace_dir, board, shape, build_config, use_fvp, serial_number - ) as sess: - test_tensors(sess) - - -@pytest.mark.xfail(strict=False, reason="See https://github.com/apache/tvm/issues/10297") -@tvm.testing.requires_micro -def test_autotune_conv2d(workspace_dir, board, microtvm_debug, use_fvp, serial_number): - """Test AutoTune for microTVM Zephyr""" - if board != "qemu_x86": - pytest.xfail(f"Autotune fails on {board}.") - - runtime = Runtime("crt", {"system-lib": True}) - build_config = {"debug": microtvm_debug} - - # Create a Relay model - data_shape = (1, 3, 16, 16) - weight_shape = (8, 3, 5, 5) - data = relay.var("data", relay.TensorType(data_shape, "float32")) - weight = relay.var("weight", relay.TensorType(weight_shape, "float32")) - y = relay.nn.conv2d( - data, - weight, - padding=(2, 2), - kernel_size=(5, 5), - kernel_layout="OIHW", - out_dtype="float32", - ) - f = relay.Function([data, weight], y) - mod = tvm.IRModule.from_expr(f) - mod = relay.transform.InferType()(mod) - - data_sample = np.random.rand(data_shape[0], data_shape[1], data_shape[2], data_shape[3]).astype( - "float32" - ) - weight_sample = np.random.rand( - weight_shape[0], weight_shape[1], weight_shape[2], weight_shape[3] - ).astype("float32") - params = {mod["main"].params[1].name_hint: weight_sample} - - target = tvm.micro.testing.get_target("zephyr", board) - pass_context = tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}) - with pass_context: - tasks = tvm.autotvm.task.extract_from_program(mod["main"], {}, target) - assert len(tasks) > 0 - - config_main_stack_size = None - if utils.ZEPHYR_BOARDS[board]["is_qemu"]: - config_main_stack_size = 1536 - - 
project_options = { - "board": board, - "verbose": 1, - "project_type": "host_driven", - "use_fvp": bool(use_fvp), - "serial_number": serial_number, - } - if config_main_stack_size is not None: - project_options["config_main_stack_size"] = config_main_stack_size - - module_loader = tvm.micro.AutoTvmModuleLoader( - template_project_dir=utils.TEMPLATE_PROJECT_DIR, - project_options=project_options, - ) - - timeout = 200 - builder = tvm.autotvm.LocalBuilder( - timeout=timeout, - n_parallel=1, - build_kwargs={"build_option": {"tir.disable_vectorize": True}}, - do_fork=True, - build_func=tvm.micro.autotvm_build_func, - runtime=runtime, - ) - runner = tvm.autotvm.LocalRunner( - number=1, repeat=1, timeout=timeout, module_loader=module_loader - ) - - measure_option = tvm.autotvm.measure_option(builder=builder, runner=runner) - - log_path = pathlib.Path("zephyr_autotune.log") - if log_path.exists(): - log_path.unlink() - - n_trial = 10 - for task in tasks: - tuner = tvm.autotvm.tuner.GATuner(task) - tuner.tune( - n_trial=n_trial, - measure_option=measure_option, - callbacks=[ - tvm.autotvm.callback.log_to_file(str(log_path)), - tvm.autotvm.callback.progress_bar(n_trial, si_prefix="M"), - ], - si_prefix="M", - ) - assert tuner.best_flops > 0 - - check_tune_log(log_path) - - # Build without tuning - with pass_context: - lowered = tvm.relay.build(mod, target=target, runtime=runtime, params=params) - - temp_dir = utils.tempdir() - with _make_session(temp_dir, board, lowered, build_config, use_fvp, serial_number) as session: - graph_mod = tvm.micro.create_local_graph_executor( - lowered.get_graph_json(), session.get_system_lib(), session.device - ) - graph_mod.set_input(**lowered.get_params()) - graph_mod.run(data=data_sample) - expected_output = graph_mod.get_output(0).numpy() - del graph_mod - - # Build using autotune logs - with tvm.autotvm.apply_history_best(str(log_path)): - with pass_context: - lowered_tuned = tvm.relay.build(mod, target=target, runtime=runtime, params=params) - - temp_dir = utils.tempdir() - with _make_session( - temp_dir, board, lowered_tuned, build_config, use_fvp, serial_number - ) as session: - graph_mod = tvm.micro.create_local_graph_executor( - lowered_tuned.get_graph_json(), session.get_system_lib(), session.device - ) - graph_mod.set_input(**lowered_tuned.get_params()) - graph_mod.run(data=data_sample) - output = graph_mod.get_output(0).numpy() - del graph_mod - - tvm.testing.assert_allclose(output, expected_output, rtol=1e-4, atol=1e-5) - - -@tvm.testing.requires_micro -@pytest.mark.skip(reason="due to https://github.com/apache/tvm/issues/13856") -def test_schedule_build_with_cmsis_dependency(workspace_dir, board, microtvm_debug, use_fvp): - """Test Relay schedule with CMSIS dependency. This test shows if microTVM Auto tuning - with Zephyr breaks if CMSIS dependency was required for a schedule. - """ - build_config = {"debug": microtvm_debug} - target = tvm.target.target.micro( - utils.ZEPHYR_BOARDS[board]["model"], options=["-keys=arm_cpu,cpu"] - ) - - if not target.features.has_dsp: - pytest.skip(f"ISA does not support DSP. 
target: {target}") - - # Create a Relay conv2d - data_shape = (1, 16, 16, 3) - weight_shape = (5, 5, 8, 3) - data = relay.var("data", relay.TensorType(data_shape, "int8")) - weight = relay.var("weight", relay.TensorType(weight_shape, "int8")) - y = relay.nn.conv2d( - data, - weight, - padding=(2, 2), - kernel_size=(5, 5), - data_layout="NHWC", - kernel_layout="HWOI", - out_dtype="int32", - ) - func = relay.Function([data, weight], y) - ir_mod = tvm.IRModule.from_expr(func) - - runtime = Runtime("crt", {"system-lib": True}) - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.relay.build(ir_mod, target=target, runtime=runtime) - - project_options = { - "project_type": "host_driven", - "verbose": bool(build_config.get("debug")), - "board": board, - "cmsis_path": os.getenv("CMSIS_PATH"), - "use_fvp": bool(use_fvp), - } - - project_dir = workspace_dir / "project" - project = tvm.micro.generate_project( - str(utils.TEMPLATE_PROJECT_DIR), - mod, - project_dir, - project_options, - ) - project.build() - - with open(project_dir / "CMakeLists.txt", "r") as cmake_f: - cmake_content = cmake_f.read() - - assert "CMSIS/DSP/Include" in cmake_content - assert "CMSIS/DSP/Include/dsp" in cmake_content - assert "CMSIS/DSP/Include" in cmake_content - assert "CMSIS-NN/Include" in cmake_content - - -@tvm.testing.requires_micro -def test_debugging_enabled(workspace_dir): - """Test debugging enabled for LED. `verbose=True` in project option enables - debugging. For this test a physical board(nucleo_l4r5zi) is used instead of - QEMU since LED config is not available on QEMU. - """ - board = "nucleo_l4r5zi" - project_options = { - "project_type": "host_driven", - "board": board, - "verbose": True, - } - shape = (10,) - dtype = "int8" - x = relay.var("x", relay.TensorType(shape=shape, dtype=dtype)) - xx = relay.multiply(x, x) - z = relay.add(xx, relay.const(np.ones(shape=shape, dtype=dtype))) - func = relay.Function([x], z) - ir_mod = tvm.IRModule.from_expr(func) - - runtime = Runtime("crt", {"system-lib": True}) - executor = Executor("aot") - target = tvm.micro.testing.get_target("zephyr", board) - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.relay.build(ir_mod, target=target, runtime=runtime, executor=executor) - - project = tvm.micro.generate_project( - str(utils.TEMPLATE_PROJECT_DIR), - mod, - workspace_dir / "project", - project_options, - ) - project.build() - - -@tvm.testing.requires_micro -@pytest.mark.skip_boards(["mps2_an521", "mps3_an547"]) -def test_qemu_make_fail(workspace_dir, board, microtvm_debug, serial_number): - """Testing QEMU make fail.""" - if not utils.ZEPHYR_BOARDS[board]["is_qemu"]: - pytest.skip("Only for QEMU targets.") - - build_config = {"debug": microtvm_debug} - shape = (10,) - dtype = "float32" - - # Construct Relay program. 
- x = relay.var("x", relay.TensorType(shape=shape, dtype=dtype)) - xx = relay.multiply(x, x) - z = relay.add(xx, relay.const(np.ones(shape=shape, dtype=dtype))) - func = relay.Function([x], z) - ir_mod = tvm.IRModule.from_expr(func) - - target = tvm.micro.testing.get_target("zephyr", board) - executor = Executor("aot") - runtime = Runtime("crt", {"system-lib": True}) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - lowered = relay.build(ir_mod, target, executor=executor, runtime=runtime) - - project_options = { - "project_type": "host_driven", - "verbose": bool(build_config.get("debug")), - "board": board, - } - - sample = np.zeros(shape=shape, dtype=dtype) - project = tvm.micro.generate_project( - str(utils.TEMPLATE_PROJECT_DIR), - lowered, - workspace_dir / "project", - project_options, - ) - project.build() - - file_path = workspace_dir / "project" / "build" / "build.ninja" - assert file_path.is_file(), f"[{file_path}] does not exist." - - # Remove a file to create make failure. - os.remove(file_path) - project.flash() - with pytest.raises(server.JSONRPCError) as excinfo: - project.transport().open() - assert "QEMU setup failed" in str(excinfo.value) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/micro/zephyr/test_zephyr_aot_exec.py b/tests/micro/zephyr/test_zephyr_aot_exec.py deleted file mode 100644 index 7c8018134599..000000000000 --- a/tests/micro/zephyr/test_zephyr_aot_exec.py +++ /dev/null @@ -1,152 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest -import numpy as np - -import tvm -import tvm.testing -import tvm.micro.testing -import tvm.relay as relay -from tvm.relay.backend import Executor, Runtime - -from . 
import utils - - -def _make_session(workspace_dir, board, mod, build_config, use_fvp, serial_number): - config_main_stack_size = None - if utils.ZEPHYR_BOARDS[board]["is_qemu"]: - # fyi: qemu_riscv64 seems to be the greediest stack user - config_main_stack_size = 4096 - else: - # increase stack size for HW platforms - config_main_stack_size = 2048 - - project_options = { - "project_type": "host_driven", - "verbose": bool(build_config.get("debug")), - "board": board, - "arm_fvp_path": "/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4/FVP_Corstone_SSE-300_Ethos-U55", - "use_fvp": bool(use_fvp), - "serial_number": serial_number, - } - if config_main_stack_size is not None: - project_options["config_main_stack_size"] = config_main_stack_size - - project = tvm.micro.generate_project( - str(utils.TEMPLATE_PROJECT_DIR), - mod, - workspace_dir / "project", - project_options, - ) - project.build() - project.flash() - return tvm.micro.Session(project.transport()) - - -@tvm.testing.requires_micro -@pytest.mark.skip_boards(["mps2_an521"]) -@pytest.mark.xfail_on_fvp() -def test_relay(workspace_dir, board, microtvm_debug, use_fvp, serial_number): - """Testing a simple relay graph""" - - build_config = {"debug": microtvm_debug} - shape = (10,) - dtype = "int8" - - # Construct Relay program. - x = relay.var("x", relay.TensorType(shape=shape, dtype=dtype)) - xx = relay.multiply(x, x) - z = relay.add(xx, relay.const(np.ones(shape=shape, dtype=dtype))) - func = relay.Function([x], z) - ir_mod = tvm.IRModule.from_expr(func) - - runtime = Runtime("crt", {"system-lib": True}) - executor = Executor("aot") - target = tvm.micro.testing.get_target("zephyr", board) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.relay.build(ir_mod, target=target, runtime=runtime, executor=executor) - - with _make_session(workspace_dir, board, mod, build_config, use_fvp, serial_number) as session: - - aot_executor = tvm.runtime.executor.aot_executor.AotModule(session.create_aot_executor()) - - x_in = np.random.randint(10, size=shape[0], dtype=dtype) - aot_executor.run(x=x_in) - result = aot_executor.get_output(0).numpy() - tvm.testing.assert_allclose(aot_executor.get_input(0).numpy(), x_in) - tvm.testing.assert_allclose(result, x_in * x_in + 1) - - -@tvm.testing.requires_micro -@pytest.mark.skip_boards(["mps2_an521"]) -@pytest.mark.xfail_on_fvp() -def test_aot_executor(workspace_dir, board, microtvm_debug, use_fvp, serial_number): - """Test use of the AOT executor with microTVM.""" - - build_config = {"debug": microtvm_debug} - shape = (10,) - dtype = "int8" - - print("test_relay: construct relay program\n") - - # Construct Relay program. 
- relay_mod = tvm.relay.fromtext( - """ - #[version = "0.0.5"] - def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) { - %0 = %a + %b; - %0 - }""" - ) - - runtime = Runtime("crt", {"system-lib": True}) - executor = Executor("aot") - target = tvm.micro.testing.get_target("zephyr", board) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.relay.build(relay_mod, target=target, runtime=runtime, executor=executor) - - def do_test(): - - aot_executor = tvm.runtime.executor.aot_executor.AotModule(session.create_aot_executor()) - - assert aot_executor.get_input_index("a") == 0 - assert aot_executor.get_input_index("b") == 1 - - assert aot_executor.get_num_inputs() == 2 - assert aot_executor.get_num_outputs() == 1 - - A_np = np.array([[2, 3]], dtype="uint8") - B_np = np.array([[4, 7]], dtype="uint8") - - A_data = aot_executor.get_input("a").copyfrom(A_np) - B_data = aot_executor.get_input("b").copyfrom(B_np) - - aot_executor.run() - - out = aot_executor.get_output(0) - assert (out.numpy() == np.array([6, 10])).all() - - B_np_new = np.array([[5, 8]]) - aot_executor.set_input("b", B_np_new) - assert (B_data.numpy() == B_np_new).all() - - with _make_session(workspace_dir, board, mod, build_config, use_fvp, serial_number) as session: - do_test() - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/micro/zephyr/test_zephyr_aot_exec_standalone.py b/tests/micro/zephyr/test_zephyr_aot_exec_standalone.py deleted file mode 100644 index 6995bacdb5d0..000000000000 --- a/tests/micro/zephyr/test_zephyr_aot_exec_standalone.py +++ /dev/null @@ -1,86 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest -import numpy as np - -import tvm -import tvm.testing -import tvm.micro.testing -import tvm.relay as relay -from tvm.relay.backend import Executor, Runtime -from tvm.contrib.download import download_testdata - -from . 
import utils - - -@tvm.testing.requires_micro -@pytest.mark.skip_boards(["mps2_an521", "mps3_an547"]) -def test_tflite(workspace_dir, board, microtvm_debug, serial_number): - """Testing a TFLite model.""" - input_shape = (1, 49, 10, 1) - output_shape = (1, 12) - build_config = {"debug": microtvm_debug} - - model_url = "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/keyword_spotting/trained_models/kws_ref_model.tflite" - model_path = download_testdata(model_url, "kws_ref_model.tflite", module="model") - - # Import TFLite model - tflite_model_buf = open(model_path, "rb").read() - try: - import tflite - - tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0) - except AttributeError: - import tflite.Model - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0) - - # Load TFLite model and convert to Relay - relay_mod, params = relay.frontend.from_tflite( - tflite_model, shape_dict={"input_1": input_shape}, dtype_dict={"input_1 ": "int8"} - ) - - target = tvm.micro.testing.get_target("zephyr", board) - executor = Executor( - "aot", {"unpacked-api": True, "interface-api": "c", "workspace-byte-alignment": 4} - ) - runtime = Runtime("crt") - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - lowered = relay.build(relay_mod, target, params=params, runtime=runtime, executor=executor) - - sample_url = "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/keyword_spotting_int8_6.pyc.npy" - sample_path = download_testdata(sample_url, "keyword_spotting_int8_6.pyc.npy", module="data") - sample = np.load(sample_path) - - project, _ = utils.generate_project( - workspace_dir, - board, - lowered, - build_config, - sample, - output_shape, - "int8", - False, - serial_number, - ) - - result, _ = utils.run_model(project) - assert result == 6 - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/micro/zephyr/test_zephyr_armv7m.py b/tests/micro/zephyr/test_zephyr_armv7m.py deleted file mode 100644 index cd589a19e886..000000000000 --- a/tests/micro/zephyr/test_zephyr_armv7m.py +++ /dev/null @@ -1,176 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import os -import pathlib - -import pytest -import numpy as np - -import tvm -import tvm.rpc -import tvm.micro -import tvm.testing -from tvm import relay - -from tvm.contrib.download import download_testdata -from tvm.relay.backend import Executor, Runtime - -from . 
import utils - - -def _open_tflite_model(): - # Import TFLite model - - model_url = "https://github.com/tlc-pack/web-data/raw/b2f3c02427b67267a00fd968ba1fce28fc833028/testdata/microTVM/model/mnist_model_quant.tflite" - model_path = download_testdata(model_url, "mnist_model_quant.tflite", module="model") - - tflite_model_buf = open(model_path, "rb").read() - - try: - import tflite - - tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0) - except AttributeError: - import tflite.Model - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0) - - relay_mod, params = relay.frontend.from_tflite(tflite_model) - - return relay_mod, params - - -def _get_test_data(testdata_dir): - - from PIL import Image - - image_files = ["digit-2.jpg"] - - for file in image_files: - img = Image.open(testdata_dir / file).resize((28, 28)) - img = np.asarray(img).astype("uint8") - sample = np.reshape(img, -1) - - output_shape = (1, 10) - - return sample, output_shape - - -def _apply_desired_layout_simd(relay_mod): - - desired_layouts = {"qnn.conv2d": ["NHWC", "HWOI"], "nn.conv2d": ["NHWC", "HWOI"]} - - seq = tvm.transform.Sequential( - [relay.transform.RemoveUnusedFunctions(), relay.transform.ConvertLayout(desired_layouts)] - ) - - with tvm.transform.PassContext(opt_level=3): - return seq(relay_mod) - - -def _apply_desired_layout_no_simd(relay_mod): - - desired_layouts = {"qnn.conv2d": ["NHWC", "HWIO"], "nn.conv2d": ["NHWC", "HWIO"]} - - seq = tvm.transform.Sequential( - [relay.transform.RemoveUnusedFunctions(), relay.transform.ConvertLayout(desired_layouts)] - ) - - with tvm.transform.PassContext(opt_level=3): - return seq(relay_mod) - - -@tvm.testing.requires_micro -@pytest.mark.skip_boards( - ["mps2_an521", "stm32f746g_disco", "nucleo_f746zg", "nucleo_l4r5zi", "nrf5340dk_nrf5340_cpuapp"] -) -@pytest.mark.xfail(reason="due https://github.com/apache/tvm/issues/12619") -def test_armv7m_intrinsic(workspace_dir, board, microtvm_debug, serial_number): - """Testing a ARM v7m SIMD extension.""" - build_config = {"debug": microtvm_debug} - - this_dir = pathlib.Path(os.path.dirname(__file__)) - testdata_dir = this_dir.parent / "testdata" / "mnist" - - relay_mod, params = _open_tflite_model() - - sample, output_shape = _get_test_data(testdata_dir) - - relay_mod_simd = _apply_desired_layout_simd(relay_mod) - # kernel layout "HWIO" is not supported by arm_cpu SIMD extension (see tvm\python\relay\op\strategy\arm_cpu.py) - relay_mod_no_simd = _apply_desired_layout_no_simd(relay_mod) - - target = tvm.target.target.micro(utils.ZEPHYR_BOARDS[board]["model"], options=["-keys=cpu"]) - target_simd = tvm.target.target.micro( - utils.ZEPHYR_BOARDS[board]["model"], options=["-keys=arm_cpu,cpu"] - ) - - executor = Executor("aot", {"unpacked-api": True, "interface-api": "c"}) - runtime = Runtime("crt") - - workspace_dir_simd = workspace_dir / "simd" - workspace_dir_no_simd = workspace_dir / "nosimd" - - os.makedirs(workspace_dir_simd, exist_ok=True) - os.makedirs(workspace_dir_no_simd, exist_ok=True) - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - lowered_simd = relay.build( - relay_mod_simd, target_simd, params=params, runtime=runtime, executor=executor - ) - lowered_no_simd = relay.build( - relay_mod_no_simd, target, params=params, runtime=runtime, executor=executor - ) - - simd_project, _ = utils.generate_project( - workspace_dir_simd, - board, - lowered_simd, - build_config, - sample, - output_shape, - "float32", - True, - serial_number, - ) - 
result_simd, time_simd = utils.run_model(simd_project) - - no_simd_project, _ = utils.generate_project( - workspace_dir_no_simd, - board, - lowered_no_simd, - build_config, - sample, - output_shape, - "float32", - False, - serial_number, - ) - result_no_simd, time_no_simd = utils.run_model(no_simd_project) - - assert result_no_simd == result_simd == 2 - - # Time performance measurements on QEMU emulator are always equal to zero. - if board not in [ - "mps2_an521", - "mps3_an547", - ]: - assert time_no_simd > time_simd - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/micro/zephyr/utils.py b/tests/micro/zephyr/utils.py deleted file mode 100644 index fed7c53c2915..000000000000 --- a/tests/micro/zephyr/utils.py +++ /dev/null @@ -1,177 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import io -import os -import json -import pathlib -import tarfile -import tempfile -import logging - -import numpy as np - -from urllib.request import urlopen, urlretrieve -from urllib.error import HTTPError -import json -import requests - -import tvm.micro -from tvm.micro import export_model_library_format -from tvm.micro.testing.utils import create_header_file -from tvm.micro.testing.utils import ( - mlf_extract_workspace_size_bytes, - aot_transport_init_wait, - aot_transport_find_message, -) - -TEMPLATE_PROJECT_DIR = pathlib.Path(tvm.micro.get_microtvm_template_projects("zephyr")) - -_LOG = logging.getLogger(__name__) - - -def zephyr_boards() -> dict: - """Returns Zephyr board properties""" - with open(TEMPLATE_PROJECT_DIR / "boards.json") as f: - board_properties = json.load(f) - return board_properties - - -ZEPHYR_BOARDS = zephyr_boards() - - -def build_project( - temp_dir, zephyr_board, mod, build_config, serial_number, simd=False, extra_files_tar=None -): - project_dir = temp_dir / "project" - - with tempfile.TemporaryDirectory() as tar_temp_dir: - model_tar_path = pathlib.Path(tar_temp_dir) / "model.tar" - export_model_library_format(mod, model_tar_path) - - workspace_size = mlf_extract_workspace_size_bytes(model_tar_path) - project_options = { - "extra_files_tar": extra_files_tar, - "project_type": "aot_standalone_demo", - "verbose": bool(build_config.get("debug")), - "board": zephyr_board, - "serial_number": serial_number, - "compile_definitions": [ - # TODO(mehrdadh): It fails without offset. 
- f"-DWORKSPACE_SIZE={workspace_size + 128}", - ], - } - if simd: - project_options["config_main_stack_size"] = 1536 - - project = tvm.micro.project.generate_project_from_mlf( - str(TEMPLATE_PROJECT_DIR), project_dir, model_tar_path, project_options - ) - project.build() - return project, project_dir - - -# TODO move CMSIS integration to microtvm_api_server.py -# see https://discuss.tvm.apache.org/t/tvm-capturing-dependent-libraries-of-code-generated-tir-initially-for-use-in-model-library-format/11080 -def loadCMSIS(temp_dir): - REPO_PATH = "ARM-software/CMSIS_5" - BRANCH = "master" - API_PATH_URL = f"https://api.github.com/repos/{REPO_PATH}/git/trees" - RAW_PATH_URL = f"https://raw.githubusercontent.com/{REPO_PATH}/{BRANCH}" - - url = "https://api.github.com/repos/ARM-software/CMSIS_5/git/trees/master?recursive=1" - r = requests.get(url) - res = r.json() - - include_trees = {} - - for file in res["tree"]: - if file["path"] in {"CMSIS/DSP/Include", "CMSIS/DSP/Include/dsp", "CMSIS/NN/Include"}: - include_trees.update({file["path"]: file["sha"]}) - - for path, sha in include_trees.items(): - url = f"{API_PATH_URL}/{sha}" - content = json.load(urlopen(url)) - temp_path = f"{temp_dir}" - if path == "CMSIS/DSP/Include/dsp": - temp_path = f"{temp_dir}/dsp" - if not os.path.isdir(temp_path): - os.makedirs(temp_path) - for item in content["tree"]: - if item["type"] == "blob": - file_name = item["path"] - file_url = f"{RAW_PATH_URL}/{path}/{file_name}" - print(file_name, " ", file_url) - try: - urlretrieve(file_url, f"{temp_path}/{file_name}") - except HTTPError as e: - print(f"Failed to download {file_url}: {e}") - - -def run_model(project): - project.flash() - - with project.transport() as transport: - aot_transport_init_wait(transport) - transport.write(b"infer%", timeout_sec=5) - result_line = aot_transport_find_message(transport, "result", timeout_sec=60) - - result_line = result_line.strip("\n") - result_line = result_line.split(":") - result = int(result_line[1]) - time = int(result_line[2]) - _LOG.info(f"Result: {result}\ttime: {time} ms") - - return result, time - - -def generate_project( - temp_dir, - board, - lowered, - build_config, - sample, - output_shape, - output_type, - load_cmsis, - serial_number, -): - with tempfile.NamedTemporaryFile() as tar_temp_file: - with tarfile.open(tar_temp_file.name, "w:gz") as tf: - with tempfile.TemporaryDirectory() as tar_temp_dir: - model_files_path = pathlib.Path(tar_temp_dir) / "include" - model_files_path.mkdir(parents=True) - if load_cmsis: - loadCMSIS(model_files_path) - tf.add( - model_files_path, arcname=os.path.relpath(model_files_path, tar_temp_dir) - ) - create_header_file("input_data", sample, "include/tvm", tf) - create_header_file( - "output_data", np.zeros(shape=output_shape, dtype=output_type), "include/tvm", tf - ) - - project, project_dir = build_project( - temp_dir, - board, - lowered, - build_config, - serial_number, - simd=load_cmsis, - extra_files_tar=tar_temp_file.name, - ) - - return project, project_dir diff --git a/tests/python/conftest.py b/tests/python/conftest.py index 51e7516a1dcd..9c5cd8ac22ca 100644 --- a/tests/python/conftest.py +++ b/tests/python/conftest.py @@ -16,7 +16,6 @@ # under the License. 
"""Configure pytest""" import sys -import pytest COLLECT_IGNORE = [] if sys.platform.startswith("win"): @@ -37,23 +36,3 @@ # COLLECT_IGNORE.append("auto_scheduler/test_auto_scheduler_measure.py") # exception ignored COLLECT_IGNORE.append("tir_base/test_tir_intrin.py") - - -def pytest_addoption(parser): - parser.addoption( - "--enable-corstone300-tests", - action="store_true", - default=False, - help="Run Corstone-300 FVP tests", - ) - - -def pytest_collection_modifyitems(config, items): - if not config.getoption("--enable-corstone300-tests"): - for item in items: - if "corstone300" in item.keywords: - item.add_marker( - pytest.mark.skip( - reason="Need --enable-corstone300-tests option to run this test" - ) - ) diff --git a/tests/python/contrib/test_cmsisnn/__init__.py b/tests/python/contrib/test_cmsisnn/__init__.py deleted file mode 100644 index f9a622464a47..000000000000 --- a/tests/python/contrib/test_cmsisnn/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Infrastructure and tests for CMSIS-NN""" diff --git a/tests/python/contrib/test_cmsisnn/test_binary_ops.py b/tests/python/contrib/test_cmsisnn/test_binary_ops.py deleted file mode 100644 index 8c0da922f093..000000000000 --- a/tests/python/contrib/test_cmsisnn/test_binary_ops.py +++ /dev/null @@ -1,472 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""CMSIS-NN integration tests: binary ops""" - -import itertools - -import numpy as np -import pytest - -import tvm -from tvm import relay -from tvm.relay.op.contrib import cmsisnn -from tvm.testing.aot import get_dtype_range, generate_ref_data, AOTTestModel, compile_and_run -from tvm.micro.testing.aot_test_utils import ( - AOT_USMP_CORSTONE300_RUNNER, -) - -from .utils import ( - skip_if_no_reference_system, - make_module, - make_qnn_relu, - assert_partitioned_function, - assert_no_external_function, - create_test_runner, -) - - -def generate_tensor_constant(): - rng = np.random.default_rng(12321) - dtype = "int8" - shape = (1, 16, 16, 3) - in_min, in_max = get_dtype_range(dtype) - values = tvm.nd.array(rng.integers(in_min, high=in_max, size=shape, dtype=dtype)) - return relay.const(values, dtype) - - -def generate_scalar_constant(): - dtype = "int8" - return relay.const(-30, dtype) - - -def generate_variable(name, dtype="int8"): - return relay.var(name, shape=(1, 16, 16, 3), dtype=dtype) - - -def make_model( - op, - input_0, - input_1, - input_0_scale, - input_0_zero_point, - input_1_scale, - input_1_zero_point, - relu_type="NONE", - out_scale=1.0 / 256, - out_zero_point=-128, -): - """Create a Relay Function / network model""" - binary_op = op( - input_0, - input_1, - relay.const(input_0_scale, "float32"), - relay.const(input_0_zero_point, "int32"), - relay.const(input_1_scale, "float32"), - relay.const(input_1_zero_point, "int32"), - relay.const(out_scale, "float32"), - relay.const(out_zero_point, "int32"), - ) - return make_qnn_relu(binary_op, relu_type, out_scale, out_zero_point, "int8") - - -@skip_if_no_reference_system -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("op", [relay.qnn.op.mul, relay.qnn.op.add]) -@pytest.mark.parametrize("relu_type", ["RELU", "NONE"]) -@pytest.mark.parametrize( - [ - "input_0_scale", - "input_0_zero_point", - "input_1_scale", - "input_1_zero_point", - ], - [[0.256, 33, 0.256, 33], [0.0128, -64, 0.0128, -64], [0.0128, -64, 0.256, 33]], -) -@pytest.mark.parametrize( - "compiler_cpu, cpu_flags", [("cortex-m55", "+nomve"), ("cortex-m55", ""), ("cortex-m7", "")] -) -def test_op_int8( - op, - relu_type, - input_0_scale, - input_0_zero_point, - input_1_scale, - input_1_zero_point, - compiler_cpu, - cpu_flags, -): - """Tests QNN binary operator for CMSIS-NN""" - interface_api = "c" - use_unpacked_api = True - - dtype = "int8" - shape = [1, 16, 16, 3] - model = make_model( - op, - generate_variable("input_0"), - generate_variable("input_1"), - input_0_scale, - input_0_zero_point, - input_1_scale, - input_1_zero_point, - relu_type, - ) - orig_mod = make_module(model) - - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # validate the output - in_min, in_max = get_dtype_range(dtype) - inputs = { - "input_0": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype), - "input_1": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype), - } - output_list = generate_ref_data(orig_mod["main"], inputs) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - output_tolerance=1, - ), - create_test_runner(compiler_cpu, cpu_flags), - interface_api, - use_unpacked_api, - ) - - -@skip_if_no_reference_system -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("op", [relay.qnn.op.mul, relay.qnn.op.add]) -@pytest.mark.parametrize("relu_type", ["RELU", "NONE"]) -@pytest.mark.parametrize( - [ - "input_0_scale", - 
"input_1_scale", - "output_scale", - ], - [ - [0.256, 0.256, 0.256], - [0.0128, 0.0128, 0.0128], - [0.0128, 0.256, 0.256], - ], -) -@pytest.mark.parametrize( - "compiler_cpu, cpu_flags", [("cortex-m55", "+nomve"), ("cortex-m55", ""), ("cortex-m7", "")] -) -def test_op_int16( - op, - relu_type, - input_0_scale, - input_1_scale, - output_scale, - compiler_cpu, - cpu_flags, -): - """Tests QNN 16bit binary operators for CMSIS-NN""" - interface_api = "c" - use_unpacked_api = True - - dtype = "int16" - shape = [1, 16, 16, 3] - model = make_model( - op, - generate_variable("input_0", dtype), - generate_variable("input_1", dtype), - input_0_scale, - 0, - input_1_scale, - 0, - relu_type, - output_scale, - 0, - ) - orig_mod = make_module(model) - - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # validate the output - in_min, in_max = get_dtype_range(dtype) - inputs = { - "input_0": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype), - "input_1": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype), - } - output_list = generate_ref_data(orig_mod["main"], inputs) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - output_tolerance=1, - ), - create_test_runner(compiler_cpu, cpu_flags), - interface_api, - use_unpacked_api, - ) - - -@skip_if_no_reference_system -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("op", [relay.qnn.op.mul, relay.qnn.op.add]) -@pytest.mark.parametrize("relu_type", ["RELU", "NONE"]) -@pytest.mark.parametrize( - [ - "input_0_scale", - "input_0_zero_point", - "input_1_scale", - "input_1_zero_point", - "output_scale", - "output_zero_point", - ], - [ - [0.256, 0, 0.256, 33, 0.256, 33], - [0.0128, -64, 0.0128, 0, 0.0128, -64], - [0.0128, -64, 0.256, 33, 0.256, 0], - ], -) -def test_op_int16_cannot_partition( - op, - relu_type, - input_0_scale, - input_0_zero_point, - input_1_scale, - input_1_zero_point, - output_scale, - output_zero_point, -): - """Tests QNN 16bit binary operators for CMSIS-NN in the edge case of - non-zero zero points""" - - model = make_model( - op, - generate_variable("input_0", "int16"), - generate_variable("input_1", "int16"), - input_0_scale, - input_0_zero_point, - input_1_scale, - input_1_zero_point, - relu_type, - output_scale, - output_zero_point, - ) - orig_mod = make_module(model) - - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - - # arm_elementwise_(mul|add)_s16 does not support non-zero shifts in any - # argument - assert_no_external_function(cmsisnn_mod) - - -@skip_if_no_reference_system -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("op", [relay.qnn.op.mul, relay.qnn.op.add]) -@pytest.mark.parametrize("relu_type", ["RELU", "NONE"]) -def test_same_input_to_binary_op(op, relu_type): - """Tests QNN binary operator for CMSIS-NN where both inputs are the same""" - interface_api = "c" - use_unpacked_api = True - test_runner = AOT_USMP_CORSTONE300_RUNNER - - dtype = "int8" - shape = [1, 16, 16, 3] - input_ = generate_variable("input") - input_scale = 0.256 - input_zero_point = 33 - - model = make_model( - op, - input_, - input_, - input_scale, - input_zero_point, - input_scale, - input_zero_point, - relu_type, - ) - orig_mod = make_module(model) - - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # Check if the number of internal function parameter is 1 - cmsisnn_global_func = 
cmsisnn_mod["tvmgen_default_cmsis_nn_main_0"] - assert ( - isinstance(cmsisnn_global_func.body, tvm.relay.expr.Call) - and len(cmsisnn_global_func.body.args) == 1 - ), "Composite function for the binary op should have only 1 parameter." - - # validate the output - in_min, in_max = get_dtype_range(dtype) - inputs = { - "input": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype), - } - output_list = generate_ref_data(orig_mod["main"], inputs) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - output_tolerance=1, - ), - test_runner, - interface_api, - use_unpacked_api, - ) - - -def parameterize_for_constant_inputs(test): - """Generates parameters in such a way so that at least one of the inputs is a constant, - both can't be variables, both can't be scalars. - """ - op = [relay.qnn.op.mul, relay.qnn.op.add] - input_0 = [generate_variable("input_0"), generate_tensor_constant(), generate_scalar_constant()] - input_1 = [generate_variable("input_1"), generate_tensor_constant(), generate_scalar_constant()] - all_combinations = itertools.product(op, input_0, input_1) - all_combinations = filter( - lambda parameters: not ( - ( - isinstance(parameters[1], tvm.relay.expr.Var) - and isinstance(parameters[2], tvm.relay.expr.Var) - ) - or ( - isinstance(parameters[1], tvm.relay.expr.Constant) - and isinstance(parameters[2], tvm.relay.expr.Constant) - and parameters[1].data.numpy().ndim == 0 - and parameters[2].data.numpy().ndim == 0 - ) - ), - all_combinations, - ) - return pytest.mark.parametrize( - ["op", "input_0", "input_1"], - all_combinations, - )(test) - - -@skip_if_no_reference_system -@tvm.testing.requires_cmsisnn -@parameterize_for_constant_inputs -def test_constant_input_int8(op, input_0, input_1): - """Tests binary ops where one of the operands is a constant""" - interface_api = "c" - use_unpacked_api = True - test_runner = AOT_USMP_CORSTONE300_RUNNER - - dtype = "int8" - shape = [1, 16, 16, 3] - input_0_scale = 0.256 - input_0_zero_point = 33 - input_1_scale = 0.128 - input_1_zero_point = -24 - model = make_model( - op, - input_0, - input_1, - input_0_scale, - input_0_zero_point, - input_1_scale, - input_1_zero_point, - ) - orig_mod = make_module(model) - - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # validate the output - in_min, in_max = get_dtype_range(dtype) - inputs = {} - if isinstance(input_0, tvm.relay.expr.Var): - inputs.update({"input_0": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype)}) - if isinstance(input_1, tvm.relay.expr.Var): - inputs.update({"input_1": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype)}) - output_list = generate_ref_data(orig_mod["main"], inputs) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - output_tolerance=1, - ), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@skip_if_no_reference_system -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("op", [relay.qnn.op.mul, relay.qnn.op.add]) -def test_both_scalar_inputs_int8( - op, -): - """Tests binary ops where both operands are scalars""" - input_scale = 0.256 - input_zero_point = 33 - model = make_model( - op, - generate_scalar_constant(), - generate_scalar_constant(), - input_scale, - input_zero_point, - input_scale, - input_zero_point, - ) - - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - 
-    assert_no_external_function(cmsisnn_mod)
-
-
-@skip_if_no_reference_system
-@tvm.testing.requires_cmsisnn
-@pytest.mark.parametrize("op", [relay.qnn.op.mul, relay.qnn.op.add])
-@pytest.mark.parametrize(["input_dtype"], [["uint8"], ["uint16"]])
-def test_invalid_parameters(
-    op,
-    input_dtype,
-):
-    """Tests binary ops for non int8 dtypes"""
-    input_scale = 0.256
-    input_zero_point = 33
-    model = make_model(
-        op,
-        generate_variable("input_0", input_dtype),
-        generate_variable("input_1", input_dtype),
-        input_scale,
-        input_zero_point,
-        input_scale,
-        input_zero_point,
-    )
-
-    orig_mod = make_module(model)
-    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)
-    assert_no_external_function(cmsisnn_mod)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/contrib/test_cmsisnn/test_conv2d.py b/tests/python/contrib/test_cmsisnn/test_conv2d.py
deleted file mode 100644
index 6f012640c2ae..000000000000
--- a/tests/python/contrib/test_cmsisnn/test_conv2d.py
+++ /dev/null
@@ -1,1025 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
- -"""CMSIS-NN integration tests: Conv2D""" -import itertools -import numpy as np -import pytest -import tvm -from tvm import relay -from tvm.relay.op.contrib import cmsisnn - -from tvm.testing.aot import ( - get_dtype_range, - generate_ref_data, - AOTTestModel, - compile_models, - compile_and_run, - run_and_check, -) -from tvm.micro.testing.aot_test_utils import AOT_USMP_CORSTONE300_RUNNER -from .utils import ( - make_module, - get_same_padding, - get_conv2d_qnn_params, - get_kernel_bias_dtype, - make_qnn_relu, - assert_partitioned_function, - assert_no_external_function, - create_test_runner, - CheckForPadsWithinCompositeFunc, -) - - -def make_model( - shape, - kernel_shape, - input_zero_point, - input_scale, - kernel_zero_point, - kernel_scale, - output_zero_point, - output_scale, - padding, - strides, - dilation, - groups, - dtype, - kernel_dtype, - bias_dtype, - out_channels, - kernel_layout, - enable_bias, - relu_type, - input_op=None, -): - """Return a model and any parameters it may have""" - if input_op: - op = input_op - else: - op = relay.var("input", shape=shape, dtype=dtype) - - h_index = kernel_layout.index("H") - w_index = kernel_layout.index("W") - kernel_h = kernel_shape[h_index] - kernel_w = kernel_shape[w_index] - p = (0, 0, 0, 0) - if padding == "SAME": - p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides) - - rng = np.random.default_rng(12321) - kmin, kmax = get_dtype_range(kernel_dtype) - kernel = tvm.nd.array( - rng.integers( - kmin, - high=kmax, - size=kernel_shape, - dtype=kernel_dtype, - ) - ) - kernel_const = relay.const(kernel, kernel_dtype) - conv2d_kernel_sc = kernel_scale[0] if out_channels == 1 else kernel_scale - conv = relay.qnn.op.conv2d( - op, - kernel_const, - input_zero_point=relay.const(input_zero_point, "int32"), - kernel_zero_point=relay.const(kernel_zero_point, "int32"), - input_scale=relay.const(input_scale, "float32"), - kernel_scale=relay.const(conv2d_kernel_sc, "float32"), - kernel_size=(kernel_h, kernel_w), - data_layout="NHWC", - kernel_layout=kernel_layout, - dilation=dilation, - strides=strides, - groups=groups, - channels=out_channels, - padding=p, - out_dtype=bias_dtype, - ) - bias = tvm.nd.array(rng.integers(0, high=10, size=(out_channels,), dtype=bias_dtype)) - bias_const = relay.const(bias, bias_dtype) - last_op = relay.nn.bias_add(conv, bias_const, axis=3) if enable_bias else conv - requant_input_sc = [sc * input_scale for sc in kernel_scale] - requant_input_sc = requant_input_sc[0] if out_channels == 1 else requant_input_sc - last_op = relay.qnn.op.requantize( - last_op, - relay.const(requant_input_sc, "float32"), - relay.const(0, "int32"), - relay.const(output_scale, "float32"), - relay.const(output_zero_point, "int32"), - out_dtype=dtype, - ) - last_op = make_qnn_relu(last_op, relu_type, output_scale, output_zero_point, dtype) - params = {"w": kernel, "b": bias} - return last_op, params - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("padding", ["SAME", "VALID"]) -@pytest.mark.parametrize("enable_bias", [True, False]) -@pytest.mark.parametrize( - "input_zero_point, input_scale, kernel_scale, out_channels", - [(10, 0.0128, [0.11, 0.22], 2)], -) -def test_conv2d_number_primfunc_args( - padding, - enable_bias, - input_zero_point, - input_scale, - kernel_scale, - out_channels, -): - """Tests number of arguments in Conv2D primfunc""" - interface_api = "c" - use_unpacked_api = True - - ifm_shape = (1, 64, 100, 4) - kernel_size = (3, 3) - strides = (1, 1) - dilation = (1, 1) - dtype = "int8" - 
groups = 1 - kernel_layout = "HWIO" - kernel_h = kernel_size[0] - kernel_w = kernel_size[1] - kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels) - kernel_zero_point = 0 - in_min, in_max = get_dtype_range(dtype) - relu_type = "RELU" - - kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) - - output_scale, output_zero_point = get_conv2d_qnn_params( - kernel_shape, - input_scale, - input_zero_point, - kernel_scale, - kernel_zero_point, - input_dtype=dtype, - kernel_dtype=kernel_dtype, - output_dtype=dtype, - ) - - model, params = make_model( - ifm_shape, - kernel_shape, - input_zero_point, - input_scale, - kernel_zero_point, - kernel_scale, - output_zero_point, - output_scale, - padding, - strides, - dilation, - groups, - dtype, - kernel_dtype, - bias_dtype, - out_channels, - kernel_layout, - enable_bias, - relu_type, - ) - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # compile the model - rng = np.random.default_rng(12345) - inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)} - output_list = generate_ref_data(orig_mod["main"], inputs, params) - - compiled_models = compile_models( - AOTTestModel(module=cmsisnn_mod, inputs=inputs, outputs=output_list, params=params), - interface_api, - use_unpacked_api, - pass_config={"tir.usmp.enable": False}, - ) - - # validate number of TIR primfunc args - expected_num_params = 6 if enable_bias else 5 - cmsisnn_tir_mod = None - for target, mod in compiled_models[0].executor_factory.lowered_ir_mods.items(): - if target.kind.name == "cmsis-nn": - cmsisnn_tir_mod = mod - - cmsisnn_func = cmsisnn_tir_mod["tvmgen_default_cmsis_nn_main_0"] - assert ( - len(cmsisnn_func.params) == expected_num_params - ), "Generated unexpected number of function arguments." 
- - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("dtype", ["int8", "int16"]) -@pytest.mark.parametrize("padding", ["SAME", "VALID"]) -@pytest.mark.parametrize("relu_type", ["RELU"]) -@pytest.mark.parametrize("enable_bias", [True, False]) -@pytest.mark.parametrize( - "input_zero_point, input_scale, kernel_scale, out_channels", - [(10, 0.0128, [0.11, 0.22], 2), (-64, 1, [1, 0.0256, 1.37], 3)], -) -@pytest.mark.parametrize( - "compiler_cpu, cpu_flags", [("cortex-m55", "+nomve"), ("cortex-m55", ""), ("cortex-m7", "")] -) -def test_conv2d_symmetric_padding( - dtype, - padding, - enable_bias, - relu_type, - input_zero_point, - input_scale, - kernel_scale, - out_channels, - compiler_cpu, - cpu_flags, -): - """Tests QNN Conv2D where the padding is symmetric on both sides of input""" - interface_api = "c" - use_unpacked_api = True - - ifm_shape = (1, 64, 100, 4) - kernel_size = (3, 3) - strides = (1, 1) - dilation = (1, 1) - groups = 1 - # input_zero_point is not handled by TFLM when int16 - input_zero_point = input_zero_point if dtype == "int8" else 0 - kernel_layout = "HWIO" - kernel_h = kernel_size[0] - kernel_w = kernel_size[1] - kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels) - kernel_zero_point = 0 - in_min, in_max = get_dtype_range(dtype) - - kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) - - output_scale, output_zero_point = get_conv2d_qnn_params( - kernel_shape, - input_scale, - input_zero_point, - kernel_scale, - kernel_zero_point, - input_dtype=dtype, - kernel_dtype=kernel_dtype, - output_dtype=dtype, - ) - - model, params = make_model( - ifm_shape, - kernel_shape, - input_zero_point, - input_scale, - kernel_zero_point, - kernel_scale, - output_zero_point, - output_scale, - padding, - strides, - dilation, - groups, - dtype, - kernel_dtype, - bias_dtype, - out_channels, - kernel_layout, - enable_bias, - relu_type, - ) - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # validate the output - rng = np.random.default_rng(12345) - inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)} - output_list = generate_ref_data(orig_mod["main"], inputs, params) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - params=params, - output_tolerance=1, - ), - create_test_runner(compiler_cpu, cpu_flags), - interface_api, - use_unpacked_api, - ) - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("padding", ["SAME", "VALID"]) -@pytest.mark.parametrize("relu_type", ["RELU", "NONE"]) -@pytest.mark.parametrize("enable_bias", [True, False]) -@pytest.mark.parametrize( - "input_zero_point, input_scale, kernel_scale, out_channels", - [(10, 0.0128, [0.11, 0.22], 2), (-64, 1, [1, 0.0256, 1.37], 3)], -) -def test_conv2d_asymmetric_padding( - padding, - enable_bias, - relu_type, - input_zero_point, - input_scale, - kernel_scale, - out_channels, -): - """Tests QNN Conv2D where the padding is asymmetric on different sides of input""" - interface_api = "c" - use_unpacked_api = True - test_runner = AOT_USMP_CORSTONE300_RUNNER - - dtype = "int8" - ifm_shape = (1, 25, 25, 12) - kernel_size = (5, 5) - strides = (2, 2) - dilation = (1, 1) - groups = 1 - input_zero_point = input_zero_point if dtype == "int8" else 0 - kernel_layout = "HWIO" - kernel_h = kernel_size[0] - kernel_w = kernel_size[1] - kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, 
out_channels) - kernel_zero_point = 0 - in_min, in_max = get_dtype_range(dtype) - - kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) - - output_scale, output_zero_point = get_conv2d_qnn_params( - kernel_shape, - input_scale, - input_zero_point, - kernel_scale, - kernel_zero_point, - input_dtype=dtype, - kernel_dtype=kernel_dtype, - output_dtype=dtype, - ) - - model, params = make_model( - ifm_shape, - kernel_shape, - input_zero_point, - input_scale, - kernel_zero_point, - kernel_scale, - output_zero_point, - output_scale, - padding, - strides, - dilation, - groups, - dtype, - kernel_dtype, - bias_dtype, - out_channels, - kernel_layout, - enable_bias, - relu_type, - ) - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # validate the output - rng = np.random.default_rng(12345) - inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)} - output_list = generate_ref_data(orig_mod["main"], inputs, params) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - params=params, - output_tolerance=1, - ), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("ifm_shape", [(1, 25, 25, 12), (1, 64, 100, 4)]) -@pytest.mark.parametrize( - "pad_width", - [ - ((0, 0), (0, 1), (1, 2), (0, 0)), - ((0, 0), (1, 1), (1, 1), (0, 0)), - ((0, 0), (2, 2), (3, 4), (0, 0)), - ], -) -def test_pad_conv2d_fusion_int8( - ifm_shape, - pad_width, -): - """Tests QNN Conv2D where the padding is asymmetric on different sides of input""" - interface_api = "c" - use_unpacked_api = True - test_runner = AOT_USMP_CORSTONE300_RUNNER - - ifm_shape = (1, 25, 25, 12) - kernel_size = (5, 5) - strides = (2, 2) - dilation = (1, 1) - padding = "SAME" - dtype = "int8" - enable_bias = True - relu_type = "NONE" - input_zero_point = 10 - input_scale = 0.0128 - kernel_scale = [0.11, 0.22] - out_channels = 2 - groups = 1 - kernel_layout = "HWIO" - kernel_h = kernel_size[0] - kernel_w = kernel_size[1] - kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels) - kernel_zero_point = 0 - in_min, in_max = get_dtype_range(dtype) - - kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) - output_scale, output_zero_point = get_conv2d_qnn_params( - kernel_shape, - input_scale, - input_zero_point, - kernel_scale, - kernel_zero_point, - input_dtype=dtype, - kernel_dtype=kernel_dtype, - output_dtype=dtype, - ) - - invar = relay.var("input", shape=ifm_shape, dtype=dtype) - pad = relay.nn.pad( - invar, - pad_width=pad_width, # ((), (top, bottom), (left, right), ()) - pad_value=input_zero_point, - pad_mode="constant", - ) - - model, params = make_model( - ifm_shape, - kernel_shape, - input_zero_point, - input_scale, - kernel_zero_point, - kernel_scale, - output_zero_point, - output_scale, - padding, - strides, - dilation, - groups, - dtype, - kernel_dtype, - bias_dtype, - out_channels, - kernel_layout, - enable_bias, - relu_type, - input_op=pad, - ) - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod, False) - - # check pad is not present inside CMSIS-NN partitioned function - cmsisnn_func = None - for var in cmsisnn_mod.get_global_vars(): - if "cmsis_nn_main_0" in var.name_hint: - cmsisnn_func = cmsisnn_mod[var] - pad_verifier = 
CheckForPadsWithinCompositeFunc() - pad_verifier.visit_function(cmsisnn_func) - pad_verifier.assert_no_pads_within_func() - - # validate the output - rng = np.random.default_rng(12345) - inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)} - output_list = generate_ref_data(orig_mod["main"], inputs, params) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - params=params, - output_tolerance=1, - ), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize( - "ifm_shape, pad_width, conv2d_padding", - [ - [(1, 25, 25, 12), ((0, 0), (0, 2), (1, 2), (0, 0)), "SAME"], - [(1, 64, 100, 4), ((0, 0), (1, 3), (1, 1), (0, 0)), "VALID"], - [(1, 55, 55, 3), ((0, 0), (2, 1), (3, 5), (0, 0)), "SAME"], - ], -) -def test_invalid_pad_conv2d_fusion_int8( - ifm_shape, - pad_width, - conv2d_padding, -): - """Tests QNN Conv2D where the padding is asymmetric on different sides of input""" - interface_api = "c" - use_unpacked_api = True - test_runner = AOT_USMP_CORSTONE300_RUNNER - - ifm_shape = (1, 25, 25, 12) - kernel_size = (5, 5) - strides = (2, 2) - dilation = (1, 1) - dtype = "int8" - enable_bias = True - relu_type = "NONE" - input_zero_point = 10 - input_scale = 0.0128 - kernel_scale = [0.11, 0.22] - out_channels = 2 - groups = 1 - kernel_layout = "HWIO" - kernel_h = kernel_size[0] - kernel_w = kernel_size[1] - kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels) - kernel_zero_point = 0 - in_min, in_max = get_dtype_range(dtype) - - kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) - - output_scale, output_zero_point = get_conv2d_qnn_params( - kernel_shape, - input_scale, - input_zero_point, - kernel_scale, - kernel_zero_point, - input_dtype=dtype, - kernel_dtype=kernel_dtype, - output_dtype=dtype, - ) - - invar = relay.var("input", shape=ifm_shape, dtype=dtype) - pad = relay.nn.pad( - invar, - pad_width=pad_width, # ((), (top, bottom), (left, right), ()) - pad_value=input_zero_point, - pad_mode="constant", - ) - - model, params = make_model( - ifm_shape, - kernel_shape, - input_zero_point, - input_scale, - kernel_zero_point, - kernel_scale, - output_zero_point, - output_scale, - conv2d_padding, - strides, - dilation, - groups, - dtype, - kernel_dtype, - bias_dtype, - out_channels, - kernel_layout, - enable_bias, - relu_type, - input_op=pad, - ) - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # check pad is only present inside main function - cmsisnn_func = None - for var in cmsisnn_mod.get_global_vars(): - if "cmsis_nn_main_0" in var.name_hint: - cmsisnn_func = cmsisnn_mod[var] - pad_verifier = CheckForPadsWithinCompositeFunc() - pad_verifier.visit_function(cmsisnn_func) - pad_verifier.assert_no_pads_within_func() - else: - main_func = cmsisnn_mod[var] - pad_verifier = CheckForPadsWithinCompositeFunc() - pad_verifier.visit_function(main_func) - pad_verifier.assert_pads_within_func() - - # validate the output - rng = np.random.default_rng(12345) - inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)} - output_list = generate_ref_data(orig_mod["main"], inputs, params) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - params=params, - output_tolerance=1, - ), - test_runner, - interface_api, - use_unpacked_api, - ) - - -# pylint: 
disable=import-outside-toplevel -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("ifm_shape", [(1, 55, 55, 3)]) -@pytest.mark.parametrize("kernel_shape", [(3, 2), (1, 3)]) -@pytest.mark.parametrize("strides, dilation", [((3, 2), (1, 1))]) -@pytest.mark.parametrize("padding", ["SAME", "VALID"]) -@pytest.mark.parametrize("activation", ["NONE", "RELU"]) -def test_conv2d_int8_tflite(ifm_shape, kernel_shape, strides, dilation, padding, activation): - """Compares TVM output against TFLite output""" - interface_api = "c" - use_unpacked_api = True - test_runner = AOT_USMP_CORSTONE300_RUNNER - dtype = "int8" - - from tvm.relay.testing.tflite import TFLiteModel - - tfl_model = TFLiteModel(dtype) - conv2d_function = tfl_model.create_conv2d_single( - kernel_shape, strides, padding, dilation, activation - ) - tfl_model.create_tflite_model(conv2d_function, [ifm_shape]) - relay_mod, relay_params = tfl_model.convert_to_relay() - - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(relay_mod, relay_params) - - # validate pattern matching - assert_partitioned_function(relay_mod, cmsisnn_mod) - - # validate CMSIS-NN output against TFLite output - input_map, output_map, output_tolerance = tfl_model.generate_reference_data() - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=input_map, - outputs=output_map, - params=relay_params, - output_tolerance=output_tolerance, - ), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("dtype", ["int8", "int16"]) -@pytest.mark.parametrize("ifm_shape", [(1, 28, 28, 12), (1, 64, 100, 4)]) -@pytest.mark.parametrize("kernel_size", [(3, 3)]) -@pytest.mark.parametrize("padding", ["SAME", "VALID"]) -@pytest.mark.parametrize("strides, dilation", [((1, 1), (1, 1))]) -@pytest.mark.parametrize("relu_type", ["RELU"]) -@pytest.mark.parametrize( - "depth_multiplier, enable_bias", - [(1, True), (3, True)], -) -@pytest.mark.parametrize( - "input_zero_point, input_scale, kernel_scale, out_channels", - [(10, 0.0128, [0.11, 0.22], 2), (-64, 1, [1, 0.0256, 1.37], 3)], -) -@pytest.mark.parametrize( - "compiler_cpu, cpu_flags", [("cortex-m55", "+nomve"), ("cortex-m55", ""), ("cortex-m7", "")] -) -def test_depthwise( - dtype, - ifm_shape, - kernel_size, - padding, - strides, - dilation, - enable_bias, - relu_type, - input_zero_point, - input_scale, - kernel_scale, - out_channels, - depth_multiplier, - compiler_cpu, - cpu_flags, -): - """Tests QNN Depthwise int8 op via CMSIS-NN""" - interface_api = "c" - use_unpacked_api = True - - groups = 1 - input_zero_point = input_zero_point if dtype == "int8" else 0 - kernel_layout = "HWIO" - kernel_h = kernel_size[0] - kernel_w = kernel_size[1] - kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels) - kernel_zero_point = 0 - in_min, in_max = get_dtype_range(dtype) - - groups = ifm_shape[3] - kernel_layout = "HWOI" - kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier) - out_channels = ifm_shape[3] * depth_multiplier - ks_len = len(kernel_scale) - kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)] - - kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) - - output_scale, output_zero_point = get_conv2d_qnn_params( - kernel_shape, - input_scale, - input_zero_point, - kernel_scale, - kernel_zero_point, - input_dtype=dtype, - kernel_dtype=kernel_dtype, - output_dtype=dtype, - is_depthwise=True, - ) - - model, params = make_model( - ifm_shape, - kernel_shape, - input_zero_point, - input_scale, - kernel_zero_point, - 
kernel_scale, - output_zero_point, - output_scale, - padding, - strides, - dilation, - groups, - dtype, - kernel_dtype, - bias_dtype, - out_channels, - kernel_layout, - enable_bias, - relu_type, - ) - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # validate the output - rng = np.random.default_rng(12345) - inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)} - output_list = generate_ref_data(orig_mod["main"], inputs, params) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - params=params, - output_tolerance=1, - ), - create_test_runner(compiler_cpu, cpu_flags), - interface_api, - use_unpacked_api, - ) - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("padding", ["SAME", "VALID"]) -@pytest.mark.parametrize("strides, dilation", [((1, 1), (1, 1))]) -@pytest.mark.parametrize("relu_type", ["RELU", "NONE"]) -@pytest.mark.parametrize("depth_multiplier", [1, 3]) -@pytest.mark.parametrize( - "input_zero_point, input_scale, kernel_scale", - [ - ( - 10, - 0.0128, - [0.11, 0.22], - ), - ( - -64, - 1, - [1, 0.0256, 1.37], - ), - ], -) -def test_relay_conv2d_cmsisnn_depthwise_int8( - padding, - strides, - dilation, - relu_type, - input_zero_point, - input_scale, - kernel_scale, - depth_multiplier, -): - """Tests QNN Depthwise int8 op via CMSIS-NN""" - interface_api = "c" - use_unpacked_api = True - test_runner = AOT_USMP_CORSTONE300_RUNNER - - dtype = "int8" - in_min, in_max = get_dtype_range(dtype) - - ifm_shape = (1, 24, 24, 1) - groups = ifm_shape[3] - input_zero_point = input_zero_point if dtype == "int8" else 0 - kernel_layout = "HWIO" - (kernel_h, kernel_w) = (3, 3) - kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier) - out_channels = ifm_shape[3] * depth_multiplier - enable_bias = True - ks_len = len(kernel_scale) - kernel_zero_point = 0 - kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)] - - kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) - - output_scale, output_zero_point = get_conv2d_qnn_params( - kernel_shape, - input_scale, - input_zero_point, - kernel_scale, - kernel_zero_point, - input_dtype=dtype, - kernel_dtype=kernel_dtype, - output_dtype=dtype, - is_depthwise=True, - ) - - model, params = make_model( - ifm_shape, - kernel_shape, - input_zero_point, - input_scale, - kernel_zero_point, - kernel_scale, - output_zero_point, - output_scale, - padding, - strides, - dilation, - groups, - dtype, - kernel_dtype, - bias_dtype, - out_channels, - kernel_layout, - enable_bias, - relu_type, - ) - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # generate reference output - rng = np.random.default_rng(12345) - inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)} - output_list = generate_ref_data(orig_mod["main"], inputs, params) - - # validate presence of depthwise convolution - compiled_models = compile_models( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - params=params, - output_tolerance=1, - ), - interface_api, - use_unpacked_api, - pass_config=test_runner.pass_config, - ) - - cmsisnn_tir_mod = None - for target, mod in compiled_models[0].executor_factory.lowered_ir_mods.items(): - if target.kind.name == "cmsis-nn": - 
cmsisnn_tir_mod = mod - - cmsisnn_func = cmsisnn_tir_mod["tvmgen_default_cmsis_nn_main_0"] - call_extern = None - # This happens when context buffer is init in case depthM != 1 - if isinstance(cmsisnn_func.body, tvm.tir.stmt.Evaluate): - call_extern = cmsisnn_func.body.value - else: - call_extern = cmsisnn_func.body.body.value - assert ( - call_extern.args[0].value == "arm_depthwise_conv_wrapper_s8" - ), "Relay Conv2D should be mapped to CMSIS-NN Depthwise Convolution." - - # validate the output - run_and_check( - models=compiled_models, - runner=test_runner, - interface_api=interface_api, - ) - - -def parameterize_for_invalid_model(test): - """Generates non-int8 non-int16 inputs""" - in_dtype = ["uint8", "int8", "int16"] - kernel_dtype = ["uint8", "int8"] - kernel_zero_point = [-33, 10, 0] - input_zero_point = [64, 0] - all_combinations = itertools.product( - in_dtype, kernel_dtype, kernel_zero_point, input_zero_point - ) - all_combinations = filter( - lambda parameters: not ( - (parameters[0] == "int8" or (parameters[0] == "int16" and parameters[3] == 0)) - and parameters[1] == "int8" - and parameters[2] == 0 - ), - all_combinations, - ) - return pytest.mark.parametrize( - ["in_dtype", "kernel_dtype", "kernel_zero_point", "input_zero_point"], - all_combinations, - )(test) - - -@tvm.testing.requires_cmsisnn -@parameterize_for_invalid_model -def test_invalid_parameters( - in_dtype, - kernel_dtype, - kernel_zero_point, - input_zero_point, -): - """Tests Depthwise op for non int8 inputs""" - ifm_shape = (1, 28, 28, 12) - out_channels = 2 - input_scale = 1 - kernel_scale = [0.11, 0.0237] - - kernel_layout = "HWIO" - kernel_shape = [3, 3, ifm_shape[3], out_channels] - _, bias_dtype = get_kernel_bias_dtype(in_dtype) - output_scale, output_zero_point = get_conv2d_qnn_params( - kernel_shape, - input_scale, - input_zero_point, - kernel_scale, - kernel_zero_point, - in_dtype, - kernel_dtype, - in_dtype, - is_depthwise=False, - ) - model, params = make_model( - shape=ifm_shape, - kernel_shape=kernel_shape, - input_zero_point=input_zero_point, - input_scale=input_scale, - kernel_zero_point=kernel_zero_point, - kernel_scale=kernel_scale, - output_zero_point=output_zero_point, - output_scale=output_scale, - padding="SAME", - strides=(1, 1), - dilation=(1, 1), - groups=1, - dtype=in_dtype, - kernel_dtype=kernel_dtype, - bias_dtype=bias_dtype, - out_channels=out_channels, - kernel_layout=kernel_layout, - enable_bias=True, - relu_type="NONE", - ) - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) - assert_no_external_function(cmsisnn_mod) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_cmsisnn/test_extract_constants.py b/tests/python/contrib/test_cmsisnn/test_extract_constants.py deleted file mode 100644 index 7d3e81a9c79d..000000000000 --- a/tests/python/contrib/test_cmsisnn/test_extract_constants.py +++ /dev/null @@ -1,277 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""CMSIS-NN integration tests: extract_constants pass""" -import numpy as np -import pytest -import tvm -import tvm.testing -from tvm import relay - -tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__) - - -class CheckFunctionsForConstants(tvm.relay.ExprVisitor): - """Provides methods to test number of constants present in a function""" - - def __init__(self): - super().__init__() - self.num_constants_ = 0 - - def visit_call(self, call): - super().visit_call(call) - for arg in call.args: - if isinstance(arg, relay.Constant) and arg.data.numpy().ndim > 0: - self.num_constants_ += 1 - - def check_num_constants(self): - assert self.num_constants_ == 0, "Functions should not have constant arguments in Calls" - - -def set_external_func_attr(func, compiler, ext_symbol): - func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) - func = func.with_attr("Compiler", compiler) - func = func.with_attr("global_symbol", ext_symbol) - return func - - -def set_composite_func_attr(func, name): - func = func.with_attr("Composite", name) - return func - - -@tvm.testing.requires_cmsisnn -def test_external_function(): - """Tests the pass ExternConstants when the function is a global function""" - input1_data = np.random.uniform(0, 1, (8, 8)).astype("float32") - input0 = relay.var("input0", shape=(8, 8)) - input1_const = relay.const(input1_data, "float32") - binary_op = input0 + input1_const - extern_func = relay.Function([input0], binary_op, relay.TensorType((8, 8), "float32")) - global_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "cmsis-nn", global_var.name_hint) - - arg = relay.var("arg", shape=(8, 8)) - call_extern_func = relay.Call(global_var, [arg]) - main_func = relay.Function([arg], call_extern_func, relay.TensorType((8, 8), "float32")) - main_var = relay.GlobalVar("main") - - mod = tvm.IRModule() - mod[global_var] = extern_func - mod[main_var] = main_func - - mod = ExtractConstantsFromPartitionedFunction()(mod) - constant_verifier = CheckFunctionsForConstants() - constant_verifier.visit_function(mod[global_var]) - constant_verifier.check_num_constants() - relay.transform.InferType()(mod) - - -@tvm.testing.requires_cmsisnn -def test_nested_function(): - """Tests the pass ExternConstants when a composite function - is present within global function - """ - input1_data = np.random.uniform(0, 1, (8, 8)).astype("float32") - input0 = relay.var("input0", shape=(8, 8)) - input1_const = relay.const(input1_data, "float32") - binary_op0 = input0 + input1_const - binary_op1 = binary_op0 * relay.const(5.0, "float32") - local_func = relay.Function([input0], binary_op1, relay.TensorType((8, 8), "float32")) - local_func = set_composite_func_attr(local_func, "cmsis-nn") - - arg = relay.var("arg", shape=(8, 8)) - call_local_func = relay.Call(local_func, [arg]) - extern_func = relay.Function([arg], call_local_func, relay.TensorType((8, 8), "float32")) - - global_arg = relay.var("garg", shape=(8, 8)) - global_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "cmsis-nn", global_var.name_hint) - 
call_extern_func = relay.Call(global_var, [global_arg]) - main_func = relay.Function([global_arg], call_extern_func, relay.TensorType((8, 8), "float32")) - main_var = relay.GlobalVar("main") - - mod = tvm.IRModule() - mod[global_var] = extern_func - mod[main_var] = main_func - - mod = ExtractConstantsFromPartitionedFunction()(mod) - constant_verifier = CheckFunctionsForConstants() - constant_verifier.visit_function(mod[global_var]) - constant_verifier.check_num_constants() - relay.transform.InferType()(mod) - - -@tvm.testing.requires_cmsisnn -def test_internal_function_with_duplicate_arguments(): - """Tests the pass ExternConstants when a composite function - is present within global function with repeating arguments - to one of the binary ops. - """ - input0 = relay.var("input0", shape=(8, 8)) - binary_op0 = input0 + input0 - binary_op1 = binary_op0 * relay.const(5.0, "float32") - local_func = relay.Function([input0], binary_op1, relay.TensorType((8, 8), "float32")) - local_func = set_composite_func_attr(local_func, "cmsis-nn") - - arg = relay.var("arg", shape=(8, 8)) - call_local_func = relay.Call(local_func, [arg]) - extern_func = relay.Function([arg], call_local_func, relay.TensorType((8, 8), "float32")) - - global_arg = relay.var("global_var", shape=(8, 8)) - global_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "cmsis-nn", global_var.name_hint) - call_extern_func = relay.Call(global_var, [global_arg]) - main_func = relay.Function([global_arg], call_extern_func, relay.TensorType((8, 8), "float32")) - main_var = relay.GlobalVar("main") - - mod = tvm.IRModule() - mod[global_var] = extern_func - mod[main_var] = main_func - - mod = ExtractConstantsFromPartitionedFunction()(mod) - constant_verifier = CheckFunctionsForConstants() - constant_verifier.visit_function(mod[global_var]) - constant_verifier.check_num_constants() - relay.transform.InferType()(mod) - - -@tvm.testing.requires_cmsisnn -def test_multiple_functions(): - """Tests the pass ExternConstants when global function - contains multiple composite functions inside it - """ - f0_input1_data = np.random.uniform(0, 1, (8, 8)).astype("float32") - f0_input0 = relay.var("f0_in0", shape=(8, 8)) - f0_input1_const = relay.const(f0_input1_data, "float32") - f0_binary_op = f0_input0 + f0_input1_const - f0_func = relay.Function([f0_input0], f0_binary_op, relay.TensorType((8, 8), "float32")) - f0_func = set_composite_func_attr(f0_func, "cmsis-nn") - - f1_input1_data = np.random.uniform(0, 1, (8, 8)).astype("float32") - f1_input0 = relay.var("f1_in0", shape=(8, 8)) - f1_input1_const = relay.const(f1_input1_data, "float32") - f1_binary_op = f1_input0 + f1_input1_const - f1_func = relay.Function([f1_input0], f1_binary_op, relay.TensorType((8, 8), "float32")) - f1_func = set_composite_func_attr(f1_func, "cmsis-nn") - - arg0 = relay.var("arg0", shape=(8, 8)) - call_local_func0 = relay.Call(f0_func, [arg0]) - call_local_func1 = relay.Call(f1_func, [call_local_func0]) - extern_func = relay.Function([arg0], call_local_func1, relay.TensorType((8, 8), "float32")) - input0 = relay.var("input0", shape=(8, 8)) - global_var = relay.GlobalVar("cmsis-nn") - extern_func = set_external_func_attr(extern_func, "cmsis-nn", global_var.name_hint) - call_extern_func = relay.Call(global_var, [input0]) - main_func = relay.Function([input0], call_extern_func, relay.TensorType((8, 8), "float32")) - main_var = relay.GlobalVar("main") - - mod = tvm.IRModule() - mod[global_var] = extern_func - mod[main_var] = main_func - - mod 
= ExtractConstantsFromPartitionedFunction()(mod) - constant_verifier = CheckFunctionsForConstants() - constant_verifier.visit_function(mod[global_var]) - constant_verifier.check_num_constants() - relay.transform.InferType()(mod) - - -@tvm.testing.requires_cmsisnn -def test_main_function(): - """Tests the pass ExternConstants on main function""" - input0 = relay.var("input0", shape=(8, 8)) - input1 = relay.var("input1", shape=(8, 8)) - binary_op = input0 + input1 - extern_func = relay.Function([input0, input1], binary_op, relay.TensorType((8, 8), "float32")) - global_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "cmsis-nn", global_var.name_hint) - - arg = relay.var("arg", shape=(8, 8)) - input_data = np.random.uniform(0, 1, (8, 8)).astype("float32") - input_const = relay.const(input_data, "float32") - binary_op = arg + input_const - call_extern_func = relay.Call(global_var, [arg, binary_op]) - main_func = relay.Function([arg], call_extern_func, relay.TensorType((8, 8), "float32")) - main_var = relay.GlobalVar("main") - - mod = tvm.IRModule() - mod[global_var] = extern_func - mod[main_var] = main_func - - mod = ExtractConstantsFromPartitionedFunction()(mod) - check_for_constants = CheckFunctionsForConstants() - check_for_constants.visit_call(mod[main_var].body) - assert ( - check_for_constants.num_constants_ == 1 - ), "main() should have same number of arguments as before" - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("external_compiler", ["cmsis-nn", "other_compiler"]) -def test_multiple_functions_non_cmsisnn_compiler(external_compiler): - """Tests the pass ExternConstants on non CMSIS-NN targets""" - y20_data = np.random.uniform(0, 1, (8, 8)).astype("float32") - x20 = relay.var("x20", shape=(8, 8)) - y20_const = relay.const(y20_data, "float32") - z20 = x20 + y20_const - f20 = relay.Function([x20], z20, relay.TensorType((8, 8), "float32")) - f20 = set_composite_func_attr(f20, "cmsis-nn.qnn_op_1") - x10 = relay.var("x10", shape=(8, 8)) - call_local_func0 = relay.Call(f20, [x10]) - extern_func0 = relay.Function([x10], call_local_func0, relay.TensorType((8, 8), "float32")) - - y21_data = np.random.uniform(0, 1, (8, 8)).astype("float32") - x21 = relay.var("x21", shape=(8, 8)) - y21_const = relay.const(y21_data, "float32") - z21 = x21 + y21_const - f21 = relay.Function([x21], z21, relay.TensorType((8, 8), "float32")) - f21 = set_composite_func_attr(f21, "cmsis-nn.qnn_op_2") - x11 = relay.var("x11", shape=(8, 8)) - call_local_func1 = relay.Call(f21, [x11]) - extern_func1 = relay.Function([x11], call_local_func1, relay.TensorType((8, 8), "float32")) - - input0 = relay.var("input0", shape=(8, 8)) - global_var0 = relay.GlobalVar("external_function_0") - extern_func0 = set_external_func_attr(extern_func0, external_compiler, global_var0.name_hint) - call_extern_func0 = relay.Call(global_var0, [input0]) - global_var1 = relay.GlobalVar("external_function_1") - extern_func1 = set_external_func_attr(extern_func1, external_compiler, global_var1.name_hint) - call_extern_func1 = relay.Call(global_var1, [call_extern_func0]) - main_func = relay.Function([input0], call_extern_func1, relay.TensorType((8, 8), "float32")) - main_var = relay.GlobalVar("main") - - mod = tvm.IRModule() - mod[global_var0] = extern_func0 - mod[global_var1] = extern_func1 - mod[main_var] = main_func - - mod = ExtractConstantsFromPartitionedFunction()(mod) - check_for_constants = CheckFunctionsForConstants() - check_for_constants.visit_call(mod[main_var].body) - - 
num_extracted_constants = 0 - if external_compiler == "cmsis-nn": - num_extracted_constants = 2 - - assert ( - check_for_constants.num_constants_ == num_extracted_constants - ), "main() should have same number of arguments as before" - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_cmsisnn/test_fully_connected.py b/tests/python/contrib/test_cmsisnn/test_fully_connected.py deleted file mode 100644 index 46b1488eb3fe..000000000000 --- a/tests/python/contrib/test_cmsisnn/test_fully_connected.py +++ /dev/null @@ -1,246 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""CMSIS-NN integration tests: Fully Connected""" -import itertools -import numpy as np -import pytest -import tvm -from tvm import relay -from tvm.relay.op.contrib import cmsisnn - -from tvm.testing.aot import get_dtype_range, generate_ref_data, AOTTestModel, compile_and_run -from .utils import ( - make_module, - get_conv2d_qnn_params, - make_qnn_relu, - assert_partitioned_function, - assert_no_external_function, - create_test_runner, - get_kernel_bias_dtype, -) - - -def make_model( - in_shape, # [batchsize, in_channels] - kernel_shape, # [out_channels, num_inputs] - input_zero_point, - kernel_zero_point, - input_scale, - kernel_scale, - output_zero_point, - output_scale, - dtype, - kernel_dtype, - bias_dtype, - out_channels, - enable_bias, - relu_type="NONE", -): - """Return a model and any parameters it may have""" - input_ = relay.var("input", shape=in_shape, dtype=dtype) - rng = np.random.default_rng(12321) - kmin, kmax = get_dtype_range(kernel_dtype) - weight = tvm.nd.array( - rng.integers( - kmin, - high=kmax, - size=kernel_shape, - dtype=kernel_dtype, - ) - ) - weight_const = relay.const(weight, kernel_dtype) - dense = relay.qnn.op.dense( - input_, - weight_const, - input_zero_point=relay.const(input_zero_point, "int32"), - kernel_zero_point=relay.const(kernel_zero_point, "int32"), - input_scale=relay.const(input_scale, "float32"), - kernel_scale=relay.const(kernel_scale, "float32"), - units=out_channels, - out_dtype=bias_dtype, - ) - - bias = tvm.nd.array(rng.integers(0, high=10, size=(out_channels,), dtype=bias_dtype)) - bias_const = relay.const(bias, bias_dtype) - last_op = relay.nn.bias_add(dense, bias_const) if enable_bias else dense - requant_input_sc = input_scale * kernel_scale - last_op = relay.qnn.op.requantize( - last_op, - relay.const(requant_input_sc, "float32"), - relay.const(0, "int32"), - relay.const(output_scale, "float32"), - relay.const(output_zero_point, "int32"), - out_dtype=dtype, - ) - last_op = make_qnn_relu(last_op, relu_type, output_scale, output_zero_point, dtype) - params = {"w": weight, "b": bias} - return last_op, params - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("dtype", ["int8", "int16"]) 
-@pytest.mark.parametrize("in_shape", [(2, 28), (1, 64)]) -@pytest.mark.parametrize("out_channels", [12, 128]) -@pytest.mark.parametrize("enable_bias", [False, True]) -@pytest.mark.parametrize( - "input_zero_point, input_scale, kernel_scale", - [(10, 0.0128, 0.11), (-64, 0.0256, 1.37)], -) -@pytest.mark.parametrize( - "compiler_cpu, cpu_flags", [("cortex-m55", "+nomve"), ("cortex-m55", ""), ("cortex-m7", "")] -) -def test_ops( - dtype, - in_shape, - enable_bias, - input_zero_point, - input_scale, - kernel_scale, - out_channels, - compiler_cpu, - cpu_flags, -): - """Test QNN fully connected layer""" - interface_api = "c" - use_unpacked_api = True - - kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) - kernel_zero_point = 0 - kernel_shape = [out_channels, in_shape[1]] - conv2d_kernel_shape = (1, 1, kernel_shape[0], kernel_shape[1]) - in_min, in_max = get_dtype_range(dtype) - - output_scale, output_zero_point = get_conv2d_qnn_params( - conv2d_kernel_shape, - input_scale, - input_zero_point, - kernel_scale, - kernel_zero_point, - dtype, - ) - - model, params = make_model( - in_shape, - kernel_shape, - input_zero_point, - kernel_zero_point, - input_scale, - kernel_scale, - output_zero_point, - output_scale, - dtype, - kernel_dtype, - bias_dtype, - out_channels, - enable_bias, - ) - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # validate the output - rng = np.random.default_rng(12345) - inputs = {"input": rng.integers(in_min, high=in_max, size=in_shape, dtype=dtype)} - output_list = generate_ref_data(orig_mod["main"], inputs, params) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - params=params, - output_tolerance=1, - ), - create_test_runner(compiler_cpu, cpu_flags), - interface_api, - use_unpacked_api, - ) - - -def parameterize_for_invalid_model(test): - """Generates parameters for non int8 inputs to fully connected layer""" - in_dtype = ["uint8", "int8", "int16"] - kernel_dtype = ["uint8", "int8"] - kernel_zero_point = [-33, 10, 0] - all_combinations = itertools.product(in_dtype, kernel_dtype, kernel_zero_point) - all_combinations = filter( - lambda parameters: not ( - (parameters[0] == "int8" or parameters[0] == "int16") - and parameters[1] == "int8" - and parameters[2] == 0 - ), - all_combinations, - ) - return pytest.mark.parametrize( - ["in_dtype", "kernel_dtype", "kernel_zero_point"], - all_combinations, - )(test) - - -@tvm.testing.requires_cmsisnn -@parameterize_for_invalid_model -def test_invalid_parameters( - in_dtype, - kernel_dtype, - kernel_zero_point, -): - """Tests fully connected layer with non int8 inputs""" - in_shape = (2, 28) - out_channels = 2 - input_scale = 1 - input_zero_point = 24 - kernel_scale = [0.11, 0.0237] - _, bias_dtype = get_kernel_bias_dtype(in_dtype) - - kernel_shape = [out_channels, in_shape[1]] - conv2d_kernel_shape = [1, 1, kernel_shape[0], kernel_shape[1]] - output_scale, output_zero_point = get_conv2d_qnn_params( - conv2d_kernel_shape, - input_scale, - input_zero_point, - kernel_scale, - kernel_zero_point, - in_dtype, - kernel_dtype, - in_dtype, - ) - model, params = make_model( - in_shape=in_shape, - kernel_shape=kernel_shape, - input_zero_point=input_zero_point, - kernel_zero_point=kernel_zero_point, - input_scale=input_scale, - kernel_scale=kernel_scale, - output_zero_point=output_zero_point, - output_scale=output_scale, - dtype=in_dtype, - 
kernel_dtype=kernel_dtype, - bias_dtype=bias_dtype, - out_channels=out_channels, - enable_bias=True, - ) - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) - - # validate pattern matching - assert_no_external_function(cmsisnn_mod) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_cmsisnn/test_fuse_pads.py b/tests/python/contrib/test_cmsisnn/test_fuse_pads.py deleted file mode 100644 index 4ea306cc4382..000000000000 --- a/tests/python/contrib/test_cmsisnn/test_fuse_pads.py +++ /dev/null @@ -1,344 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""CMSIS-NN integration tests: fuse_pads pass""" -import numpy as np -import pytest -import tvm -from tvm.testing.aot import get_dtype_range -from tvm import relay -from .utils import CheckForPadsWithinCompositeFunc - -tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__) - - -def set_external_func_attr(func, compiler, ext_symbol): - func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) - func = func.with_attr("Compiler", compiler) - func = func.with_attr("global_symbol", ext_symbol) - return func - - -def set_composite_func_attr(func, name): - func = func.with_attr("Composite", name) - return func - - -@pytest.mark.parametrize( - "ifm_shape, pad_width, conv2d_padding, ofm_shape", - [ - [(1, 25, 25, 12), ((0, 0), (0, 2), (1, 2), (0, 0)), (1, 1, 1, 1), (1, 26, 28, 2)], - [(1, 64, 100, 4), ((0, 0), (1, 3), (1, 1), (0, 0)), (0, 0, 0, 0), (1, 64, 100, 2)], - [(1, 55, 55, 3), ((0, 0), (2, 1), (3, 5), (0, 0)), (0, 0, 1, 1), (1, 57, 59, 2)], - ], -) -def test_invalid_padding_for_fusion(ifm_shape, pad_width, conv2d_padding, ofm_shape): - """Negative tests for pads preceding Conv2D that cannot be fused.""" - dtype = "int8" - kernel_size = (3, 3) - ofm_channels = 2 - local_input = relay.var("local_input", shape=ifm_shape, dtype=dtype) - pad = relay.nn.pad( - local_input, - pad_width=pad_width, # ((), (top, bottom), (left, right), ()) - pad_value=10, - pad_mode="constant", - ) - rng = np.random.default_rng(12321) - in_min, in_max = get_dtype_range(dtype) - local_weight = tvm.nd.array( - rng.integers( - in_min, - high=in_max, - size=(ofm_channels, kernel_size[0], kernel_size[1], ifm_shape[3]), - dtype=dtype, - ) - ) - local_weight = relay.const(local_weight, dtype) - conv2d = relay.qnn.op.conv2d( - pad, - local_weight, - relay.const(1, "int32"), - relay.const(1, "int32"), - relay.const(1, "float32"), - relay.const(1, "float32"), - data_layout="NHWC", - kernel_layout="OHWI", - channels=ofm_channels, - kernel_size=(3, 3), - padding=conv2d_padding, - out_dtype="int32", - ) - requantize = relay.qnn.op.requantize( - conv2d, - relay.const(1, "float32"), - relay.const(1, "int32"), - relay.const(1, "float32"), - relay.const(1, 
"int32"), - axis=0, - out_dtype=dtype, - ) - local_func = relay.Function(relay.analysis.free_vars(requantize), requantize) - local_func = set_composite_func_attr(local_func, "cmsis-nn.qnn_conv2d") - - mod = tvm.IRModule() - ext_input = relay.var("ext_input", shape=ifm_shape, dtype=dtype) - call_local_func = relay.Call(local_func, [ext_input]) - extern_func = relay.Function(relay.analysis.free_vars(call_local_func), call_local_func) - extern_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "cmsis-nn", extern_var.name_hint) - mod[extern_var] = extern_func - - main_input = relay.var("main_input", shape=ifm_shape, dtype=dtype) - call_extern_func = relay.Call(extern_var, [main_input]) - main_func = relay.Function([main_input], call_extern_func, relay.TensorType(ofm_shape, dtype)) - main_var = relay.GlobalVar("main") - mod[main_var] = main_func - - mod = relay.transform.InferType()(mod) - - error_regex = r"Difference on each side of a dimension should be either 0 or 1" - - with pytest.raises(tvm.TVMError, match=error_regex): - mod = CMSISNNFusePads()(mod) - - -@pytest.mark.parametrize( - "ifm_shape, pad_width, conv2d_padding, ofm_shape", - [ - [(1, 25, 25, 12), ((0, 0), (0, 1), (1, 2), (0, 0)), (1, 1, 1, 1), (1, 26, 28, 2)], - [(1, 64, 100, 4), ((0, 0), (1, 1), (1, 1), (0, 0)), (0, 0, 0, 0), (1, 64, 100, 2)], - [(1, 55, 55, 3), ((0, 0), (2, 1), (3, 2), (0, 0)), (0, 0, 1, 1), (1, 57, 59, 2)], - ], -) -def test_pad_conv2d_fusion_noncmsisnn_target(ifm_shape, pad_width, conv2d_padding, ofm_shape): - """Tests the pads and conv2d fusion for non-cmsisnn targets. - It is expected that pad will not be fused with Conv2D in this case. - """ - dtype = "int8" - kernel_size = (3, 3) - ofm_channels = 2 - local_input = relay.var("local_input", shape=ifm_shape, dtype=dtype) - pad = relay.nn.pad( - local_input, - pad_width=pad_width, # ((), (top, bottom), (left, right), ()) - pad_value=10, - pad_mode="constant", - ) - rng = np.random.default_rng(12321) - in_min, in_max = get_dtype_range(dtype) - local_weight = tvm.nd.array( - rng.integers( - in_min, - high=in_max, - size=(ofm_channels, kernel_size[0], kernel_size[1], ifm_shape[3]), - dtype=dtype, - ) - ) - local_weight = relay.const(local_weight, dtype) - conv2d = relay.qnn.op.conv2d( - pad, - local_weight, - relay.const(1, "int32"), - relay.const(1, "int32"), - relay.const(1, "float32"), - relay.const(1, "float32"), - data_layout="NHWC", - kernel_layout="OHWI", - channels=ofm_channels, - kernel_size=(3, 3), - padding=conv2d_padding, - out_dtype="int32", - ) - requantize = relay.qnn.op.requantize( - conv2d, - relay.const(1, "float32"), - relay.const(1, "int32"), - relay.const(1, "float32"), - relay.const(1, "int32"), - axis=0, - out_dtype=dtype, - ) - local_func = relay.Function(relay.analysis.free_vars(requantize), requantize) - local_func = set_composite_func_attr(local_func, "noncmsis-nn.qnn_conv2d") - - mod = tvm.IRModule() - ext_input = relay.var("ext_input", shape=ifm_shape, dtype=dtype) - call_local_func = relay.Call(local_func, [ext_input]) - extern_func = relay.Function(relay.analysis.free_vars(call_local_func), call_local_func) - extern_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "noncmsis-nn", extern_var.name_hint) - mod[extern_var] = extern_func - - main_input = relay.var("main_input", shape=ifm_shape, dtype=dtype) - call_extern_func = relay.Call(extern_var, [main_input]) - main_func = relay.Function([main_input], call_extern_func, 
relay.TensorType(ofm_shape, dtype)) - main_var = relay.GlobalVar("main") - mod[main_var] = main_func - - mod = relay.transform.InferType()(mod) - - mod = CMSISNNFusePads()(mod) - pad_verifier = CheckForPadsWithinCompositeFunc() - pad_verifier.visit_function(mod[extern_var]) - pad_verifier.assert_pads_within_func() - - -@pytest.mark.parametrize( - "ifm_shape, pad_width, conv2d_padding, ofm_shape", - [ - [(1, 25, 25, 12), ((0, 0), (0, 1), (1, 2), (0, 0)), (1, 1, 1, 1), (1, 26, 28, 2)], - [(1, 64, 100, 4), ((0, 0), (1, 1), (1, 1), (0, 0)), (0, 0, 0, 0), (1, 64, 100, 2)], - [(1, 55, 55, 3), ((0, 0), (2, 1), (3, 2), (0, 0)), (0, 0, 1, 1), (1, 57, 59, 2)], - ], -) -def test_pad_conv2d_fusion(ifm_shape, pad_width, conv2d_padding, ofm_shape): - """Tests the pads and conv2d fusion.""" - dtype = "int8" - kernel_size = (3, 3) - ofm_channels = 2 - local_input = relay.var("local_input", shape=ifm_shape, dtype=dtype) - pad = relay.nn.pad( - local_input, - pad_width=pad_width, # ((), (top, bottom), (left, right), ()) - pad_value=10, - pad_mode="constant", - ) - rng = np.random.default_rng(12321) - kmin, kmax = get_dtype_range(dtype) - local_weight = tvm.nd.array( - rng.integers( - kmin, - high=kmax, - size=(ofm_channels, kernel_size[0], kernel_size[1], ifm_shape[3]), - dtype=dtype, - ) - ) - local_weight = relay.const(local_weight, dtype) - conv2d = relay.qnn.op.conv2d( - pad, - local_weight, - relay.const(1, "int32"), - relay.const(1, "int32"), - relay.const(1, "float32"), - relay.const(1, "float32"), - data_layout="NHWC", - kernel_layout="OHWI", - channels=ofm_channels, - kernel_size=(3, 3), - padding=conv2d_padding, - out_dtype="int32", - ) - requantize = relay.qnn.op.requantize( - conv2d, - relay.const(1, "float32"), - relay.const(1, "int32"), - relay.const(1, "float32"), - relay.const(1, "int32"), - axis=0, - out_dtype=dtype, - ) - local_func = relay.Function(relay.analysis.free_vars(requantize), requantize) - local_func = set_composite_func_attr(local_func, "cmsis-nn.qnn_conv2d") - - mod = tvm.IRModule() - ext_input = relay.var("ext_input", shape=ifm_shape, dtype=dtype) - call_local_func = relay.Call(local_func, [ext_input]) - extern_func = relay.Function(relay.analysis.free_vars(call_local_func), call_local_func) - extern_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "cmsis-nn", extern_var.name_hint) - mod[extern_var] = extern_func - - main_input = relay.var("main_input", shape=ifm_shape, dtype=dtype) - call_extern_func = relay.Call(extern_var, [main_input]) - main_func = relay.Function([main_input], call_extern_func, relay.TensorType(ofm_shape, dtype)) - main_var = relay.GlobalVar("main") - mod[main_var] = main_func - - mod = relay.transform.InferType()(mod) - - mod = CMSISNNFusePads()(mod) - pad_verifier = CheckForPadsWithinCompositeFunc() - pad_verifier.visit_function(mod[extern_var]) - pad_verifier.assert_no_pads_within_func() - - -def test_without_preceding_pad(): - """Tests the pass FusePads when padding is not present before qnn.conv2d.""" - dtype = "int8" - ifm_shape = (1, 56, 56, 64) - ofm_shape = (1, 56, 56, 64) - local_input = relay.var("local_input", shape=ifm_shape, dtype=dtype) - rng = np.random.default_rng(12321) - kmin, kmax = get_dtype_range(dtype) - local_weight = tvm.nd.array( - rng.integers( - kmin, - high=kmax, - size=(64, 3, 3, 64), - dtype=dtype, - ) - ) - local_weight = relay.const(local_weight, dtype) - conv2d = relay.qnn.op.conv2d( - local_input, - local_weight, - relay.const(1, "int32"), - relay.const(1, "int32"), - 
relay.const(1, "float32"), - relay.const(1, "float32"), - data_layout="NHWC", - kernel_layout="OHWI", - channels=64, - kernel_size=(3, 3), - padding=(1, 1, 1, 1), - out_dtype="int32", - ) - requantize = relay.qnn.op.requantize( - conv2d, - relay.const(1, "float32"), - relay.const(1, "int32"), - relay.const(1, "float32"), - relay.const(1, "int32"), - axis=0, - out_dtype=dtype, - ) - relu = relay.nn.relu(requantize) - local_func = relay.Function(relay.analysis.free_vars(relu), relu) - local_func = set_composite_func_attr(local_func, "cmsis-nn.qnn_conv2d") - - mod = tvm.IRModule() - ext_input = relay.var("ext_input", shape=ifm_shape, dtype=dtype) - call_local_func = relay.Call(local_func, [ext_input]) - extern_func = relay.Function(relay.analysis.free_vars(call_local_func), call_local_func) - extern_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "cmsis-nn", extern_var.name_hint) - mod[extern_var] = extern_func - - main_input = relay.var("main_input", shape=ifm_shape, dtype=dtype) - call_extern_func = relay.Call(extern_var, [main_input]) - main_func = relay.Function(relay.analysis.free_vars(call_extern_func), call_extern_func) - main_func = relay.Function([main_input], call_extern_func, relay.TensorType(ofm_shape, dtype)) - main_var = relay.GlobalVar("main") - mod[main_var] = main_func - - mod = relay.transform.InferType()(mod) - - mod = CMSISNNFusePads()(mod) - pad_verifier = CheckForPadsWithinCompositeFunc() - pad_verifier.visit_function(mod[extern_var]) - pad_verifier.assert_no_pads_within_func() diff --git a/tests/python/contrib/test_cmsisnn/test_generate_constants.py b/tests/python/contrib/test_cmsisnn/test_generate_constants.py deleted file mode 100644 index b83884128441..000000000000 --- a/tests/python/contrib/test_cmsisnn/test_generate_constants.py +++ /dev/null @@ -1,230 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""CMSIS-NN integration tests: generate_constants pass""" -import math -import numpy as np -import pytest -import tvm -from tvm.testing.aot import get_dtype_range -from tvm import relay -from tvm.relay.op.contrib import cmsisnn - -from .utils import ( - make_module, - get_same_padding, - get_conv2d_qnn_params, - make_qnn_relu, -) - -tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__) - - -def quantize_scale(scale): - multiplier, shift = math.frexp(scale) - multiplier_q31 = round(multiplier * (1 << 31)) - return multiplier_q31, shift - - -class CheckGeneratedConstants(tvm.relay.ExprVisitor): - """Provides methods to compare against expected quantization parameters""" - - def __init__(self, enable_bias, multiplier, shift): - super().__init__() - self.num_constant_args_ = 0 - self.enable_bias_ = enable_bias - self.multiplier_ = multiplier - self.shift_ = shift - - def visit_call(self, call): - """Tests if the multiplier and shift constants required by CMSIS-NN API were generated""" - super().visit_call(call) - if isinstance(call.op, tvm.ir.expr.GlobalVar): - multiplier = call.args[2] - shift = call.args[6] if self.enable_bias_ else call.args[5] - assert isinstance( - multiplier, relay.expr.Constant - ), "Expected quantized multiplier at argument#3" - assert isinstance( - shift, relay.expr.Constant - ), "Expected a constant while looking for quantized shift" - multiplier = multiplier.data.numpy() - shift = shift.data.numpy() - tvm.testing.assert_allclose(multiplier, self.multiplier_, atol=100, rtol=1e-10) - tvm.testing.assert_allclose(shift, self.shift_, atol=1, rtol=1e-5) - - -def make_model( - shape, - kernel_shape, - input_zero_point, - input_scale, - kernel_zero_point, - kernel_scale, - output_zero_point, - output_scale, - padding, - strides, - dilation, - groups, - dtype, - kernel_dtype, - out_channels, - weight_format, - enable_bias, - relu_type, -): - """Return a model and any parameters it may have""" - h_index = weight_format.index("H") - w_index = weight_format.index("W") - kernel_h = kernel_shape[h_index] - kernel_w = kernel_shape[w_index] - a = relay.var("input", shape=shape, dtype=dtype) - p = (0, 0, 0, 0) - if padding == "SAME": - p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides) - a = relay.nn.pad( - a, - pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)], - pad_value=input_zero_point, - pad_mode="constant", - ) - shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3]) - - weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels) - rng = np.random.default_rng(12321) - kmin, kmax = get_dtype_range(kernel_dtype) - weight = tvm.nd.array( - rng.integers( - kmin, - high=kmax, - size=weight_shape, - dtype=kernel_dtype, - ) - ) - weight_const = relay.const(weight, kernel_dtype) - conv = relay.qnn.op.conv2d( - a, - weight_const, - input_zero_point=relay.const(input_zero_point, "int32"), - kernel_zero_point=relay.const(kernel_zero_point, "int32"), - input_scale=relay.const(input_scale, "float32"), - kernel_scale=relay.const(kernel_scale, "float32"), - kernel_size=(kernel_h, kernel_w), - data_layout="NHWC", - kernel_layout=weight_format, - dilation=dilation, - strides=strides, - groups=groups, - channels=out_channels, - padding=p, - out_dtype="int32", - ) - bias = tvm.nd.array(rng.integers(0, high=10, size=(out_channels,), dtype="int32")) - bias_const = relay.const(bias, "int32") - last_op = relay.nn.bias_add(conv, bias_const, axis=3) if enable_bias else conv - requant_input_sc = [sc * input_scale for sc in 
kernel_scale] - last_op = relay.qnn.op.requantize( - last_op, - relay.const(requant_input_sc, "float32"), - relay.const(0, "int32"), - relay.const(output_scale, "float32"), - relay.const(output_zero_point, "int32"), - out_dtype=dtype, - ) - last_op = make_qnn_relu(last_op, relu_type, output_scale, output_zero_point, dtype) - params = {"w": weight, "b": bias} - return last_op, params - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("enable_bias", [True, False]) -@pytest.mark.parametrize( - "input_zero_point, input_scale, kernel_scale, out_channels", - [(10, 0.0128, [0.11, 0.22], 2), (-64, 1, [1, 0.0256, 1.37], 3)], -) -def test_op_int8( - enable_bias, - input_zero_point, - input_scale, - kernel_scale, - out_channels, -): - """Tests for CMSIS-NN constants when the dtype is int8""" - ifm_shape = (1, 28, 28, 3) - padding = "VALID" - strides = (1, 1) - dilation = (1, 1) - kernel_size = (3, 3) - kernel_zero_point = 0 - groups = 1 - weight_format = "HWIO" - kernel_h = kernel_size[0] - kernel_w = kernel_size[1] - dtype = "int8" - relu_type = "RELU" - - weight_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels) - - output_scale, output_zero_point = get_conv2d_qnn_params( - weight_shape, - input_scale, - input_zero_point, - kernel_scale, - kernel_zero_point, - dtype, - dtype, - dtype, - False, - ) - - model, params = make_model( - ifm_shape, - weight_shape, - input_zero_point, - input_scale, - kernel_zero_point, - kernel_scale, - output_zero_point, - output_scale, - padding, - strides, - dilation, - groups, - dtype, - dtype, - out_channels, - weight_format, - enable_bias, - relu_type, - ) - mod = make_module(model) - - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(mod, params) - multiplier_array = [] - shift_array = [] - for i in range(out_channels): - multiplier, shift = quantize_scale(input_scale * kernel_scale[i] / output_scale) - multiplier_array.append(multiplier) - shift_array.append(shift) - CheckGeneratedConstants(enable_bias, multiplier_array, shift_array).visit_function( - cmsisnn_mod["main"] - ) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py b/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py deleted file mode 100644 index a4ea1ea32e6b..000000000000 --- a/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py +++ /dev/null @@ -1,75 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
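For reference, the Q31 multiplier and shift pair that CheckGeneratedConstants compares against is produced by the frexp-based quantize_scale helper defined in the deleted test above. A minimal standalone sketch of that computation (the scale values below are illustrative, not taken from the test parametrization):

import math

def quantize_scale(scale):
    # Split scale into mantissa * 2**shift, then express the mantissa
    # as a Q31 fixed-point multiplier, mirroring the deleted helper.
    multiplier, shift = math.frexp(scale)
    return round(multiplier * (1 << 31)), shift

# The effective per-channel scale input_scale * kernel_scale / output_scale
# round-trips (up to rounding) as multiplier * 2**(shift - 31).
mult, shift = quantize_scale(0.0128 * 0.11 / 0.25)
assert abs(mult * 2.0 ** (shift - 31) - 0.0128 * 0.11 / 0.25) < 1e-9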
- -"""CMSIS-NN integration tests: Tests invalid graphs""" -import numpy as np -import tvm - -from tvm.testing.aot import AOTTestModel, get_dtype_range, compile_and_run, generate_ref_data -from tvm.micro.testing.aot_test_utils import ( - AOT_USMP_CORSTONE300_RUNNER, -) -from .utils import ( - skip_if_no_reference_system, -) - - -@skip_if_no_reference_system -@tvm.testing.requires_cmsisnn -def test_empty_function(): - """Test partitioned function without composite function""" - original_model = """ -#[version = "0.0.5"] -def @main(%data : Tensor[(16, 29), int8]) -> Tensor[(16, 29), int8] { - add(%data, %data) -} -""" - cmsisnn_model = """ -#[version = "0.0.5"] -def @tvmgen_default_cmsis_nn_main_1(%i1: Tensor[(16, 29), int8], Inline=1, Compiler="cmsis-nn", global_symbol="tvmgen_default_cmsis_nn_main_1", Primitive=1) -> Tensor[(16, 29), int8] { - add(%i1, %i1) -} -def @main(%data : Tensor[(16, 29), int8]) -> Tensor[(16, 29), int8] { - %1 = @tvmgen_default_cmsis_nn_main_1(%data) /* ty=Tensor[(16, 29), int8] */; - %1 -} -""" - orig_mod = tvm.relay.fromtext(original_model) - cmsisnn_mod = tvm.relay.fromtext(cmsisnn_model) - params = {} - - # validate the output - interface_api = "c" - use_unpacked_api = True - test_runner = AOT_USMP_CORSTONE300_RUNNER - dtype = "int8" - in_min, in_max = get_dtype_range(dtype) - rng = np.random.default_rng(12345) - inputs = {"data": rng.integers(in_min, high=in_max, size=(16, 29), dtype=dtype)} - outputs = generate_ref_data(orig_mod["main"], inputs, params) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=outputs, - params=params, - output_tolerance=0, - ), - test_runner, - interface_api, - use_unpacked_api, - ) diff --git a/tests/python/contrib/test_cmsisnn/test_last_error.py b/tests/python/contrib/test_cmsisnn/test_last_error.py deleted file mode 100644 index f21d5d1a0383..000000000000 --- a/tests/python/contrib/test_cmsisnn/test_last_error.py +++ /dev/null @@ -1,164 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""CMSIS-NN integration tests: debug_last_error""" - -import re -import numpy as np -import pytest -import tvm -from tvm import relay -from tvm.relay.op.contrib import cmsisnn - -from tvm.testing.aot import ( - get_dtype_range, - generate_ref_data, - AOTTestModel, - compile_and_run, -) -from .utils import ( - make_module, - get_same_padding, - make_qnn_relu, - assert_partitioned_function, - create_test_runner, -) - - -def make_model( - pool_op, - shape, - pool_size, - strides, - padding, - dtype, - scale, - zero_point, - relu_type, - layout, - input_op, -): - """Create a Relay Function / network model""" - if input_op: - op = input_op - else: - op = relay.var("input", shape=shape, dtype=dtype) - pad_ = (0, 0, 0, 0) - if padding == "SAME": - dilation = (1, 1) - pad_ = get_same_padding((shape[1], shape[2]), pool_size, dilation, strides) - op = relay.nn.pad( - op, - pad_width=[(0, 0), (pad_[0], pad_[2]), (pad_[1], pad_[3]), (0, 0)], - pad_value=zero_point, - pad_mode="constant", - ) - if pool_op.__name__ == relay.nn.avg_pool2d.__name__: - op = relay.cast(op, "int32") - op = pool_op( - op, pool_size=pool_size, strides=strides, padding=pad_, ceil_mode=True, layout=layout - ) - if pool_op.__name__ == relay.nn.avg_pool2d.__name__: - op = relay.cast(op, dtype) - op = make_qnn_relu(op, relu_type, scale, zero_point, dtype) - return op - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("debug_last_error", [True, False]) -def test_last_error(debug_last_error): - """Tests debug_last_error""" - dtype = "int16" - in_shape = (1, 28, 28, 12) - pool_size = (3, 3) - strides = (2, 2) - padding = "SAME" - relu_type = "NONE" - pool_type = relay.nn.avg_pool2d - zero_point = -34 - scale = 0.0256 - compiler_cpu = "cortex-m55" - cpu_flags = "+nomve" - layout = "NHWC" - input_op = None - - interface_api = "c" - use_unpacked_api = True - - model = make_model( - pool_op=pool_type, - shape=in_shape, - pool_size=pool_size, - strides=strides, - padding=padding, - dtype=dtype, - scale=scale, - zero_point=zero_point, - relu_type=relu_type, - layout=layout, - input_op=input_op, - ) - orig_mod = make_module(model) - - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # validate the output - in_min, in_max = get_dtype_range(dtype) - inputs = { - "input": np.random.randint(in_min, high=in_max, size=in_shape, dtype=dtype), - } - output_list = generate_ref_data(orig_mod["main"], inputs) - - def checker(base_path: str) -> bool: - def read_file(path): - with open(path) as f: - return f.read() - - test = read_file(base_path + "/build/test.c") - test_check = "TVMGetLastError" in test - - default_lib2 = read_file(base_path + "/codegen/host/src/default_lib2.c") - regex = ( - r"(?s)arm_avgpool_s16(.*?)" - r'ARM_CMSIS_NN_ARG_ERROR: TVMAPISetLastError\("ARM_CMSIS_NN_ARG_ERROR(.*?)' - r'ARM_CMSIS_NN_NO_IMPL_ERROR: TVMAPISetLastError\("ARM_CMSIS_NN_NO_IMPL_ERROR' - ) - default_lib2_check = re.search(regex, default_lib2) is not None - - if debug_last_error: - return test_check and default_lib2_check - else: - return not (test_check or default_lib2_check) - - result = compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - params=None, - output_tolerance=1, - ), - create_test_runner(compiler_cpu, cpu_flags, debug_last_error=debug_last_error), - interface_api, - use_unpacked_api, - debug_last_error=debug_last_error, - checker=checker, - ) - assert result diff --git 
a/tests/python/contrib/test_cmsisnn/test_networks.py b/tests/python/contrib/test_cmsisnn/test_networks.py deleted file mode 100644 index 16afffdccefb..000000000000 --- a/tests/python/contrib/test_cmsisnn/test_networks.py +++ /dev/null @@ -1,151 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""CMSIS-NN: testing with networks""" - -import pytest -import numpy as np - -import tvm.testing -from tvm import relay -from tvm.contrib.download import download_testdata -from tvm.relay.op.contrib import cmsisnn -from tvm.testing.aot import AOTTestModel, get_dtype_range, compile_and_run, generate_ref_data -from tvm.micro.testing.aot_test_utils import ( - AOT_CORSTONE300_RUNNER, - AOT_USMP_CORSTONE300_RUNNER, -) -from .utils import skip_if_no_reference_system - -# pylint: disable=import-outside-toplevel -def _convert_to_relay( - tflite_model_buf, - input_data, - input_node, -): - """Converts TFLite model to Relay module and params""" - - def convert_to_list(x): - if not isinstance(x, list): - x = [x] - return x - - # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1 - try: - import tflite.Model - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0) - except AttributeError: - import tflite - - tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0) - except ImportError: - raise ImportError("The tflite package must be installed") - - input_data = convert_to_list(input_data) - input_node = convert_to_list(input_node) - - shape_dict = {} - dtype_dict = {} - for i, name in enumerate(input_node): - shape_dict[name] = input_data[i].shape - dtype_dict[name] = input_data[i].dtype.name - - mod, params = relay.frontend.from_tflite( - tflite_model, shape_dict=shape_dict, dtype_dict=dtype_dict - ) - - return mod, params - - -@skip_if_no_reference_system -@tvm.testing.requires_package("tflite") -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("test_runner", [AOT_CORSTONE300_RUNNER, AOT_USMP_CORSTONE300_RUNNER]) -def test_cnn_small(test_runner): - """Download a small network and tests TVM via CMSIS-NN output against TFLite output""" - # download the model - base_url = ( - "https://github.com/ARM-software/ML-zoo/raw/" - "48a22ee22325d15d2371a6df24eb7d67e21dcc97" - "/models/keyword_spotting/cnn_small/tflite_int8" - ) - file_to_download = "cnn_s_quantized.tflite" - file_saved = "cnn_s_quantized_15Dec2021.tflite" - model_file = download_testdata("{}/{}".format(base_url, file_to_download), file_saved) - - with open(model_file, "rb") as f: - tflite_model_buf = f.read() - - input_shape = (1, 490) - dtype = "int8" - in_min, in_max = get_dtype_range(dtype) - rng = np.random.default_rng(12345) - input_data = rng.integers(in_min, high=in_max, size=input_shape, dtype=dtype) - - orig_mod, params = 
_convert_to_relay(tflite_model_buf, input_data, "input") - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) - - # validate CMSIS-NN output against CPU output - interface_api = "c" - use_unpacked_api = True - inputs = {"input": input_data} - params = {} - output_list = generate_ref_data(orig_mod["main"], inputs, params) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - params=params, - output_tolerance=1, - ), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@tvm.testing.requires_package("tflite") -def test_keyword_scramble(): - """Download keyword_scrambled and test for Relay conversion. - In future, this test can be extended for CMSIS-NN""" - # download the model - base_url = ( - "https://github.com/tensorflow/tflite-micro/raw/" - "de8f61a074460e1fa5227d875c95aa303be01240/" - "tensorflow/lite/micro/models" - ) - file_to_download = "keyword_scrambled.tflite" - file_saved = "keyword_scrambled.tflite" - model_file = download_testdata("{}/{}".format(base_url, file_to_download), file_saved) - - with open(model_file, "rb") as f: - tflite_model_buf = f.read() - - input_shape = (1, 96) - dtype = "int8" - in_min, in_max = get_dtype_range(dtype) - rng = np.random.default_rng(12345) - input_data = rng.integers(in_min, high=in_max, size=input_shape, dtype=dtype) - - with pytest.raises(tvm.error.OpNotImplemented): - _, _ = _convert_to_relay(tflite_model_buf, input_data, "input") - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_cmsisnn/test_pooling.py b/tests/python/contrib/test_cmsisnn/test_pooling.py deleted file mode 100644 index c6e5f02e712a..000000000000 --- a/tests/python/contrib/test_cmsisnn/test_pooling.py +++ /dev/null @@ -1,246 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""CMSIS-NN integration tests: Pooling""" -import numpy as np -import pytest -import tvm -from tvm import relay -from tvm.relay.op.contrib import cmsisnn - -from tvm.testing.aot import ( - get_dtype_range, - generate_ref_data, - AOTTestModel, - compile_and_run, -) -from tvm.micro.testing.aot_test_utils import AOT_USMP_CORSTONE300_RUNNER -from .utils import ( - make_module, - get_same_padding, - make_qnn_relu, - assert_partitioned_function, - assert_no_external_function, - create_test_runner, -) - - -def make_model( - pool_op, - shape=(1, 28, 28, 12), - pool_size=(3, 3), - strides=(2, 2), - padding="VALID", - dtype="int8", - scale=1, - zero_point=-33, - relu_type="RELU", - layout="NHWC", - input_op=None, -): - """Return a model and any parameters it may have, - all parameters are defaulted to known good values - """ - if input_op: - op = input_op - else: - op = relay.var("input", shape=shape, dtype=dtype) - pad_ = (0, 0, 0, 0) - if padding == "SAME": - dilation = (1, 1) - pad_ = get_same_padding((shape[1], shape[2]), pool_size, dilation, strides) - op = relay.nn.pad( - op, - pad_width=[(0, 0), (pad_[0], pad_[2]), (pad_[1], pad_[3]), (0, 0)], - pad_value=zero_point, - pad_mode="constant", - ) - if pool_op.__name__ == relay.nn.avg_pool2d.__name__: - op = relay.cast(op, "int32") - op = pool_op( - op, pool_size=pool_size, strides=strides, padding=pad_, ceil_mode=True, layout=layout - ) - if pool_op.__name__ == relay.nn.avg_pool2d.__name__: - op = relay.cast(op, dtype) - op = make_qnn_relu(op, relu_type, scale, zero_point, dtype) - return op - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("dtype", ["int16", "int8"]) -@pytest.mark.parametrize("in_shape", [(1, 28, 28, 12), (1, 64, 100, 4)]) -@pytest.mark.parametrize( - "pool_size, strides, padding", [((3, 3), (2, 2), "SAME"), ((2, 2), (1, 1), "VALID")] -) -@pytest.mark.parametrize("relu_type", ["NONE", "RELU"]) -@pytest.mark.parametrize("pool_type", [relay.nn.max_pool2d, relay.nn.avg_pool2d]) -@pytest.mark.parametrize("zero_point, scale", [(-34, 0.0256)]) -@pytest.mark.parametrize( - "compiler_cpu, cpu_flags", [("cortex-m55", "+nomve"), ("cortex-m55", ""), ("cortex-m7", "")] -) -def test_ops( - dtype, - in_shape, - pool_size, - strides, - padding, - relu_type, - pool_type, - zero_point, - scale, - compiler_cpu, - cpu_flags, -): - """Tests QNN pooling op for int8 and int16 pooling""" - interface_api = "c" - use_unpacked_api = True - - model = make_model( - pool_op=pool_type, - shape=in_shape, - pool_size=pool_size, - strides=strides, - padding=padding, - dtype=dtype, - scale=scale, - zero_point=zero_point, - relu_type=relu_type, - ) - orig_mod = make_module(model) - - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # validate the output - in_min, in_max = get_dtype_range(dtype) - np.random.seed(0) - inputs = { - "input": np.random.randint(in_min, high=in_max, size=in_shape, dtype=dtype), - } - output_list = generate_ref_data(orig_mod["main"], inputs) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - params=None, - output_tolerance=1, - ), - create_test_runner(compiler_cpu, cpu_flags), - interface_api, - use_unpacked_api, - ) - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize( - "pool_size, strides, padding", [((3, 3), (2, 2), "SAME"), ((2, 2), (1, 1), "VALID")] -) -@pytest.mark.parametrize("relu_type", ["NONE", "RELU"]) -def test_int8_pool_with_float32_input( - pool_size, - 
strides, - padding, - relu_type, -): - """Tests QNN maxpool partitions with float32 input""" - interface_api = "c" - use_unpacked_api = True - test_runner = AOT_USMP_CORSTONE300_RUNNER - - in_shape = (1, 28, 28, 12) - zero_point, scale = (-34, 0.0256) - - input_ = relay.var("input", shape=in_shape, dtype="float32") - op = relay.op.add(input_, input_) - op = relay.qnn.op.quantize(op, relay.const(scale), relay.const(zero_point), -1, "int8") - - model = make_model( - pool_op=relay.nn.max_pool2d, - shape=in_shape, - pool_size=pool_size, - strides=strides, - padding=padding, - scale=scale, - zero_point=zero_point, - relu_type=relu_type, - input_op=op, - ) - orig_mod = make_module(model) - - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # validate the output - np.random.seed(0) - inputs = {"input": np.random.uniform(0, 1, in_shape).astype("float32")} - output_list = generate_ref_data(orig_mod["main"], inputs) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - params=None, - output_tolerance=1, - ), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("op", [relay.nn.avg_pool2d, relay.nn.max_pool2d]) -def test_invalid_datatype(op): - """Checks CMSIS-NN partitioning for non int8 dtype""" - model = make_model(pool_op=op, dtype="int64") - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - assert_no_external_function(cmsisnn_mod) - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("op", [relay.nn.avg_pool2d, relay.nn.max_pool2d]) -def test_invalid_batch_size(op): - """Checks CMSIS-NN partitioning when batch size is not 1""" - model = make_model( - pool_op=op, - shape=(2, 28, 28, 12), - ) - - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - assert_no_external_function(cmsisnn_mod) - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("op", [relay.nn.avg_pool2d, relay.nn.max_pool2d]) -def test_invalid_layout(op): - """Checks CMSIS-NN partitioning when layout is not NHWC""" - model = make_model(pool_op=op, layout="NCHW") - - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - assert_no_external_function(cmsisnn_mod) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_cmsisnn/test_remove_reshapes.py b/tests/python/contrib/test_cmsisnn/test_remove_reshapes.py deleted file mode 100644 index 3cd60341ebfe..000000000000 --- a/tests/python/contrib/test_cmsisnn/test_remove_reshapes.py +++ /dev/null @@ -1,169 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""CMSIS-NN integration tests: Reshape removal""" -import numpy as np -import pytest -import tvm -from tvm import relay -from tvm.relay.op.contrib import cmsisnn - -from tvm.testing.aot import ( - get_dtype_range, - generate_ref_data, - AOTTestModel, - compile_models, - run_and_check, -) -from tvm.micro.testing.aot_test_utils import AOT_USMP_CORSTONE300_RUNNER -from .utils import ( - make_module, - get_same_padding, - make_qnn_relu, - assert_partitioned_function, -) - - -def make_model( - pool_op, - shape=(1, 28, 28, 12), - pool_size=(3, 3), - strides=(2, 2), - padding="VALID", - dtype="int8", - scale=1, - zero_point=-33, - relu_type="RELU", - layout="NHWC", - input_op=None, -): - """Return a model and any parameters it may have, - all parameters are defaulted to known good values - """ - if input_op: - op = input_op - else: - op = relay.var("input", shape=shape, dtype=dtype) - pad_ = (0, 0, 0, 0) - if padding == "SAME": - dilation = (1, 1) - pad_ = get_same_padding((shape[1], shape[2]), pool_size, dilation, strides) - op = relay.nn.pad( - op, - pad_width=[(0, 0), (pad_[0], pad_[2]), (pad_[1], pad_[3]), (0, 0)], - pad_value=zero_point, - pad_mode="constant", - ) - if pool_op.__name__ == relay.nn.avg_pool2d.__name__: - op = relay.cast(op, "int32") - op = pool_op( - op, pool_size=pool_size, strides=strides, padding=pad_, ceil_mode=True, layout=layout - ) - if pool_op.__name__ == relay.nn.avg_pool2d.__name__: - op = relay.cast(op, dtype) - op = make_qnn_relu(op, relu_type, scale, zero_point, dtype) - return op - - -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("padding", ["SAME", "VALID"]) -def test_reshape_removal(padding): - """Tests reshape is removed from the network""" - interface_api = "c" - use_unpacked_api = True - test_runner = AOT_USMP_CORSTONE300_RUNNER - - in_shape = (1, 28, 28, 12) - pool_size = (3, 3) - strides = (2, 2) - relu_type = "NONE" - zero_point, scale = (-34, 0.0256) - - max_pool = make_model( - pool_op=relay.nn.max_pool2d, - shape=in_shape, - pool_size=pool_size, - strides=strides, - padding=padding, - scale=scale, - zero_point=zero_point, - relu_type=relu_type, - ) - new_shape = (1, 28, 28, 3) if padding == "VALID" else (1, 30, 30, 3) - reshape = relay.reshape(max_pool, newshape=new_shape) - - model = make_model( - pool_op=relay.nn.avg_pool2d, - shape=new_shape, - pool_size=pool_size, - strides=strides, - padding=padding, - scale=scale, - zero_point=zero_point, - relu_type=relu_type, - input_op=reshape, - ) - orig_mod = make_module(model) - - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # generate reference output - rng = np.random.default_rng(12345) - in_min, in_max = get_dtype_range("int8") - inputs = {"input": rng.integers(in_min, high=in_max, size=in_shape, dtype="int8")} - output_list = generate_ref_data(orig_mod["main"], inputs, params=None) - - # validate presence of depthwise convolution - compiled_models = compile_models( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - params=None, - output_tolerance=1, - ), - interface_api, - use_unpacked_api, - pass_config=test_runner.pass_config, - ) - - main_mod = None - for target, mod in compiled_models[0].executor_factory.lowered_ir_mods.items(): - if target.kind.name == "c": - main_mod = mod - - # when padding="SAME", extra padding is introduced which causes Reshape to be fused with the - # Pad. RemoveReshapes pass cannot remove a fused Reshape. 
Whereas padding="VALID" doesn't need - # an extra Pad layer. In this case, the pass removes the Reshape from the graph. - reshapes_present = any(["reshape" in gv.name_hint for gv in main_mod.get_global_vars()]) - check_reshapes = reshapes_present if padding == "SAME" else not reshapes_present - expected_reshapes = "a" if padding == "SAME" else "No" - assert check_reshapes, "Expeting {} reshape layer(s).".format(expected_reshapes) - - # validate the output - run_and_check( - models=compiled_models, - runner=test_runner, - interface_api=interface_api, - ) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_cmsisnn/test_scalar_to_tensor_constant.py b/tests/python/contrib/test_cmsisnn/test_scalar_to_tensor_constant.py deleted file mode 100644 index 88ae2cba5f57..000000000000 --- a/tests/python/contrib/test_cmsisnn/test_scalar_to_tensor_constant.py +++ /dev/null @@ -1,336 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""CMSIS-NN integration tests: scalar_to_tensor_constant pass""" -import numpy as np -import tvm -import tvm.testing -from tvm import relay - -tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__) - - -def generate_variable(name, shape, dtype="int8"): - return relay.var(name, shape=shape, dtype=dtype) - - -def make_binary_op( - op, - input_0, - input_1, - input_0_scale, - input_0_zero_point, - input_1_scale, - input_1_zero_point, - out_scale=1.0 / 256, - out_zero_point=-128, -): - """Create a Relay Function / network model""" - return op( - input_0, - input_1, - relay.const(input_0_scale, "float32"), - relay.const(input_0_zero_point, "int32"), - relay.const(input_1_scale, "float32"), - relay.const(input_1_zero_point, "int32"), - relay.const(out_scale, "float32"), - relay.const(out_zero_point, "int32"), - ) - - -class CheckFunctionsForConstants(tvm.relay.ExprVisitor): - """Provides method to test number of scalar constants present in a function""" - - def __init__(self): - super().__init__() - self.num_constants_ = 0 - - def visit_call(self, call): - super().visit_call(call) - for arg in call.args: - if isinstance(arg, relay.Constant) and arg.data.numpy().ndim > 0: - self.num_constants_ += 1 - - def check_num_constants(self): - assert self.num_constants_ == 0, "Functions should not have constant arguments in Calls" - - -def set_external_func_attr(func, compiler, ext_symbol): - func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) - func = func.with_attr("Compiler", compiler) - func = func.with_attr("global_symbol", ext_symbol) - return func - - -def set_composite_func_attr(func, name): - func = func.with_attr("Composite", name) - return func - - -@tvm.testing.requires_cmsisnn -def test_single_scalar_position_0(): - """Tests conversion to tensor constant when first operand is a 
scalar""" - dtype = "int8" - shape = (8, 8) - operand0 = generate_variable("operand0", None, dtype) - operand1 = generate_variable("operand1", shape, dtype) - binary_op = make_binary_op( - relay.qnn.op.add, - operand0, - operand1, - input_0_scale=0.0128, - input_0_zero_point=32, - input_1_scale=0.256, - input_1_zero_point=-64, - ) - - local_func = relay.Function([operand0, operand1], binary_op, relay.TensorType(shape, dtype)) - local_func = set_composite_func_attr(local_func, "cmsis-nn.qnn_add") - - arg0 = relay.expr.const(3, dtype) - arg1 = relay.var("arg1", shape=shape, dtype=dtype) - call_local_func = relay.Call(local_func, [arg0, arg1]) - extern_func = relay.Function([arg1], call_local_func, relay.TensorType(shape, dtype)) - - x = relay.var("x", shape=shape, dtype=dtype) - global_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "cmsis-nn", global_var.name_hint) - call_extern_func = relay.Call(global_var, [x]) - main_func = relay.Function([x], call_extern_func, relay.TensorType(shape, dtype)) - main_var = relay.GlobalVar("main") - - mod = tvm.IRModule() - mod[global_var] = extern_func - mod[main_var] = main_func - - mod = relay.transform.InferType()(mod) - mod = ScalarToTensorConstants()(mod) - mod = relay.transform.InferType()(mod) - check_for_constants = CheckFunctionsForConstants() - check_for_constants.visit_call(mod[global_var].body) - assert ( - check_for_constants.num_constants_ == 1 - ), "Scalar constant wasn't converted into tensor constant" - - -@tvm.testing.requires_cmsisnn -def test_single_scalar_position_1(): - """Tests conversion to tensor constant when second operand is a scalar""" - dtype = "int8" - shape = (8, 8) - operand0 = generate_variable("operand0", shape, dtype) - operand1 = generate_variable("operand1", None, dtype) - binary_op = make_binary_op( - relay.qnn.op.add, - operand0, - operand1, - input_0_scale=0.0128, - input_0_zero_point=32, - input_1_scale=0.256, - input_1_zero_point=-64, - ) - - local_func = relay.Function([operand0, operand1], binary_op, relay.TensorType(shape, dtype)) - local_func = set_composite_func_attr(local_func, "cmsis-nn.qnn_add") - - arg0 = relay.var("arg0", shape=shape, dtype=dtype) - arg1 = relay.expr.const(3, dtype) - call_local_func = relay.Call(local_func, [arg0, arg1]) - extern_func = relay.Function([arg0], call_local_func, relay.TensorType(shape, dtype)) - - x = relay.var("x", shape=shape, dtype=dtype) - global_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "cmsis-nn", global_var.name_hint) - call_extern_func = relay.Call(global_var, [x]) - main_func = relay.Function([x], call_extern_func, relay.TensorType(shape, dtype)) - main_var = relay.GlobalVar("main") - - mod = tvm.IRModule() - mod[global_var] = extern_func - mod[main_var] = main_func - - mod = relay.transform.InferType()(mod) - mod = ScalarToTensorConstants()(mod) - mod = relay.transform.InferType()(mod) - check_for_constants = CheckFunctionsForConstants() - check_for_constants.visit_call(mod[global_var].body) - assert ( - check_for_constants.num_constants_ == 1 - ), "Scalar constant wasn't converted into tensor constant" - - -@tvm.testing.requires_cmsisnn -def test_primary_operands_all_scalars(): - """Tests conversion to tensor constants all operands are scalars""" - dtype = "int8" - shape = None - operand0 = generate_variable("operand0", None, dtype) - operand1 = generate_variable("operand1", None, dtype) - binary_op = make_binary_op( - relay.qnn.op.add, - operand0, - operand1, - 
input_0_scale=0.0128, - input_0_zero_point=32, - input_1_scale=0.256, - input_1_zero_point=-64, - ) - - local_func = relay.Function([operand0, operand1], binary_op, relay.TensorType(shape, dtype)) - local_func = set_composite_func_attr(local_func, "cmsis-nn.qnn_add") - - arg0 = relay.expr.const(7, dtype) - arg1 = relay.expr.const(3, dtype) - call_local_func = relay.Call(local_func, [arg0, arg1]) - extern_func = relay.Function([], call_local_func, relay.TensorType(shape, dtype)) - - global_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "cmsis-nn", global_var.name_hint) - call_extern_func = relay.Call(global_var, []) - main_func = relay.Function([], call_extern_func, relay.TensorType(shape, dtype)) - main_var = relay.GlobalVar("main") - - mod = tvm.IRModule() - mod[global_var] = extern_func - mod[main_var] = main_func - - mod = relay.transform.InferType()(mod) - mod = ScalarToTensorConstants()(mod) - new_mod = relay.transform.InferType()(mod) - tvm.ir.assert_structural_equal(mod[global_var].body, new_mod[global_var].body) - - -@tvm.testing.requires_cmsisnn -def test_all_primary_operands_tensor_constants(): - """Tests conversion to tensor constants all operands are tensors""" - dtype = "int8" - shape = (1, 3, 3, 32) - operand0 = generate_variable("operand0", shape, dtype) - operand1 = generate_variable("operand1", shape, dtype) - binary_op = make_binary_op( - relay.qnn.op.add, - operand0, - operand1, - input_0_scale=0.0128, - input_0_zero_point=32, - input_1_scale=0.256, - input_1_zero_point=-64, - ) - - local_func = relay.Function([operand0, operand1], binary_op, relay.TensorType(shape, dtype)) - local_func = set_composite_func_attr(local_func, "cmsis-nn.qnn_add") - - rng = np.random.default_rng(12345) - arg0 = relay.const(rng.integers(-128, high=127, size=shape, dtype=dtype)) - arg1 = relay.const(rng.integers(-128, high=127, size=shape, dtype=dtype)) - call_local_func = relay.Call(local_func, [arg0, arg1]) - extern_func = relay.Function([], call_local_func, relay.TensorType(shape, dtype)) - - global_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "cmsis-nn", global_var.name_hint) - call_extern_func = relay.Call(global_var, []) - main_func = relay.Function([], call_extern_func, relay.TensorType(shape, dtype)) - main_var = relay.GlobalVar("main") - - mod = tvm.IRModule() - mod[global_var] = extern_func - mod[main_var] = main_func - - mod = relay.transform.InferType()(mod) - mod = ScalarToTensorConstants()(mod) - new_mod = relay.transform.InferType()(mod) - tvm.ir.assert_structural_equal(mod[global_var].body, new_mod[global_var].body) - - -@tvm.testing.requires_cmsisnn -def test_duplicate_constant_arguments(): - """Tests the pass when repeating operands are arguments to the binary op""" - dtype = "int8" - shape = (1, 3, 3, 32) - operand0 = generate_variable("operand0", shape, dtype) - operand1 = generate_variable("operand1", shape, dtype) - binary_op = make_binary_op( - relay.qnn.op.add, - operand0, - operand0, - input_0_scale=0.0128, - input_0_zero_point=32, - input_1_scale=0.256, - input_1_zero_point=-64, - ) - - local_func = relay.Function([operand0, operand1], binary_op, relay.TensorType(shape, dtype)) - local_func = set_composite_func_attr(local_func, "cmsis-nn.qnn_add") - - rng = np.random.default_rng(12345) - arg0 = relay.const(rng.integers(-128, high=127, size=shape, dtype=dtype)) - call_local_func = relay.Call(local_func, [arg0, arg0]) - extern_func = relay.Function([], call_local_func, 
relay.TensorType(shape, dtype)) - - global_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "cmsis-nn", global_var.name_hint) - call_extern_func = relay.Call(global_var, []) - main_func = relay.Function([], call_extern_func, relay.TensorType(shape, dtype)) - main_var = relay.GlobalVar("main") - - mod = tvm.IRModule() - mod[global_var] = extern_func - mod[main_var] = main_func - - mod = relay.transform.InferType()(mod) - mod = ScalarToTensorConstants()(mod) - new_mod = relay.transform.InferType()(mod) - tvm.ir.assert_structural_equal(mod[global_var].body, new_mod[global_var].body) - - -@tvm.testing.requires_cmsisnn -def test_non_cmsisnn_ext_func(): - """Non CMSISNN functions should not be altered.""" - - def get_mod(): - operand1 = relay.var("operand1", shape=None) - operand2 = relay.var("operand2", shape=None) - binary_op = operand1 + operand2 - local_func = relay.Function( - [operand1, operand2], binary_op, relay.TensorType((), "float32") - ) - local_func = set_composite_func_attr(local_func, "cmsis-nn.qnn_add") - - arg0 = relay.expr.const(5, "float32") - arg1 = relay.expr.const(3, "float32") - call_local_func = relay.Call(local_func, [arg0, arg1]) - extern_func = relay.Function([], call_local_func, relay.TensorType((), "float32")) - - global_var = relay.GlobalVar("external_function") - extern_func = set_external_func_attr(extern_func, "foo", global_var.name_hint) - call_extern_func = relay.Call(global_var, []) - main_func = relay.Function([], call_extern_func, relay.TensorType((), "float32")) - main_var = relay.GlobalVar("main") - - mod = tvm.IRModule() - mod[global_var] = extern_func - mod[main_var] = main_func - mod = relay.transform.InferType()(mod) - return mod - - expected = get_mod()["external_function"].body - actual = ScalarToTensorConstants()(get_mod())["external_function"].body - tvm.ir.assert_structural_equal(expected, actual) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_cmsisnn/test_softmax.py b/tests/python/contrib/test_cmsisnn/test_softmax.py deleted file mode 100644 index 82547f44f597..000000000000 --- a/tests/python/contrib/test_cmsisnn/test_softmax.py +++ /dev/null @@ -1,177 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
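The ScalarToTensorConstants tests above rely on the distinction between a 0-d (scalar) Relay constant and a tensor constant, which is what CheckFunctionsForConstants counts via ndim. A small illustration (shape and fill value are arbitrary):

import numpy as np
from tvm import relay

scalar = relay.const(3, "int8")                    # 0-d constant: ndim == 0
tensor = relay.const(np.full((8, 8), 3, "int8"))   # tensor constant: ndim > 0
assert scalar.data.numpy().ndim == 0
assert tensor.data.numpy().ndim == 2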
- -"""CMSIS-NN integration tests: Softmax""" -import itertools - -import numpy as np -import pytest - -import tvm.testing -from tvm import relay -from tvm.relay.op.contrib import cmsisnn -from tvm.testing.aot import get_dtype_range, AOTTestModel, compile_and_run, generate_ref_data - -from .utils import ( - skip_if_no_reference_system, - make_module, - assert_partitioned_function, - assert_no_external_function, - create_test_runner, -) - - -def make_model( - shape, in_dtype, out_dtype, in_zero_point, in_scale, out_zero_point=-128, out_scale=1.0 / 256 -): - """Create a Relay Function / network model""" - a = relay.var("in0", shape=shape, dtype=in_dtype) - dequantize = relay.qnn.op.dequantize( - a, - input_scale=relay.const(in_scale, "float32"), - input_zero_point=relay.const(in_zero_point, "int32"), - ) - softmax = relay.nn.softmax(dequantize) - model = relay.qnn.op.quantize( - softmax, - output_scale=relay.const(out_scale, "float32"), - output_zero_point=relay.const(out_zero_point, "int32"), - out_dtype=out_dtype, - ) - return model - - -@skip_if_no_reference_system -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize(["zero_point", "scale"], [[33, 0.256], [-64, 0.0128]]) -@pytest.mark.parametrize( - "compiler_cpu, cpu_flags", [("cortex-m55", "+nomve"), ("cortex-m55", ""), ("cortex-m7", "")] -) -def test_op_int8(zero_point, scale, compiler_cpu, cpu_flags): - """Tests int8 QNN Softmax for CMSIS-NN""" - interface_api = "c" - use_unpacked_api = True - - dtype = "int8" - shape = [1, 16, 16, 3] - model = make_model(shape, dtype, dtype, zero_point, scale) - orig_mod = make_module(model) - - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # validate the output - in_min, in_max = get_dtype_range(dtype) - np.random.seed(0) - input_data = np.random.randint(in_min, high=in_max, size=shape, dtype=dtype) - inputs = {"in0": input_data} - params = {} - output_list = generate_ref_data(orig_mod["main"], inputs, params) - compile_and_run( - AOTTestModel(module=cmsisnn_mod, inputs=inputs, outputs=output_list, params=params), - create_test_runner(compiler_cpu, cpu_flags), - interface_api, - use_unpacked_api, - ) - - -@skip_if_no_reference_system -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize(["zero_point", "scale"], [[0, 1.0 / 32768]]) -@pytest.mark.parametrize( - "compiler_cpu, cpu_flags", [("cortex-m55", "+nomve"), ("cortex-m55", ""), ("cortex-m7", "")] -) -def test_op_int16(zero_point, scale, compiler_cpu, cpu_flags): - """Tests int16 QNN Softmax for CMSIS-NN""" - interface_api = "c" - use_unpacked_api = True - - dtype = "int16" - shape = [1, 16, 16, 3] - - # output scale and zero_point must be fixed - model = make_model(shape, dtype, dtype, zero_point, scale, 0, 1.0 / 32768) - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - - # validate pattern matching - assert_partitioned_function(orig_mod, cmsisnn_mod) - - # validate the output - in_min, in_max = get_dtype_range(dtype) - np.random.seed(0) - input_data = np.random.randint(in_min, high=in_max, size=shape, dtype=dtype) - inputs = {"in0": input_data} - params = {} - output_list = generate_ref_data(orig_mod["main"], inputs, params) - compile_and_run( - AOTTestModel( - module=cmsisnn_mod, - inputs=inputs, - outputs=output_list, - params=params, - output_tolerance=2, - ), - create_test_runner(compiler_cpu, cpu_flags), - interface_api, - use_unpacked_api, - ) - - -def parameterize_for_invalid_model(test): - 
"""Generates parameters for non int8 input and output of Softmax""" - in_dtype = ["uint8", "int8"] - out_dtype = ["uint8", "int8"] - zero_point = [-128, 64] - scale = [1.0 / 256, 0.2] - out_zero_point = [-128, 33] - out_scale = [1.0 / 256, 0.2] - all_combinations = itertools.product( - in_dtype, out_dtype, zero_point, scale, out_zero_point, out_scale - ) - all_combinations = filter( - lambda parameters: not ( - parameters[0] == "int8" - and parameters[1] == "int8" - and parameters[4] == -128 - and parameters[5] == 1.0 / 256 - ), - all_combinations, - ) - return pytest.mark.parametrize( - ["in_dtype", "out_dtype", "zero_point", "scale", "out_zero_point", "out_scale"], - all_combinations, - )(test) - - -@parameterize_for_invalid_model -@tvm.testing.requires_cmsisnn -def test_invalid_parameters(in_dtype, out_dtype, zero_point, scale, out_zero_point, out_scale): - """Tests for non int8 input and output of Softmax""" - model = make_model( - [1, 16, 16, 3], in_dtype, out_dtype, zero_point, scale, out_zero_point, out_scale - ) - - orig_mod = make_module(model) - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) - assert_no_external_function(cmsisnn_mod) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_cmsisnn/utils.py b/tests/python/contrib/test_cmsisnn/utils.py deleted file mode 100644 index 65f7402e6b83..000000000000 --- a/tests/python/contrib/test_cmsisnn/utils.py +++ /dev/null @@ -1,297 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""CMSIS-NN functions for testing networks""" - -import math -from typing import List, Union, Tuple -import numpy as np - -import tvm -from tvm import relay -from tvm.testing.aot import AOTTestRunner, get_dtype_range - - -def skip_if_no_reference_system(func): - return tvm.testing.skip_if_32bit(reason="Reference system unavailable in i386 container")(func) - - -def count_num_calls(mod): - """Counts number of CallNode(s) in the IRModule""" - - class CallCounter(relay.ExprVisitor): - def __init__(self): - super().__init__() - self.count = 0 - - def visit_call(self, call): - if isinstance(call.op, tvm.ir.Op): - self.count += 1 - - super().visit_call(call) - - counter = CallCounter() - for var in mod.get_global_vars(): - counter.visit(mod[var.name_hint]) - return counter.count - - -def assert_partitioned_function(orig_mod, cmsisnn_mod, expected_ops_unchanged=True): - """ - if KCompiler attribute is missing, this function raises an assertion. 
- - Parameters - ---------- - orig_mod : IRModule - Pre-partitioning module - cmsisnn_mod : IRModule - Post-partitioning module - is_num_calls_same: bool - Are number of CallNode(s) before and after partitioning expected to be the same - """ - attrs = [ - cmsisnn_mod[var.name_hint].attrs - for var in cmsisnn_mod.get_global_vars() - if cmsisnn_mod[var.name_hint].attrs - ] - assert any(attrs), "At least one function with external attributes was expected." - - compilers = [ - key == "Compiler" and value == "cmsis-nn" for attr in attrs for key, value in attr.items() - ] - assert any(compilers), "Module does not contain function for cmsisnn target." - - if expected_ops_unchanged: - assert count_num_calls(orig_mod) == count_num_calls( - cmsisnn_mod - ), "Number of calls changed during partitioning" - - -def assert_no_external_function(mod): - attrs = [mod[var.name_hint].attrs for var in mod.get_global_vars() if mod[var.name_hint].attrs] - assert not any(attrs), "No function should have an external attribute." - - -def make_module(func): - """Creates IRModule from Function""" - func = relay.Function(relay.analysis.free_vars(func), func) - mod = tvm.IRModule.from_expr(func) - mod = relay.transform.InferType()(mod) - return mod - - -def get_same_padding(in_shape, kernel, dilation, stride): - """ - Provides CMSIS-NN padding when output dim == input dim. - This is TFLu's "SAME" padding case. - """ - dilated_kernel_h = dilation[0] * (kernel[0] - 1) + 1 - out = int(math.ceil(float(in_shape[0]) / float(stride[0]))) - pad = max(0, (out - 1) * stride[0] + dilated_kernel_h - in_shape[0]) - pad_top = pad // 2 - pad_bottom = pad - pad_top - - dilated_kernel_w = dilation[1] * (kernel[1] - 1) + 1 - out = int(math.ceil(float(in_shape[1]) / float(stride[1]))) - pad = max(0, (out - 1) * stride[1] + dilated_kernel_w - in_shape[1]) - pad_left = pad // 2 - pad_right = pad - pad_left - return [pad_top, pad_left, pad_bottom, pad_right] - - -def get_kernel_bias_dtype(input_dtype): - """ - Returns (kernel_dtype, bias_dtype) based on input's dtype. - """ - # uint8 corresponds to an invalid case, so returning int types - # does not cause tests to break - if input_dtype in ("int8", "uint8"): - return ("int8", "int32") - elif input_dtype == "int16": - return ("int8", "int64") - raise ValueError("Invalid dtype provided to get_kernel_bias_dtype()") - - -def get_conv2d_qnn_params( - kernel_shape: List[int], - input_scale: float, - input_zp: int, - kernel_scale: Union[float, List[float]], - kernel_zp: int, - input_dtype: str = "int8", - kernel_dtype: str = "int8", - output_dtype: str = "int8", - is_depthwise: bool = False, -) -> Tuple[float, int]: - """ - Calculate the output quantization parameters for convolution based on the input and - kernel quantization paramters and the data types. 
- - Parameters - ---------- - kernel_shape : List[int] - shape of the kernel - input_scale : float - scale of the input tensor - input_zp : int - zero point of the input tensor - kernel_scale : Union[float, List[float]] - scale(s) of the kernel tensor - kernel_zp : int - zero point of the kernel tensor - is_depthwise : bool - whether it is a depthwise convolution - input_dtype : str - data type of the input tensor - kernel_dtype : str - data type of the kernel tensor - output_dtype : str - data type of the output tensor - - Returns - ------- - output_scale : float - scale of the output tensor - output_zp : int - zero point of the output tensor - """ - input_dtype_min, input_dtype_max = get_dtype_range(input_dtype) - input_max = input_scale * (input_dtype_max - input_zp) - input_min = input_scale * (input_dtype_min - input_zp) - - kernel_dtype_min, kernel_dtype_max = get_dtype_range(kernel_dtype) - kernel_sc_max = np.max(kernel_scale) - kernel_max = kernel_sc_max * (kernel_dtype_max - kernel_zp) - - kernel_sc_min = np.min(kernel_scale) - kernel_min = kernel_sc_min * (kernel_dtype_min - kernel_zp) - - kernel_h = kernel_shape[1] - kernel_w = kernel_shape[2] - channels = kernel_shape[3] - num_elements = kernel_h * kernel_w * channels - # Adjust the result if it is a depthwise convolution - if is_depthwise: - num_elements = num_elements / channels - - # The smallest and largest possible values in the unquantized output tensor - output_limits = [ - kernel_max * input_max * num_elements, - kernel_min * input_max * num_elements, - kernel_min * input_min * num_elements, - kernel_max * input_min * num_elements, - ] - - output_max = max(output_limits) - output_min = min(output_limits) - output_dtype_min, output_dtype_max = get_dtype_range(output_dtype) - - output_scale = (output_max - output_min) / (output_dtype_max - output_dtype_min) - output_zp = int(output_dtype_min - (output_min / output_scale)) - - return output_scale, output_zp - - -def make_qnn_relu(expr, fused_activation_fn, scale, zero_point, dtype): - """Mimics convert_qnn_fused_activation_function from TFLite frontend""" - quantize = lambda x: float(int(round(x / scale)) + zero_point) - - # Get min/max of the output dtype. This will be used to ensure that clip a_min/a_max are not - # beyond the dtype range. - qmin, qmax = get_dtype_range(dtype) - - # The input expr is a quantized tensor with its scale and zero point. We calculate the - # suitable clip off points based on these scale and zero point. 
- if fused_activation_fn == "NONE": - return expr - if fused_activation_fn == "RELU6": - return tvm.relay.op.clip(expr, a_min=max(qmin, quantize(0)), a_max=min(qmax, quantize(6.0))) - if fused_activation_fn == "RELU_N1_TO_1": - return tvm.relay.op.clip( - expr, a_min=max(qmin, quantize(-1.0)), a_max=min(qmax, quantize(1.0)) - ) - if fused_activation_fn == "RELU": - return tvm.relay.op.clip(expr, a_min=max(qmin, quantize(0.0)), a_max=qmax) - raise ValueError("Invalid argument provided with fused_activation_fn") - - -class CheckForPadsWithinCompositeFunc(tvm.relay.ExprVisitor): - """Provides method to test number of pads present inside the function being visited.""" - - def __init__(self): - super().__init__() - self.num_pads_ = 0 - - def visit_call(self, call): - super().visit_call(call) - if ( - isinstance(call, tvm.relay.Call) - and isinstance(call.op, tvm.ir.op.Op) - and call.op.name == "nn.pad" - ): - self.num_pads_ += 1 - - def assert_no_pads_within_func(self): - assert self.num_pads_ == 0, "CMSIS-NN composite function should not have pads." - - def assert_pads_within_func(self): - assert self.num_pads_ > 0, "Composite function should have pads within it." - - -def create_test_runner(compiler_cpu="cortex-m55", cpu_flags="", debug_last_error=False): - """ - Creates AOT test runner for CMSIS-NN tests. - - Parameters - ---------- - compiler_cpu : str - Equivalent of gcc option mcpu - Options: cortex-m55, cortex-m7 - cpu_flags: str - Disable Arm(R) Cortex(R)-M profile vector extension (mve) - Options: - Arm(R) Cortex(R)-M55: when null +mve is set by default. - +nomve disables vector extensions. - Arm(R) Cortex(R)-M7 does not support mve. - debug_last_error: bool - Whether to enable storing the last error - """ - # cmsis_cpu is used to find out start up code inside CMSIS package - cmsis_cpu = "ARMCM7" if compiler_cpu == "cortex-m7" else "ARMCM55" - mfloat_abi = "soft" if compiler_cpu == "cortex-m7" else "hard" - return AOTTestRunner( - makefile="corstone300", - prologue=""" - UartStdOutInit(); - """, - includes=["uart_stdout.h"], - pass_config={ - "relay.ext.cmsisnn.options": { - "mcpu": compiler_cpu + cpu_flags, - "debug_last_error": debug_last_error, - }, - "tir.usmp.enable": True, - "tir.disable_storage_rewrite": True, - }, - parameters={ - "ARM_CPU": cmsis_cpu, - "MCPU": compiler_cpu, - "MCPU_FLAGS": cpu_flags, - "MFLOAT_ABI": mfloat_abi, - "DEBUG_LAST_ERROR": 1 if debug_last_error else 0, - }, - ) diff --git a/tests/python/contrib/test_ethosn/__init__.py b/tests/python/contrib/test_ethosn/__init__.py deleted file mode 100644 index be2b46d24133..000000000000 --- a/tests/python/contrib/test_ethosn/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
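The SAME padding amounts used throughout these tests follow the TFLite rule implemented by get_same_padding in the deleted utils.py above: pad just enough that the output spatial size equals ceil(input / stride). A self-contained sketch with one worked example:

import math

def get_same_padding(in_shape, kernel, dilation, stride):
    # Mirrors the deleted helper: compute the total padding per axis and
    # split it, putting the extra pixel (if any) on the bottom/right.
    dilated_kernel_h = dilation[0] * (kernel[0] - 1) + 1
    out = int(math.ceil(float(in_shape[0]) / float(stride[0])))
    pad = max(0, (out - 1) * stride[0] + dilated_kernel_h - in_shape[0])
    pad_top, pad_bottom = pad // 2, pad - pad // 2
    dilated_kernel_w = dilation[1] * (kernel[1] - 1) + 1
    out = int(math.ceil(float(in_shape[1]) / float(stride[1])))
    pad = max(0, (out - 1) * stride[1] + dilated_kernel_w - in_shape[1])
    pad_left, pad_right = pad // 2, pad - pad // 2
    return [pad_top, pad_left, pad_bottom, pad_right]

# A 28x28 input with a 3x3 window and stride 2 needs one extra row/column
# of padding, applied on the bottom and right only:
assert get_same_padding((28, 28), (3, 3), (1, 1), (2, 2)) == [0, 0, 1, 1]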
-"""Infrastructure and tests for Arm(R) Ethos(TM)-N""" diff --git a/tests/python/contrib/test_ethosn/_infrastructure.py b/tests/python/contrib/test_ethosn/_infrastructure.py deleted file mode 100644 index a71ab3dbc663..000000000000 --- a/tests/python/contrib/test_ethosn/_infrastructure.py +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Expose test functions to the Python front end""" - -import tvm._ffi - -tvm._ffi._init_api("relay.ethos-n.test.infra", __name__) diff --git a/tests/python/contrib/test_ethosn/infrastructure.py b/tests/python/contrib/test_ethosn/infrastructure.py deleted file mode 100644 index 334cd6d3b87c..000000000000 --- a/tests/python/contrib/test_ethosn/infrastructure.py +++ /dev/null @@ -1,409 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N test functions""" - -from __future__ import absolute_import, print_function -from hashlib import md5 -from itertools import zip_longest, combinations -import os -from typing import Tuple -import math - -import numpy as np -from PIL import Image - -import tvm -from tvm import relay -from tvm.contrib import utils, graph_executor, download -from tvm.relay.op.contrib import partition_for_ethosn -from tvm.driver.tvmc.target import parse_target - -from . import _infrastructure - - -def get_real_image(im_height, im_width): - repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/" - img_name = "elephant-299.jpg" - image_url = os.path.join(repo_base, img_name) - img_path = download.download_testdata(image_url, img_name, module="data") - image = Image.open(img_path).resize((im_height, im_width)) - x = np.array(image).astype("uint8") - data = np.reshape(x, (1, im_height, im_width, 3)) - return data - - -def assert_lib_hash(lib, golden): - """Check that the Ethos-N runtime modules in a library hash to the same values - as given by the golden hash(es). - - If there's only one Ethos-N module, the golden hash may be provided as a str. 
- If there's multiple, a set of golden hashes should be provided to correspond - with each Ethos-N module that is expected. - - This function is used to ensure that no change is made which alters the output - of a compilation. If such a change is made deliberately (eg. to fix a bug) then - the golden hash should be updated after verifying on hardware that the behaviour - is still correct. - - This method is used because of the lack of hardware availability in upstream CI. - """ - # Convert str hash into a set of hashes - if isinstance(golden, str): - golden = {golden} - - temp = utils.tempdir() - path = temp.relpath("lib.cmm") - hash_set = set() - for mod in lib.imported_modules: - if mod.type_key == "ethos-n": - mod.save(path) - with open(path, "rb") as compiled_model: - lib_hash = md5(compiled_model.read()).hexdigest() - hash_set.add(lib_hash) - - assert hash_set == golden, "Expected hash: {} Got hash: {}".format(golden, hash_set) - - -def make_module(func, params): - func = relay.Function(relay.analysis.free_vars(func), func) - if params: - relay.build_module.bind_params_by_name(func, params) - mod = tvm.IRModule.from_expr(func) - return relay.transform.InferType()(mod) - - -def make_ethosn_composite(ethosn_expr, name): - variables = relay.analysis.free_vars(ethosn_expr) - inner_vars = [relay.Var(v.name_hint, v.type_annotation) for v in variables] - func = relay.Function(inner_vars, ethosn_expr) - func = func.with_attr("Composite", name) - call = relay.Call(func, variables) - return call - - -def make_ethosn_partition(ethosn_expr): - """Make an Ethos(TM)-N partition.""" - - # Create an Ethos-N global function - mod = tvm.IRModule({}) - variables = relay.analysis.free_vars(ethosn_expr) - # NB: it is illegal to reuse variables inside and outside a scope in Relay - # if you want to duplicate types and names you must re-allocate them. - fresh_vars = [relay.Var(v.name_hint, v.type_annotation) for v in variables] - binds = {} - for var, fresh_var in zip(variables, fresh_vars): - binds[var] = fresh_var - ethosn_expr_fresh = relay.bind(ethosn_expr, binds) - func = relay.Function(fresh_vars, ethosn_expr_fresh) - func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) - func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1)) - func = func.with_attr("Compiler", "ethos-n") - func = func.with_attr("global_symbol", "ethos-n_0") - global_var = relay.GlobalVar("ethos-n_0") - mod[global_var] = func - mod = relay.transform.InferType()(mod) - - # These are the vars to call the Ethos-N partition with - more_vars = relay.analysis.free_vars(ethosn_expr) - # Call the Ethos-N partition in main - call_fn1 = global_var(*more_vars) - mod["main"] = relay.Function(more_vars, call_fn1) - return relay.transform.InferType()(mod) - - -def get_host_op_count(mod): - """Return the number of host operators.""" - - class Counter(tvm.relay.ExprVisitor): - def __init__(self): - super().__init__() - self.count = 0 - - def visit_call(self, call): - if isinstance(call.op, tvm.ir.Op): - self.count += 1 - super().visit_call(call) - - c = Counter() - c.visit(mod["main"]) - return c.count - - -def build( - mod, params, npu=True, expected_host_ops=0, npu_partitions=1, additional_config_args=None -): - """Build a network with or without Ethos-N offloading. - - Parameters - ---------- - mod : IRModule - The Relay module to build. - params : dict of str to NDArray - The weights to build with. - npu : bool, optional - Whether to build with Ethos-N offloading. 
- expected_host_ops : int, optional - The number of ops expected to remain on the host. - npu_partitions : int, optional - The number of Ethos-N partitions expected. - additional_config_args : dict, optional - Additional compiler config options for the NPU. - """ - relay.backend.te_compiler.get().clear() - if not additional_config_args: - additional_config_args = {} - npu_config = {**get_ethosn_device_options(), **additional_config_args} - with tvm.transform.PassContext(opt_level=3, config={"relay.ext.ethos-n.options": npu_config}): - with tvm.target.Target("llvm"): - if npu: - mod = partition_for_ethosn(mod, params) - host_op_count = get_host_op_count(mod) - assert ( - host_op_count == expected_host_ops - ), "Got {} host operators, expected {}".format(host_op_count, expected_host_ops) - - attrs = [ - mod[var.name_hint].attrs - for var in mod.get_global_vars() - if mod[var.name_hint].attrs - ] - partition_count = sum( - [ - key == "Compiler" and value == "ethos-n" - for attr in attrs - for key, value in attr.items() - ] - ) - assert ( - npu_partitions == partition_count - ), "Got {} ethos-n partitions, expected {}".format(partition_count, npu_partitions) - - return relay.build(mod, params=params) - - -def run(lib, inputs, outputs, npu=True): - """Run a module with specified inputs. - - Parameters - ---------- - lib : runtime.Module - The runtime module. - inputs : dict of str to NDArray - The input dictionary. - outputs : int - The expected number of outputs. - npu : bool - Whether or not any part of the lib is offloaded to Ethos-N. - If it's false (i.e. it's all running on the CPU), we set - the mocked result equal to the output so that a subsequent - mocked run on the NPU returns the same value. - - Returns - ------- - out : list of NDArray - The results. - - """ - # Export and load lib to confirm this works - lib_name = "mod.so" - temp = utils.tempdir() - lib_path = temp.relpath(lib_name) - lib.export_library(lib_path) - lib = tvm.runtime.load_module(lib_path) - module = graph_executor.GraphModule(lib["default"](tvm.cpu())) - module.set_input(**inputs) - module.run() - out = [module.get_output(i) for i in range(outputs)] - if not npu: - inference_result(out) - return out - - -def build_and_run( - mod, - inputs, - outputs, - params, - npu=True, - expected_host_ops=0, - npu_partitions=1, - additional_config_args=None, -): - """ - Convenient wrapper for building and running a module on the NPU. - """ - lib = build(mod, params, npu, expected_host_ops, npu_partitions, additional_config_args) - return run(lib, inputs, outputs, npu) - - -def verify(answers, dtype, atol, rtol=1e-07, verify_saturation=True): - """Compare the array of answers. Each entry is a list of outputs""" - if len(answers) < 2: - print("No results to compare: expected at least two, found ", len(answers)) - for answer in zip_longest(*answers): - for outs in combinations(answer, 2): - if verify_saturation: - assert ( - np.count_nonzero(outs[0].numpy() == np.iinfo(dtype).max) - < 0.25 * outs[0].numpy().size - ), "Output is saturated: {}".format(outs[0]) - assert ( - np.count_nonzero(outs[0].numpy() == np.iinfo(dtype).min) - < 0.25 * outs[0].numpy().size - ), "Output is saturated: {}".format(outs[0]) - tvm.testing.assert_allclose(outs[0].numpy(), outs[1].numpy(), rtol=rtol, atol=atol) - - -def inference_result(outputs): - """Set the expected results of an Ethos inference, if the testing - infrastructure is available. 
This assumes that the entire graph - was offloaded to the neural processor.""" - if tvm.get_global_func("relay.ethos-n.test.infra.inference_result", True): - return _infrastructure.inference_result(*outputs) - return False - - -def test_error(mod, params, err_msg): - """Test an operator error message.""" - - caught = None - with tvm.transform.PassContext( - opt_level=3, config={"relay.ext.ethos-n.options": get_ethosn_device_options()} - ): - with tvm.target.Target("llvm"): - try: - mod = relay.transform.InferType()(mod) - relay.build(mod, params=params) - except tvm.error.TVMError as error: - caught = error.args[0] - finally: - relay.backend.te_compiler.get().clear() - - assert caught is not None - assert err_msg in caught, caught - - -def get_conv2d(var, shape, dtype): - """Standard convolution to test activation functions""" - - weight_shape = (1, 1, shape[3], 1) - weights_array = tvm.nd.array(np.ones(weight_shape, dtype)) - weights = relay.const(weights_array, dtype) - conv = relay.qnn.op.conv2d( - var, - weights, - input_zero_point=relay.const(0, "int32"), - kernel_zero_point=relay.const(0, "int32"), - input_scale=relay.const(1.0, "float32"), - kernel_scale=relay.const(1.0, "float32"), - kernel_size=(1, 1), - channels=1, - data_layout="NHWC", - kernel_layout="HWIO", - ) - b = tvm.nd.array(np.zeros((shape[0],), "int32")) - biasc = relay.const(b, "int32") - bias = relay.nn.bias_add(conv, biasc, axis=0) - req = relay.qnn.op.requantize( - bias, - relay.const(1.0, "float32"), # input zero scale - relay.const(0, "int32"), # input zero point - relay.const(1.1, "float32"), # output zero scale - relay.const(0, "int32"), # output zero point - out_dtype=dtype, - ) - params = {"w": weights_array, "b": b} - return req, params - - -def get_conv2d_qnn_params( - dtype, input_zp, input_sc, kernel_zp, kernel_sc, kernel_h, kernel_w, channels -): - """Return Conv2D QNN params.""" - - kernel_sc = ( - kernel_sc.numpy() if isinstance(kernel_sc, tvm.runtime.ndarray.NDArray) else [kernel_sc] - ) - dtype_min = np.iinfo(dtype).min - dtype_max = np.iinfo(dtype).max - - input_max = input_sc * (dtype_max - input_zp) - input_min = input_sc * (dtype_min - input_zp) - - kernel_max = max(kernel_sc) * (dtype_max - kernel_zp) - kernel_min = min(kernel_sc) * (dtype_min - kernel_zp) - - output_limits = [ - kernel_max * kernel_h * kernel_w * channels * input_max, - kernel_min * kernel_h * kernel_w * channels * input_max, - kernel_min * kernel_h * kernel_w * channels * input_min, - kernel_max * kernel_h * kernel_w * channels * input_min, - ] - output_max = max(output_limits) - output_min = min(output_limits) - - output_sc = (output_max - output_min) / (dtype_max - dtype_min) - output_zp = int(dtype_min - (output_min / output_sc)) - return output_zp, output_sc - - -def get_same_padding( - data: Tuple[int, int], - kernel: Tuple[int, int], - dilation: Tuple[int, int], - stride: Tuple[int, int], -) -> Tuple[int, int, int, int]: - """ - Get the padding values required for 'SAME' padding. - - Parameters - ---------- - data : Tuple[int, int] - The height and width of the data respectively. - kernel : Tuple[int, int] - The height and width of the kernel respectively. - dilation : Tuple[int, int] - The dilation of the kernel. - stride : Tuple[int, int] - The stride of the kernel. - - Returns - ------- - Tuple[int, int, int, int] - The padding values for top, left, bottom and right respectively. 
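The 'SAME' padding rule implemented just below places any odd remainder on the bottom/right edge, matching the TensorFlow/TFLite convention. A small self-contained sketch of the same formula, using hypothetical sizes (7x8 feature map, 3x3 kernel, stride 2, no dilation) and a helper name, `same_pad`, introduced only for illustration:

    import math

    def same_pad(size, kernel, stride, dilation=1):
        # Effective kernel extent once dilation is applied.
        dilated = dilation * (kernel - 1) + 1
        out = math.ceil(size / stride)
        pad = max(0, (out - 1) * stride + dilated - size)
        return pad // 2, pad - pad // 2   # (before, after)

    print(same_pad(7, 3, 2))   # (1, 1) -> pad_top, pad_bottom
    print(same_pad(8, 3, 2))   # (0, 1) -> pad_left, pad_right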
- """ - dilated_kernel_h = dilation[0] * (kernel[0] - 1) + 1 - dilated_kernel_w = dilation[1] * (kernel[1] - 1) + 1 - out = int(math.ceil(float(data[0]) / float(stride[0]))) - pad = max(0, (out - 1) * stride[0] + dilated_kernel_h - data[0]) - pad_top = pad // 2 - pad_bottom = pad - pad_top - - out = int(math.ceil(float(data[1]) / float(stride[1]))) - pad = max(0, (out - 1) * stride[1] + dilated_kernel_w - data[1]) - pad_left = pad // 2 - pad_right = pad - pad_left - return (pad_top, pad_left, pad_bottom, pad_right) - - -def get_ethosn_device_options(): - """Determine the NPU configuration used for testing.""" - default_target_string = "ethos-n -variant=n78 -tops=1 -ple_ratio=2" - target_string = os.getenv("ETHOSN_TEST_TARGET_CONFIG", default_target_string) - target = parse_target(target_string) - return target[0]["opts"] diff --git a/tests/python/contrib/test_ethosn/test_addition.py b/tests/python/contrib/test_ethosn/test_addition.py deleted file mode 100644 index c4503f4bc030..000000000000 --- a/tests/python/contrib/test_ethosn/test_addition.py +++ /dev/null @@ -1,417 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N integration addition tests""" - -import numpy as np -import pytest -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from . 
import infrastructure as tei - - -def _get_model( - lhs_shape, - rhs_shape, - lhs_zp, - lhs_sc, - rhs_zp, - rhs_sc, - out_zp, - out_sc, - dtype, - lhs_is_constant=False, - rhs_is_constant=False, - constant_data=None, -): - """Return a model and any parameters it may have""" - - def create_or_assign_constant(shape, dtype, default_data): - """Creates new numpy array or assigns default_data if available.""" - - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - - nparray = None - if default_data: - nparray = np.array(default_data, dtype=dtype).reshape(shape) - else: - nparray = np.random.randint(data_min, data_max + 1, size=shape, dtype=dtype) - - return relay.const(nparray, dtype=dtype) - - if lhs_is_constant: - a = create_or_assign_constant(lhs_shape, dtype, constant_data) - else: - a = relay.var("a", shape=lhs_shape, dtype=dtype) - - if rhs_is_constant: - b = create_or_assign_constant(rhs_shape, dtype, constant_data) - else: - b = relay.var("b", shape=rhs_shape, dtype=dtype) - - model = relay.qnn.op.add( - lhs=a, - rhs=b, - lhs_scale=relay.const(lhs_sc, "float32"), - lhs_zero_point=relay.const(lhs_zp, "int32"), - rhs_scale=relay.const(rhs_sc, "float32"), - rhs_zero_point=relay.const(rhs_zp, "int32"), - output_scale=relay.const(out_sc, "float32"), - output_zero_point=relay.const(out_zp, "int32"), - ) - return model - - -def _get_addition_qnn_params(dtype): - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - lhs_zp = np.random.randint(data_min, data_max) - lhs_sc = np.random.random() * 2 - rhs_zp = np.random.randint(data_min, data_max) - rhs_sc = np.random.random() * 2 - - input1_max = lhs_sc * (255 - lhs_zp) - input1_min = -lhs_sc * lhs_zp - input2_max = rhs_sc * (255 - rhs_zp) - input2_min = -rhs_sc * rhs_zp - output_max = input1_max + input2_max - output_min = input1_min + input2_min - output_sc = (output_max - output_min) / 255 - output_zp = -int(output_min / output_sc) - return lhs_zp, lhs_sc, rhs_zp, rhs_sc, output_zp, output_sc - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize("shape", [(1, 22, 9, 9), (1, 27, 21, 16)]) -def test_addition(dtype, shape): - """Compare Addition output with TVM.""" - np.random.seed(0) - - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, out_sc = _get_addition_qnn_params(dtype) - - outputs = [] - inputs = { - "a": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=shape, dtype=dtype)), - "b": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=shape, dtype=dtype)), - } - model = _get_model(shape, shape, lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, out_sc, dtype) - for npu in [False, True]: - mod = tei.make_module(model, []) - outputs.append( - tei.build_and_run( - mod, - inputs, - 1, - {}, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "lhs_shape,lhs_is_constant,rhs_shape,rhs_is_constant", - [ - ((1, 4, 4, 8), True, (1, 1, 1, 8), True), - ((4,), True, (1, 16, 12, 4), True), - ((1, 1, 1, 8), True, (1, 4, 4, 8), True), - ((1, 16, 12, 4), True, (4,), True), - ], -) -def test_addition_both_inputs_constants( - dtype, lhs_shape, lhs_is_constant, rhs_shape, rhs_is_constant -): - """Check if addition is simplified when both inputs are constants.""" - np.random.seed(0) - - lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, 
out_sc = _get_addition_qnn_params(dtype) - - model = _get_model( - lhs_shape, - rhs_shape, - lhs_zp, - lhs_sc, - rhs_zp, - rhs_sc, - out_zp, - out_sc, - dtype, - lhs_is_constant=lhs_is_constant, - rhs_is_constant=rhs_is_constant, - ) - from tvm.relay.op.contrib import partition_for_ethosn # pylint: disable=import-outside-toplevel - - mod = tei.make_module(model, {}) - assert "qnn.add" in mod.astext(False) - mod = partition_for_ethosn(mod, {}) - assert "qnn.add" not in mod.astext(False) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "lhs_shape,lhs_is_constant,rhs_shape,rhs_is_constant", - [ - ((1, 4, 4, 8), False, (1, 4, 4, 8), True), - ((1, 16, 12, 4), True, (1, 16, 12, 4), False), - ], -) -def test_addition_with_one_constant(dtype, lhs_shape, lhs_is_constant, rhs_shape, rhs_is_constant): - """Validate addition with one input as a constant.""" - np.random.seed(0) - - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, out_sc = _get_addition_qnn_params(dtype) - - model = _get_model( - lhs_shape, - rhs_shape, - lhs_zp, - lhs_sc, - rhs_zp, - rhs_sc, - out_zp, - out_sc, - dtype, - lhs_is_constant=lhs_is_constant, - rhs_is_constant=rhs_is_constant, - ) - input_shape = rhs_shape if lhs_is_constant else lhs_shape - input_name = "b" if lhs_is_constant else "a" - inputs = { - input_name: tvm.nd.array( - np.random.randint(data_min, data_max + 1, size=input_shape, dtype=dtype) - ) - } - - outputs = [] - for npu in [False, True]: - mod = tei.make_module(model, {}) - outputs.append( - tei.build_and_run( - mod, - inputs, - 1, - {}, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "lhs_shape,lhs_is_constant,rhs_shape,rhs_is_constant", - [ - ((1, 4, 4, 8), False, (1, 1, 1, 8), True), - ((4,), True, (1, 16, 12, 4), False), - ((1, 1, 1, 8), True, (1, 4, 4, 8), False), - ((1, 16, 12, 4), False, (4,), True), - ], -) -def test_addition_to_depthwise(dtype, lhs_shape, lhs_is_constant, rhs_shape, rhs_is_constant): - """Compare addition to depthwise with TVM.""" - np.random.seed(0) - - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, out_sc = _get_addition_qnn_params(dtype) - - model = _get_model( - lhs_shape, - rhs_shape, - lhs_zp, - lhs_sc, - rhs_zp, - rhs_sc, - out_zp, - out_sc, - dtype, - lhs_is_constant=lhs_is_constant, - rhs_is_constant=rhs_is_constant, - ) - input_shape = rhs_shape if lhs_is_constant else lhs_shape - input_name = "b" if lhs_is_constant else "a" - inputs = { - input_name: tvm.nd.array( - np.random.randint(data_min, data_max + 1, size=input_shape, dtype=dtype) - ) - } - outputs = [] - for npu in [False, True]: - mod = tei.make_module(model, {}) - outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu)) - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize( - "lhs_shape,lhs_is_constant,rhs_shape,rhs_is_constant", - [ - ((1, 2, 8, 4), False, None, True), - ((1, 5, 6, 7), False, (1, 1, 1, 1), True), - (None, True, (1, 2, 8, 4), False), - ((1, 1, 1, 1), True, (1, 5, 6, 7), False), - ], -) -def test_addition_to_reinterpret_quantize(lhs_shape, lhs_is_constant, rhs_shape, rhs_is_constant): - """Compare addition to depthwise with TVM.""" - np.random.seed(0) - - dtype = "uint8" - iinfo = np.iinfo(dtype) - 
data_min = iinfo.min - data_max = iinfo.max - - # Add can only be offloaded as a reinterpret quantize operation if - # it is an identity operation. We must choose the quantization and - # constant data carefully to maske sure that this is the case. - if lhs_is_constant: - rhs_zp = 128 - rhs_sc = 0.0078125 - lhs_zp = 0 - lhs_sc = 0.003921568859368563 - else: - lhs_zp = 128 - lhs_sc = 0.0078125 - rhs_zp = 0 - rhs_sc = 0.003921568859368563 - out_zp = 0 - out_sc = 0.007814894430339336 - constant_data = 255 - - model = _get_model( - lhs_shape, - rhs_shape, - lhs_zp, - lhs_sc, - rhs_zp, - rhs_sc, - out_zp, - out_sc, - dtype, - lhs_is_constant=lhs_is_constant, - rhs_is_constant=rhs_is_constant, - constant_data=constant_data, - ) - input_shape = rhs_shape if lhs_is_constant else lhs_shape - input_name = "b" if lhs_is_constant else "a" - inputs = { - input_name: tvm.nd.array( - np.random.randint(data_min, data_max + 1, size=input_shape, dtype=dtype) - ) - } - outputs = [] - for npu in [False, True]: - mod = tei.make_module(model, {}) - outputs.append( - tei.build_and_run( - mod, - inputs, - 1, - {}, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize( - "dtype,shape,err_msg", - [ - ( - "uint8", - (2, 4, 4, 4), - "batch size=2, batch size must = 1; batch size=2, batch size must = 1", - ), - ( - "int16", - (1, 4, 4, 4), - "dtype='int16', dtype must be either uint8, int8 or int32; dtype='int16', " - "dtype must be either uint8, int8 or int32", - ), - ], -) -def test_addition_failure(dtype, shape, err_msg): - """Check addition error messages.""" - np.random.seed(0) - - lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, out_sc = _get_addition_qnn_params(dtype) - - model = _get_model(shape, shape, lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, out_sc, dtype) - model = tei.make_ethosn_composite(model, "ethos-n.qnn_add") - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, err_msg) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "lhs_shape,lhs_is_constant,rhs_shape,rhs_is_constant", - [ - ((1, 4, 4, 8), True, (1, 1, 4, 8), False), - ((1, 4, 4, 8), False, (1, 1, 4, 8), False), - ((1, 16, 1, 4), True, (1, 1, 12, 4), False), - ], -) -def test_unsupported_broadcast_addition( - dtype, lhs_shape, lhs_is_constant, rhs_shape, rhs_is_constant -): - """Test broadcast compatible addition falls back to TVM.""" - np.random.seed(0) - - lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, out_sc = _get_addition_qnn_params(dtype) - - model = _get_model( - lhs_shape, - rhs_shape, - lhs_zp, - lhs_sc, - rhs_zp, - rhs_sc, - out_zp, - out_sc, - dtype, - lhs_is_constant=lhs_is_constant, - rhs_is_constant=rhs_is_constant, - ) - from tvm.relay.op.contrib import partition_for_ethosn # pylint: disable=import-outside-toplevel - - mod = tei.make_module(model, {}) - assert "qnn.add" in mod.astext(False) - mod = partition_for_ethosn(mod, {}) - assert "qnn.add" in mod.astext(False) - assert "ethos-n.qnn_add" not in mod.astext(False) diff --git a/tests/python/contrib/test_ethosn/test_codegen.py b/tests/python/contrib/test_ethosn/test_codegen.py deleted file mode 100644 index 3759d83b1e0e..000000000000 --- a/tests/python/contrib/test_ethosn/test_codegen.py +++ /dev/null @@ -1,106 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""NPU codegen tests""" - -import pytest -import numpy as np - -import tvm -from tvm import relay -from tvm.testing import requires_ethosn - -from . import infrastructure as tei - - -@requires_ethosn -def test_compile_with_unsupported_variant(): - """Test compilation with unsupported variant.""" - dtype = "int8" - input_shape = (1, 2, 2, 2) - - x = relay.var("x", shape=input_shape, dtype=dtype) - y = relay.reshape(x, newshape=(1, 1, 1, 8)) - mod = tei.make_ethosn_partition(y) - - additional_config_args = { - "variant": "foo", - "inline_non_compute_intensive_partitions": False, - } - - inputs = { - "x": np.random.randint( - low=np.iinfo(dtype).min, high=np.iinfo(dtype).max, size=input_shape, dtype=dtype - ) - } - - with pytest.raises(tvm.TVMError, match=r"Unknown NPU type"): - tei.build_and_run(mod, inputs, 1, {}, True, additional_config_args=additional_config_args) - - -@requires_ethosn -def test_experimental_compiler(capfd): - """Test compilation with the experimental compiler.""" - dtype = "int8" - input_shape = (1, 2, 2, 2) - - x = relay.var("x", shape=input_shape, dtype=dtype) - y = relay.reshape(x, newshape=(1, 1, 1, 8)) - mod = tei.make_ethosn_partition(y) - - additional_config_args = { - "variant": "n78", - "experimental_compiler": True, - "inline_non_compute_intensive_partitions": False, - } - - tei.build(mod, {}, True, additional_config_args=additional_config_args) - - # Check for hints that the experimental compiler was activated. - # The support library logs a warning to say the experimental - # compiler is in use. Check that this warning was logged. - captured = capfd.readouterr() - assert ( - "WARNING: Experimental Compiler in use." in captured.err - ), "Experimental compiler was not activated." - - -@requires_ethosn -def test_without_experimental_compiler(capfd): - """Test compilation when the experimental compiler is not enabled.""" - dtype = "int8" - input_shape = (1, 2, 2, 2) - - x = relay.var("x", shape=input_shape, dtype=dtype) - y = relay.reshape(x, newshape=(1, 1, 1, 8)) - mod = tei.make_ethosn_partition(y) - - additional_config_args = { - "variant": "n78", - "experimental_compiler": False, - "inline_non_compute_intensive_partitions": False, - } - - tei.build(mod, {}, True, additional_config_args=additional_config_args) - - # Check for hints that the experimental compiler was activated. - # The support library logs a warning to say the experimental - # compiler is in use. Check that this warning was logged. - captured = capfd.readouterr() - assert ( - "WARNING: Experimental Compiler in use." not in captured.err - ), "Experimental compiler was enabled when it is not expected to be." 
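The two codegen tests above use pytest's `capfd` fixture rather than `capsys` because the "Experimental Compiler in use" warning is printed by the native support library, not by Python: `capfd` captures output at the file-descriptor level, so messages written by C/C++ code show up in `readouterr().err`. A minimal, hypothetical illustration of that pattern (the test name is invented for this sketch):

    import os

    def test_captures_fd_level_output(capfd):
        # os.write bypasses sys.stderr, mimicking output from native code;
        # capfd still sees it because it captures raw file descriptors 1 and 2.
        os.write(2, b"WARNING: Experimental Compiler in use.\n")
        captured = capfd.readouterr()
        assert "WARNING: Experimental Compiler in use." in captured.err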
diff --git a/tests/python/contrib/test_ethosn/test_concatenate.py b/tests/python/contrib/test_ethosn/test_concatenate.py deleted file mode 100644 index 83e84046d0a6..000000000000 --- a/tests/python/contrib/test_ethosn/test_concatenate.py +++ /dev/null @@ -1,130 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Concatenate tests for Arm(R) Ethos(TM)-N""" - -import numpy as np -import pytest -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from . import infrastructure as tei - - -def _get_inputs(shapes, dtype): - inputs = {} - for i, shape in enumerate(shapes): - inputs["in" + str(i)] = tvm.nd.array( - np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=shape, dtype=dtype) - ) - - return inputs - - -def _get_model(shapes, dtype, axis): - tup = [] - for i, shape in enumerate(shapes): - a = relay.var("in" + str(i), shape=shape, dtype=dtype) - tup.append(a) - - zeroi = relay.const(1, "int32") - zerof = relay.const(0.5, "float32") - con = relay.qnn.op.concatenate( - tup, - input_scales=[zerof] * len(shapes), - input_zero_points=[zeroi] * len(shapes), - output_scale=zerof, - output_zero_point=zeroi, - axis=axis, - ) - return con - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "shapes,axis", - [ - ([(1, 4), (1, 6)], 1), - ([(1, 16, 4), (1, 16, 4)], 1), - ([(1, 25, 4, 16)] * 3, 3), - ([(1, 25, 4, 16), (1, 25, 5, 16), (1, 25, 6, 16)], 2), - ([(1, 4), (1, 6)], -1), - ([(1, 16, 4), (1, 16, 4)], -2), - ], -) -def test_concatenate(dtype, shapes, axis): - """Compare Concatenate output with TVM.""" - np.random.seed(0) - - outputs = [] - inputs = _get_inputs(shapes, dtype) - for npu in [False, True]: - model = _get_model(shapes, dtype, axis) - mod = tei.make_module(model, {}) - outputs.append( - tei.build_and_run( - mod, - inputs, - 1, - {}, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - - tei.verify(outputs, dtype, 0) - - -@requires_ethosn -@pytest.mark.parametrize( - "shapes,dtype,axis,err_msg", - [ - ([(1, 4, 4, 4, 4), (1, 4, 4, 4, 4)], "uint8", 1, "dimensions=5, dimensions must be <= 4;"), - ( - [(1, 4, 4, 4), (1, 4, 4, 4)], - "uint8", - 3, - "Concatenation along the channels dimension (axis 3) " - "requires input tensors with a multiple of 16 channels;", - ), - ( - [(1, 4, 4, 4), (1, 4, 4, 4)], - "int16", - 2, - "dtype='int16', dtype must be either uint8, int8 or int32; dtype='int16', " - "dtype must be either uint8, int8 or int32;", - ), - ( - [(2, 4, 4, 4), (2, 4, 4, 4)], - "uint8", - 2, - "batch size=2, batch size must = 1; batch size=2, batch size must = 1;", - ), - ( - [(1, 4, 4, 4)], - "uint8", - 0, - "Concatenation cannot be performed along batch axis (axis 0);", - ), - ], -) -def 
test_concatenate_failure(shapes, dtype, axis, err_msg): - """Check Concatenate error messages.""" - model = _get_model(shapes, dtype, axis) - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, err_msg) diff --git a/tests/python/contrib/test_ethosn/test_constant_duplication.py b/tests/python/contrib/test_ethosn/test_constant_duplication.py deleted file mode 100644 index b3cd0046f508..000000000000 --- a/tests/python/contrib/test_ethosn/test_constant_duplication.py +++ /dev/null @@ -1,85 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test that constants aren't duplicated for Arm(R) Ethos(TM)-N""" - -import numpy as np -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from . import infrastructure as tei - - -def _get_model(): - """Return a model and any parameters it may have""" - shape = (1, 4, 4, 4) - kernel_h = 3 - kernel_w = 3 - out_channels = 8 - - a = relay.var("a", shape=shape, dtype="uint8") - add_const_value = tvm.nd.array(np.random.randint(0, high=10, size=shape, dtype="uint8")) - add_const = relay.const(add_const_value, "uint8") - a = relay.add(a, add_const) - weight_shape = (kernel_h, kernel_w, shape[3], out_channels) - weights_array = tvm.nd.array( - np.random.randint(low=0, high=255, size=weight_shape, dtype="uint8") - ) - weights = relay.const(weights_array, "uint8") - conv = relay.qnn.op.conv2d( - a, - weights, - input_zero_point=relay.const(0, "int32"), - kernel_zero_point=relay.const(0, "int32"), - input_scale=relay.const(0.3, "float32"), - kernel_scale=relay.const(0.4, "float32"), - kernel_size=(kernel_h, kernel_w), - data_layout="NHWC", - kernel_layout="HWIO", - dilation=(1, 1), - strides=(1, 1), - groups=1, - channels=out_channels, - padding=(0, 0, 0, 0), - out_dtype="int32", - ) - b = tvm.nd.array(np.random.randint(0, high=10, size=(out_channels,), dtype="int32")) - biasc = relay.const(b, "int32") - bias = relay.nn.bias_add(conv, biasc, axis=3) - req = relay.qnn.op.requantize( - bias, - relay.const(0.3 * 0.4, "float32"), # input zero scale - relay.const(0, "int32"), # input zero point - relay.const(0.4, "float32"), # output zero scale - relay.const(0, "int32"), # output zero point - out_dtype="uint8", - ) - params = {"w": weights_array, "b": b} - return req, params - - -@requires_ethosn -def test_constant_duplication(): - """Test that constants are not duplicated.""" - - np.random.seed(0) - model, params = _get_model() - mod = tei.make_module(model, params) - res = tei.build(mod, params, npu=True, expected_host_ops=1) - for key, value in res.params.items(): - assert key == "p0" - assert value.numpy().size == 64 diff --git a/tests/python/contrib/test_ethosn/test_conv2d.py b/tests/python/contrib/test_ethosn/test_conv2d.py deleted file mode 100644 index e4c8b1c8da29..000000000000 --- 
a/tests/python/contrib/test_ethosn/test_conv2d.py +++ /dev/null @@ -1,369 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N integration conv2d tests""" - -import numpy as np -import pytest - -import tvm -from tvm import relay -from tvm.testing import requires_ethosn - -from . import infrastructure as tei - - -def _get_model( - shape, - kernel_h, - kernel_w, - input_zp, - input_sc, - kernel_zp, - kernel_sc, - output_zp, - output_sc, - pad, - strides, - dilation, - groups, - dtype, - out_channels, - weight_format, -): - """Return a model and any parameters it may have""" - a = relay.var("a", shape=shape, dtype=dtype) - if pad in ("op", "both"): - p = tei.get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides) - a = relay.nn.pad( - a, - pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)], - pad_value=input_zp, - pad_mode="constant", - ) - shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3]) - - p = tei.get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides) - if weight_format == "HWIO": - weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels) - else: - weight_shape = (kernel_h, kernel_w, out_channels, 1) - weights_array = tvm.nd.array( - np.random.randint( - np.iinfo(dtype).min, high=np.iinfo(dtype).max + 1, size=weight_shape, dtype=dtype - ) - ) - weights = relay.const(weights_array, dtype) - conv = relay.qnn.op.conv2d( - a, - weights, - input_zero_point=relay.const(input_zp, "int32"), - kernel_zero_point=relay.const(kernel_zp, "int32"), - input_scale=relay.const(input_sc, "float32"), - kernel_scale=relay.const(kernel_sc, "float32"), - kernel_size=(kernel_h, kernel_w), - data_layout="NHWC", - kernel_layout=weight_format, - dilation=dilation, - strides=strides, - groups=groups, - channels=out_channels, - padding=p if pad in ("attr", "both") else (0, 0, 0, 0), - out_dtype="int32", - ) - bias_data = tvm.nd.array( - np.random.randint( - np.iinfo(dtype).min, high=np.iinfo(dtype).max + 1, size=(out_channels,), dtype="int32" - ) - ) - biasc = relay.const(bias_data, "int32") - bias = relay.nn.bias_add(conv, biasc, axis=3) - if isinstance(kernel_sc, tvm.runtime.ndarray.NDArray): - req_input_sc = [sc * input_sc for sc in kernel_sc.numpy()] - else: - req_input_sc = input_sc * kernel_sc - req = relay.qnn.op.requantize( - bias, - relay.const(req_input_sc, "float32"), # input zero scale - relay.const(0, "int32"), # input zero point - relay.const(output_sc, "float32"), # output zero scale - relay.const(output_zp, "int32"), # output zero point - out_dtype=dtype, - ) - params = {"w": weights_array, "b": bias_data} - return req, params - - -@requires_ethosn -@pytest.mark.parametrize( - "dtype,qnn_per_channel", [("uint8", False), ("int8", False), ("int8", 
True)] -) -@pytest.mark.parametrize("pad,stride", [("attr", (2, 2)), ("none", (2, 2)), ("op", (1, 1))]) -@pytest.mark.parametrize( - "shape,out_channels,kernel_size", - [ - [(1, 17, 20, 26), 4, (3, 1)], - [(1, 9, 20, 30), 7, (1, 5)], - [(1, 21, 21, 22), 8, (2, 2)], - ], -) -def test_conv2d( - dtype, - shape, - out_channels, - kernel_size, - pad, - stride, - qnn_per_channel, -): - """Compare Conv2D output with TVM.""" - np.random.seed(0) - - dilation = (1, 1) - groups = 1 - weight_format = "HWIO" - - outputs = [] - inputs = { - "a": tvm.nd.array( - np.random.randint( - np.iinfo(dtype).min, - np.iinfo(dtype).max + 1, - size=shape, - dtype=dtype, - ) - ), - } - input_zp = np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max) - input_sc = np.random.random() * 2 - if qnn_per_channel: - kernel_sc = tvm.nd.array( - np.random.uniform(low=0, high=2, size=(out_channels,)).astype(np.float32) - ) - else: - kernel_sc = np.random.random() * 2 - kernel_zp = ( - 0 if dtype == "int8" else np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max) - ) - output_zp, output_sc = tei.get_conv2d_qnn_params( - dtype, input_zp, input_sc, kernel_zp, kernel_sc, kernel_size[0], kernel_size[1], shape[3] - ) - model, params = _get_model( - shape, - kernel_size[0], - kernel_size[1], - input_zp, - input_sc, - kernel_zp, - kernel_sc, - output_zp, - output_sc, - pad, - stride, - dilation, - groups, - dtype, - out_channels, - weight_format, - ) - for npu in [False, True]: - mod = tei.make_module(model, params) - outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu)) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize( - "dtype,qnn_per_channel", [("uint8", False), ("int8", False), ("int8", True)] -) -@pytest.mark.parametrize("pad,stride", [("attr", (2, 2)), ("none", (2, 2)), ("op", (1, 1))]) -@pytest.mark.parametrize( - "shape,kernel_size", - [ - [(1, 17, 20, 28), (3, 3)], - [(1, 9, 20, 30), (5, 5)], - [(1, 21, 21, 22), (2, 2)], - ], -) -def test_conv2d_depthwise( - dtype, - shape, - kernel_size, - pad, - stride, - qnn_per_channel, -): - """Compare Conv2D output with TVM.""" - np.random.seed(0) - - dilation = (1, 1) - out_channels = shape[3] - groups = out_channels - weight_format = "HWOI" - - outputs = [] - inputs = { - "a": tvm.nd.array( - np.random.randint( - np.iinfo(dtype).min, - np.iinfo(dtype).max + 1, - size=shape, - dtype=dtype, - ) - ), - } - input_zp = np.random.randint(0, np.iinfo(dtype).max) - input_sc = np.random.random() * 2 - if qnn_per_channel: - kernel_sc = tvm.nd.array( - np.random.uniform(low=0, high=2, size=(out_channels,)).astype(np.float32) - ) - else: - kernel_sc = np.random.random() * 2 - kernel_zp = ( - 0 if dtype == "int8" else np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max) - ) - output_zp, output_sc = tei.get_conv2d_qnn_params( - dtype, input_zp, input_sc, kernel_zp, kernel_sc, kernel_size[0], kernel_size[1], shape[3] - ) - model, params = _get_model( - shape, - kernel_size[0], - kernel_size[1], - input_zp, - input_sc, - kernel_zp, - kernel_sc, - output_zp, - output_sc, - pad, - stride, - dilation, - groups, - dtype, - out_channels, - weight_format, - ) - for npu in [False, True]: - mod = tei.make_module(model, params) - outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu)) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize( - "shape,pad,stride,dilation,err_msg", - [ - ( - (1, 4, 4, 4), - "both", - (1, 1), - (1, 1), - "both op and attr padding exist, must be either op/attr only or no padding", 
- ), - ( - (1, 4, 4, 4), - "none", - (1, 1, 1), - (1, 1), - "stride size=3, stride size must = 2", - ), - ( - (1, 4, 4, 4), - "none", - (1, 1), - (2, 1), - "dilation=[2, 1], dilation must = [1, 1]", - ), - ( - (2, 4, 4, 4), - "none", - (1, 1), - (1, 1), - "batch size=2, batch size must = 1", - ), - ], -) -def test_conv2d_failure(shape, pad, stride, dilation, err_msg): - """Check Conv2D error messages.""" - np.random.seed(0) - - kernel_size = (2, 2) - groups = 1 - dtype = "uint8" - out_channels = 8 - weight_format = "HWIO" - - model, _ = _get_model( - shape, - kernel_size[0], - kernel_size[1], - 0, - 1, - 0, - 1, - 0, - 1, - pad, - stride, - dilation, - groups, - dtype, - out_channels, - weight_format, - ) - model = tei.make_ethosn_composite(model, "ethos-n.qnn_conv2d") - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, err_msg) - - -@requires_ethosn -def test_conv2d_out_of_range_scale(): - """Check Conv2D scale out of range error.""" - np.random.seed(0) - - input_sc = 1024 - kernel_sc = 1024 - output_sc = 1 - - model, _ = _get_model( - (1, 4, 4, 4), - 1, - 1, - 0, - input_sc, - 0, - kernel_sc, - 0, - output_sc, - "none", - (1, 1), - (1, 1), - 1, - "uint8", - 8, - "HWIO", - ) - model = tei.make_ethosn_composite(model, "ethos-n.qnn_conv2d") - mod = tei.make_ethosn_partition(model) - - expected_err_msg = ( - "Overall scale (of the input * weights / output) should be in the range (2^-32, 65536)" - ) - tei.test_error(mod, {}, expected_err_msg) diff --git a/tests/python/contrib/test_ethosn/test_conv2d_transpose.py b/tests/python/contrib/test_ethosn/test_conv2d_transpose.py deleted file mode 100644 index 4d99a310ac44..000000000000 --- a/tests/python/contrib/test_ethosn/test_conv2d_transpose.py +++ /dev/null @@ -1,300 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N integration conv2d tests""" - -import pytest -import numpy as np - -import tvm -from tvm import relay -from tvm.relay.op.contrib import ethosn_api_version -from tvm.testing import requires_ethosn -from . 
import infrastructure as tei - - -def _get_model( - shape, - kernel_h, - kernel_w, - input_zp, - input_sc, - kernel_zp, - kernel_sc, - output_zp, - output_sc, - stride, - dilation, - groups, - kernel_layout, - dtype, - out_channels, - bias, -): - """Return a model and any parameters it may have""" - a = relay.var("a", shape=shape, dtype=dtype) - p = tei.get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, stride) - weight_shape = (shape[3], out_channels // groups, kernel_h, kernel_w) - - weight_data = tvm.nd.array( - np.random.randint( - np.iinfo(dtype).min, - high=(np.iinfo(dtype).max + 1), - size=weight_shape, - dtype=dtype, - ) - ) - weights = relay.const(weight_data, dtype) - op = relay.qnn.op.conv2d_transpose( - a, - weights, - input_zero_point=relay.const(input_zp, "int32"), - input_scale=relay.const(input_sc, "float32"), - kernel_zero_point=relay.const(kernel_zp, "int32"), - kernel_scale=relay.const(kernel_sc, "float32"), - kernel_size=(kernel_h, kernel_w), - padding=p, - strides=stride, - dilation=dilation, - data_layout="NHWC", - kernel_layout=kernel_layout, - out_dtype="int32", - channels=out_channels, - groups=groups, - ) - if bias: - bias_data = tvm.nd.array( - np.random.randint( - np.iinfo(dtype).min, - high=np.iinfo(dtype).max + 1, - size=(out_channels,), - dtype="int32", - ) - ) - biasc = relay.const(bias_data, "int32") - op = relay.nn.bias_add(op, biasc, axis=3) - - if isinstance(kernel_sc, tvm.runtime.ndarray.NDArray): - req_input_sc = [sc * input_sc for sc in kernel_sc.numpy()] - else: - req_input_sc = input_sc * kernel_sc - - op = relay.qnn.op.requantize( - op, - input_zero_point=relay.const(input_zp, "int32"), - input_scale=relay.const(req_input_sc, "float32"), - output_zero_point=relay.const(output_zp, "int32"), - output_scale=relay.const(output_sc, "float32"), - axis=3, - rounding="UPWARD", - out_dtype=dtype, - ) - params = {"w": weight_data} - if bias: - params["b"] = bias_data - return op, params - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "ifm_shape,strides,kernel_size,out_channels,bias", - [ - ((1, 2, 2, 1), (2, 2), (1, 1), 1, False), - ((1, 2, 2, 5), (2, 2), (3, 5), 4, False), - ((1, 7, 7, 4), (2, 2), (7, 7), 8, True), - ], -) -def test_conv2d_transpose(ifm_shape, strides, kernel_size, out_channels, dtype, bias): - """Check transpose convolution output with TVM.""" - np.random.seed(0) - - kernel_layout = "IOHW" - dilation = (1, 1) - groups = 1 - - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - - input_zp = np.random.randint(data_min, data_max) - input_sc = np.random.random() * 2 - kernel_zp = np.random.randint(data_min, data_max) - kernel_sc = np.random.random() * 4 - output_zp, output_sc = tei.get_conv2d_qnn_params( - dtype, input_zp, input_sc, kernel_zp, kernel_sc, ifm_shape[1], ifm_shape[2], ifm_shape[3] - ) - - model, params = _get_model( - shape=ifm_shape, - kernel_h=kernel_size[0], - kernel_w=kernel_size[1], - input_zp=input_zp, - input_sc=input_sc, - kernel_zp=kernel_zp, - kernel_sc=kernel_sc, - output_zp=output_zp, - output_sc=output_sc, - stride=strides, - dilation=dilation, - groups=groups, - kernel_layout=kernel_layout, - dtype=dtype, - out_channels=out_channels, - bias=bias, - ) - - outputs = [] - inputs = { - "a": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=ifm_shape, dtype=dtype)) - } - - for npu in [False, True]: - mod = tei.make_module(model, params) - outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu)) - - 
tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "ifm_shape,strides,kernel_size,out_channels,bias", - [ - ((1, 10, 20, 3), (1, 1), (8, 5), 4, False), - ((1, 10, 10, 2), (2, 2), (7, 9), 8, True), - ], -) -def test_conv2d_transpose_kernel_size_gt_8( - ifm_shape, strides, kernel_size, out_channels, dtype, bias -): - """Check transpose convolution for big kernel sizes.""" - if ethosn_api_version() in ["3.2.0", "3.1.0"]: - pytest.skip("Skipping because NPU driver 22.11 fails to interpret zp used in the test.") - - np.random.seed(0) - - kernel_layout = "IOHW" - dilation = (1, 1) - groups = 1 - - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - - input_zp = np.random.randint(data_min, data_max) - input_sc = np.random.random() * 2 - kernel_zp = np.random.randint(data_min, data_max) - kernel_sc = np.random.random() * 4 - output_zp, output_sc = tei.get_conv2d_qnn_params( - dtype, input_zp, input_sc, kernel_zp, kernel_sc, ifm_shape[1], ifm_shape[2], ifm_shape[3] - ) - - model, params = _get_model( - shape=ifm_shape, - kernel_h=kernel_size[0], - kernel_w=kernel_size[1], - input_zp=input_zp, - input_sc=input_sc, - kernel_zp=kernel_zp, - kernel_sc=kernel_sc, - output_zp=output_zp, - output_sc=output_sc, - stride=strides, - dilation=dilation, - groups=groups, - kernel_layout=kernel_layout, - dtype=dtype, - out_channels=out_channels, - bias=bias, - ) - - outputs = [] - inputs = { - "a": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=ifm_shape, dtype=dtype)) - } - - for npu in [False, True]: - mod = tei.make_module(model, params) - outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu)) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "shape, stride, dilation, groups, err_msg", - [ - ( - (1, 4, 4, 4), - (1, 1, 1), - (1, 1), - 1, - "stride size=3, stride size must = 2", - ), - ( - (1, 4, 4, 4), - (2, 2), - (2, 2), - 2, - "dilation=[2, 2], dilation must = [1, 1]", - ), - ( - (2, 4, 4, 4), - (1, 1), - (1, 1), - 1, - "batch size=2, batch size must = 1", - ), - ], -) -def test_conv2d_transpose_failure( - shape, - stride, - dilation, - groups, - err_msg, - dtype, -): - """ - Test transpose_conv2d error messages. - """ - np.random.seed(0) - out_channels = 8 - - model, _ = _get_model( - shape=shape, - kernel_h=1, - kernel_w=1, - input_zp=0, - input_sc=1, - kernel_zp=0, - kernel_sc=1, - output_zp=0, - output_sc=1, - stride=stride, - dilation=dilation, - groups=groups, - kernel_layout="IOHW", - dtype=dtype, - out_channels=out_channels, - bias=False, - ) - model = tei.make_ethosn_composite(model, "ethos-n.qnn_conv2d_transpose") - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, err_msg) diff --git a/tests/python/contrib/test_ethosn/test_convert_equivalents.py b/tests/python/contrib/test_ethosn/test_convert_equivalents.py deleted file mode 100644 index 5f05804517b2..000000000000 --- a/tests/python/contrib/test_ethosn/test_convert_equivalents.py +++ /dev/null @@ -1,482 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Unit tests for the convert equivalents pass.""" - -import pytest -import numpy as np - -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from tvm.relay.op.contrib.ethosn import ConvertEquivalents -from tvm.relay import ExprVisitor - -from . import infrastructure as tei -from .test_addition import _get_addition_qnn_params - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize("shape,channels", [((1, 4, 4, 8), 8), ((1, 16, 12, 4), 4)]) -@pytest.mark.parametrize("reverse_inputs", [True, False]) -def test_multiply_to_depthwise(dtype, shape, channels, reverse_inputs): - """Check that multiply is correctly converted to a depthwise operation.""" - np.random.seed(0) - - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - input_zp = np.random.randint(data_min, data_max) - input_sc = np.random.random() * 2 - input2_zp = np.random.randint(data_min, data_max) - input2_sc = np.random.random() * 2 - output_zp, output_sc = tei.get_conv2d_qnn_params( - dtype, input_zp, input_sc, input2_zp, input2_sc, 1, 1, shape[3] - ) - x = relay.var("x", shape=shape, dtype=dtype) - constant_shape = (1, 1, 1, channels) - y_data = np.random.randint(data_min, data_max + 1, size=constant_shape, dtype=dtype) - - def before(): - y = relay.const(y_data, dtype=dtype) - expr = relay.qnn.op.mul( - y if reverse_inputs else x, - x if reverse_inputs else y, - relay.const(input_sc, "float32"), - relay.const(input_zp, "int32"), - relay.const(input2_sc, "float32"), - relay.const(input2_zp, "int32"), - relay.const(output_sc, "float32"), - relay.const(output_zp, "int32"), - ) - composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_mul_to_depthwise") - return tei.make_ethosn_partition(composite) - - def expected(): - constant_shape_hwoi = (1, 1, channels, 1) - y_data_hwoi = y_data.reshape(constant_shape_hwoi) - y_hwoi = relay.const(y_data_hwoi, dtype=dtype) - expr = relay.qnn.op.conv2d( - x, - y_hwoi, - relay.const(input2_zp if reverse_inputs else input_zp, "int32"), - relay.const(input_zp if reverse_inputs else input2_zp, "int32"), - relay.const(input2_sc if reverse_inputs else input_sc, "float32"), - relay.const(input_sc if reverse_inputs else input2_sc, "float32"), - (1, 1), - channels, - (1, 1), - (0, 0), - (1, 1), - channels, - "NHWC", - "HWOI", - "NHWC", - "int32", - ) - expr = relay.nn.bias_add(expr, relay.const(np.zeros((channels,), dtype="int32")), axis=3) - expr = relay.qnn.op.requantize( - expr, - relay.const(input2_sc if reverse_inputs else input_sc, "float32"), - relay.const(input2_zp if reverse_inputs else input_zp, "int32"), - relay.const(output_sc, "float32"), - relay.const(output_zp, "int32"), - out_dtype=dtype, - ) - composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_conv2d") - return tei.make_ethosn_partition(composite) - - mod = before() - mod = ConvertEquivalents()(mod) - expected_mod = expected() - tvm.ir.assert_structural_equal(mod["ethos-n_0"], expected_mod["ethos-n_0"]) - - -@requires_ethosn -@pytest.mark.parametrize( - "dtype,shape,constant_shape", - [("int8", (1, 4, 4), (4,)), ("int32", (1, 16, 
12, 4), (1, 1, 1, 4))], -) -def test_unsupported_multiply_to_depthwise(dtype, shape, constant_shape): - """Check that unsupported variants of multiply to depthwise are not converted.""" - np.random.seed(0) - - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - input_zp = np.random.randint(data_min, data_max) - input_sc = np.random.random() * 2 - input2_zp = np.random.randint(data_min, data_max) - input2_sc = np.random.random() * 2 - output_zp, output_sc = tei.get_conv2d_qnn_params( - dtype, input_zp, input_sc, input2_zp, input2_sc, 1, 1, shape[-1] - ) - x = relay.var("x", shape=shape, dtype=dtype) - y_data = np.random.randint(data_min, data_max + 1, size=constant_shape, dtype=dtype) - - def before(): - y = relay.const(y_data, dtype=dtype) - expr = relay.qnn.op.mul( - x, - y, - relay.const(input_sc, "float32"), - relay.const(input_zp, "int32"), - relay.const(input2_sc, "float32"), - relay.const(input2_zp, "int32"), - relay.const(output_sc, "float32"), - relay.const(output_zp, "int32"), - ) - composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_mul_to_depthwise") - return tei.make_ethosn_partition(composite) - - mod = before() - - error_regex = ( - r'Operation "ethos-n.qnn_mul_to_depthwise" was marked ' - r"as having a valid conversion, but it could not be converted." - ) - - with pytest.raises(tvm.TVMError, match=error_regex): - mod = ConvertEquivalents()(mod) - - -@requires_ethosn -@pytest.mark.parametrize( - "shape,constant_shape", - [((1, 4, 4, 8), (1, 1, 1, 1)), ((1, 16, 12, 4), None)], -) -@pytest.mark.parametrize("reverse_inputs", [True, False]) -def test_multiply_to_reinterpret_quantize(shape, constant_shape, reverse_inputs): - """Check that multiply is correctly converted to a reinterpret quantize operation.""" - np.random.seed(0) - - dtype = "uint8" - - # Multiply can only be offloaded as a reinterpret quantize operation if - # it is an identity option. We must choose the quantization and constant - # data carefully to make sure that this is the case. 
- input_zp = 0 - input_sc = 0.007814894430339336 - input2_zp = 0 - input2_sc = 0.5 - output_zp = 0 - output_sc = 0.9963990449905396 - constant_data = 255 - - x = relay.var("x", shape=shape, dtype=dtype) - y_data = np.array(constant_data, dtype=dtype).reshape(constant_shape) - - def before(): - y = relay.const(y_data, dtype=dtype) - expr = relay.qnn.op.mul( - y if reverse_inputs else x, - x if reverse_inputs else y, - relay.const(input2_sc if reverse_inputs else input_sc, "float32"), - relay.const(input2_zp if reverse_inputs else input_zp, "int32"), - relay.const(input_sc if reverse_inputs else input2_sc, "float32"), - relay.const(input_zp if reverse_inputs else input2_zp, "int32"), - relay.const(output_sc, "float32"), - relay.const(output_zp, "int32"), - ) - composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_mul_to_reinterpret_quantize") - return tei.make_ethosn_partition(composite) - - def expected(): - expr = relay.qnn.op.requantize( - x, - relay.const(input_sc, "float32"), - relay.const(input_zp if reverse_inputs else input_zp, "int32"), - relay.const(output_sc, "float32"), - relay.const(output_zp, "int32"), - out_dtype=dtype, - ) - composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_reinterpret_quantize") - return tei.make_ethosn_partition(composite) - - mod = before() - mod = ConvertEquivalents()(mod) - expected_mod = expected() - tvm.ir.assert_structural_equal(mod["ethos-n_0"], expected_mod["ethos-n_0"]) - - -@requires_ethosn -@pytest.mark.parametrize( - "dtype,shape,constant_shape", - [("float32", (1, 16, 12, 4), None)], -) -def test_unsupported_multiply_to_reinterpret_quantize(dtype, shape, constant_shape): - """ - Check that unsupported variants of multiply conversion to reinterpret - quantize are not converted. - """ - np.random.seed(0) - - # Multiply can only be offloaded as a reinterpret quantize operation if - # it is an identity option. We must choose the quantization and constant - # data carefully to make sure that this is the case. - input_zp = 0 - input_sc = 0.007814894430339336 - input2_zp = 0 - input2_sc = 0.5 - output_zp = 0 - output_sc = 0.9963990449905396 - constant_data = 255 - - x = relay.var("x", shape=shape, dtype=dtype) - y_data = np.array(constant_data, dtype=dtype).reshape(constant_shape) - - def before(): - y = relay.const(y_data, dtype=dtype) - expr = relay.qnn.op.mul( - x, - y, - relay.const(input_sc, "float32"), - relay.const(input_zp, "int32"), - relay.const(input2_sc, "float32"), - relay.const(input2_zp, "int32"), - relay.const(output_sc, "float32"), - relay.const(output_zp, "int32"), - ) - composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_mul_to_reinterpret_quantize") - return tei.make_ethosn_partition(composite) - - mod = before() - - error_regex = ( - r'Operation "ethos-n.qnn_mul_to_reinterpret_quantize" was marked ' - r"as having a valid conversion, but it could not be converted." - ) - - with pytest.raises(tvm.TVMError, match=error_regex): - mod = ConvertEquivalents()(mod) - - -@requires_ethosn -@pytest.mark.parametrize("reverse_inputs", [True, False]) -def test_add_to_depthwise(reverse_inputs): - """ - Check that add is converted correctly. 
- """ - dtype = "uint8" - lhs_shape = (1, 2, 4, 8) - rhs_shape = (1, 1, 1, 8) - np.random.seed(0) - - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, out_sc = _get_addition_qnn_params(dtype) - - x = relay.var("x", shape=lhs_shape, dtype=dtype) - y_data = np.random.randint(data_min, data_max + 1, size=rhs_shape, dtype=dtype) - - def before(): - y = relay.const(y_data) - expr = relay.qnn.op.add( - lhs=y if reverse_inputs else x, - rhs=x if reverse_inputs else y, - lhs_scale=relay.const(lhs_sc, "float32"), - lhs_zero_point=relay.const(lhs_zp, "int32"), - rhs_scale=relay.const(rhs_sc, "float32"), - rhs_zero_point=relay.const(rhs_zp, "int32"), - output_scale=relay.const(out_sc, "float32"), - output_zero_point=relay.const(out_zp, "int32"), - ) - composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_add_to_depthwise") - return tei.make_ethosn_partition(composite) - - class ConversionChecker(ExprVisitor): - """ - Pass to check the new composite function is in the expected format. - """ - - sequence = ["qnn.conv2d", "nn.bias_add", "qnn.requantize"] - - # pylint: disable=invalid-name - def visit_function(self, fn): - composite_name = fn.attrs["Composite"] - expected = "ethos-n.qnn_conv2d" - assert ( - composite_name == expected - ), f"Expected Composite attribute {expected} but got {composite_name}" - super().visit_function(fn) - - def visit_call(self, call): - op_name = call.op.name - expected_name = self.sequence.pop() - assert op_name == expected_name, f"Got operator {op_name} but expected {expected_name}" - super().visit_call(call) - - mod = before() - mod = ConvertEquivalents()(mod) - mod = ConversionChecker().visit(mod["ethos-n_0"].body.op) - - -@requires_ethosn -@pytest.mark.parametrize( - "dtype,lhs_shape,rhs_shape", [("uint8", (1, 4, 4), (1, 1, 4)), ("int32", (1, 4, 4, 4), (4,))] -) -def test_unsupported_add_to_depthwise(dtype, lhs_shape, rhs_shape): - """Check that unsupported variants of add are not converted.""" - np.random.seed(0) - - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, out_sc = _get_addition_qnn_params(dtype) - - x = relay.var("x", shape=lhs_shape, dtype=dtype) - y_data = np.random.randint(data_min, data_max + 1, size=rhs_shape, dtype=dtype) - - def before(): - y = relay.const(y_data) - expr = relay.qnn.op.add( - lhs=x, - rhs=y, - lhs_scale=relay.const(lhs_sc, "float32"), - lhs_zero_point=relay.const(lhs_zp, "int32"), - rhs_scale=relay.const(rhs_sc, "float32"), - rhs_zero_point=relay.const(rhs_zp, "int32"), - output_scale=relay.const(out_sc, "float32"), - output_zero_point=relay.const(out_zp, "int32"), - ) - composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_add_to_depthwise") - return tei.make_ethosn_partition(composite) - - mod = before() - - error_regex = ( - r'Operation "ethos-n.qnn_add_to_depthwise" was marked ' - r"as having a valid conversion, but it could not be converted." 
- ) - - with pytest.raises(tvm.TVMError, match=error_regex): - mod = ConvertEquivalents()(mod) - - -@requires_ethosn -@pytest.mark.parametrize( - "shape,constant_shape", - [ - ((1, 4, 4, 8), (1, 1, 1, 1)), - ((1, 16, 12, 4), None), - ], -) -@pytest.mark.parametrize("reverse_inputs", [True, False]) -def test_add_to_reinterpret_quantize(shape, constant_shape, reverse_inputs): - """Check that add is correctly converted to a reinterpret quantize operation.""" - np.random.seed(0) - - dtype = "uint8" - - # Add can only be offloaded as a reinterpret quantize operation if - # it is an identity option. We must choose the quantization and constant - # data carefully to make sure that this is the case. - input_zp = 128 - input_sc = 0.0078125 - input2_zp = 0 - input2_sc = 0.003921568859368563 - output_zp = 0 - output_sc = 0.007814894430339336 - constant_data = 255 - - x = relay.var("x", shape=shape, dtype=dtype) - y_data = np.array(constant_data, dtype=dtype).reshape(constant_shape) - - def before(): - y = relay.const(y_data, dtype=dtype) - expr = relay.qnn.op.add( - y if reverse_inputs else x, - x if reverse_inputs else y, - relay.const(input2_sc if reverse_inputs else input_sc, "float32"), - relay.const(input2_zp if reverse_inputs else input_zp, "int32"), - relay.const(input_sc if reverse_inputs else input2_sc, "float32"), - relay.const(input_zp if reverse_inputs else input2_zp, "int32"), - relay.const(output_sc, "float32"), - relay.const(output_zp, "int32"), - ) - composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_add_to_reinterpret_quantize") - return tei.make_ethosn_partition(composite) - - def expected(): - expr = relay.qnn.op.requantize( - x, - relay.const(input_sc, "float32"), - relay.const(input_zp if reverse_inputs else input_zp, "int32"), - relay.const(output_sc, "float32"), - relay.const(output_zp, "int32"), - out_dtype=dtype, - ) - composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_reinterpret_quantize") - return tei.make_ethosn_partition(composite) - - mod = before() - mod = ConvertEquivalents()(mod) - expected_mod = expected() - tvm.ir.assert_structural_equal(mod["ethos-n_0"], expected_mod["ethos-n_0"]) - - -@requires_ethosn -@pytest.mark.parametrize( - "dtype,shape,constant_shape", - [ - ("float32", (1, 16, 12, 4), None), - ], -) -def test_unsupported_add_to_reinterpret_quantize(dtype, shape, constant_shape): - """Check that unsupported variants of add to reinterpret quantize are not converted.""" - np.random.seed(0) - - # Add can only be offloaded as a reinterpret quantize operation if - # it is an identity option. We must choose the quantization and constant - # data carefully to make sure that this is the case. 
- input_zp = 128 - input_sc = 0.0078125 - input2_zp = 0 - input2_sc = 0.003921568859368563 - output_zp = 0 - output_sc = 0.007814894430339336 - constant_data = 255 - - x = relay.var("x", shape=shape, dtype=dtype) - y_data = np.array(constant_data, dtype=dtype).reshape(constant_shape) - - def before(): - y = relay.const(y_data, dtype=dtype) - expr = relay.qnn.op.add( - x, - y, - relay.const(input_sc, "float32"), - relay.const(input_zp, "int32"), - relay.const(input2_sc, "float32"), - relay.const(input2_zp, "int32"), - relay.const(output_sc, "float32"), - relay.const(output_zp, "int32"), - ) - composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_add_to_reinterpret_quantize") - return tei.make_ethosn_partition(composite) - - mod = before() - - error_regex = ( - r'Operation "ethos-n.qnn_add_to_reinterpret_quantize" was marked ' - r"as having a valid conversion, but it could not be converted." - ) - - with pytest.raises(tvm.TVMError, match=error_regex): - mod = ConvertEquivalents()(mod) diff --git a/tests/python/contrib/test_ethosn/test_depth_to_space.py b/tests/python/contrib/test_ethosn/test_depth_to_space.py deleted file mode 100644 index 7bbd532241fd..000000000000 --- a/tests/python/contrib/test_ethosn/test_depth_to_space.py +++ /dev/null @@ -1,90 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N integration depth-to-space tests""" - -import pytest -import numpy as np -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from . 
import infrastructure as tei - - -def _get_model(shape, block, dtype, layout): - a = relay.var("a", shape=shape, dtype=dtype) - depth = relay.nn.depth_to_space(a, layout=layout, block_size=block) - return depth - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "shape", - [ - (1, 16, 16, 16), - (1, 64, 32, 16), - ], -) -def test_depth_to_space(dtype, shape): - """Compare Depth To Space output with TVM.""" - np.random.seed(0) - - inputs = { - "a": tvm.nd.array( - np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=shape, dtype=dtype) - ) - } - outputs = [] - for npu in [False, True]: - model = _get_model(shape, 2, dtype, "NHWC") - mod = tei.make_module(model, {}) - outputs.append( - tei.build_and_run( - mod, - inputs, - 1, - {}, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize( - "shape,block,dtype,layout,err_msg", - [ - ((2, 16, 16, 16), 2, "uint8", "NHWC", "batch size=2, batch size must = 1"), - ( - (1, 16, 16, 16), - 2, - "int16", - "NHWC", - "dtype='int16', dtype must be either uint8, int8 or int32;", - ), - ((1, 16, 16, 16), 4, "uint8", "NHWC", "Only block size of 2 is supported"), - ((1, 16, 16, 16), 2, "uint8", "NCHW", "Input layer must be NHWC or NHWCB"), - ], -) -def test_depth_to_space_failure(shape, block, dtype, layout, err_msg): - """Check Depth To Space error messages.""" - model = _get_model(shape, block, dtype, layout) - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, err_msg) diff --git a/tests/python/contrib/test_ethosn/test_fullyconnected.py b/tests/python/contrib/test_ethosn/test_fullyconnected.py deleted file mode 100644 index e84464f90217..000000000000 --- a/tests/python/contrib/test_ethosn/test_fullyconnected.py +++ /dev/null @@ -1,180 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N integration fully connected tests""" - -import numpy as np -import pytest - -import tvm -from tvm import relay -from tvm.testing import requires_ethosn - -from . 
import infrastructure as tei - - -def _get_model( - shape, weight_shape, input_zp, input_sc, kernel_zp, kernel_sc, output_zp, output_sc, dtype -): - """Return a model an any parameters it may have""" - a = relay.var("a", shape=shape, dtype=dtype) - weights_array = tvm.nd.array( - np.random.randint( - np.iinfo(dtype).min, high=np.iinfo(dtype).max, size=weight_shape, dtype=dtype - ) - ) - weights = relay.const(weights_array, dtype) - dense = relay.qnn.op.dense( - a, - weights, - input_zero_point=relay.const(input_zp, "int32"), - kernel_zero_point=relay.const(kernel_zp, "int32"), - input_scale=relay.const(input_sc, "float32"), - kernel_scale=relay.const(kernel_sc, "float32"), - units=weight_shape[0], - out_dtype="int32", - ) - b = tvm.nd.array(np.random.randint(0, high=255, size=(weight_shape[0],), dtype="int32")) - biasc = relay.const(b, "int32") - bias = relay.nn.bias_add(dense, biasc) - req = relay.qnn.op.requantize( - bias, - relay.const(input_sc * kernel_sc, "float32"), # input zero scale - relay.const(input_zp * kernel_zp, "int32"), # input zero point - relay.const(output_sc, "float32"), # output zero scale - relay.const(output_zp, "int32"), # output zero point - out_dtype=dtype, - ) - params = {"w": weights_array, "b": b} - return req, params - - -@requires_ethosn -@pytest.mark.parametrize( - "shape,out_channels", - [ - ((1, 1024), 64), - ((1, 16384), 1), - ((1, 1280), 1000), - ], -) -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -def test_fullyconnected(shape, out_channels, dtype): - """Compare Fully Connected output with TVM.""" - - np.random.seed(0) - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - - inputs = { - "a": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=shape, dtype=dtype)), - } - outputs = [] - - input_zp = np.random.randint(data_min, data_max) - input_sc = np.random.random() * 2 - kernel_zp = np.random.randint(data_min, data_max) - kernel_sc = np.random.random() * 2 - output_zp, output_sc = tei.get_conv2d_qnn_params( - dtype, - input_zp, - input_sc, - kernel_zp, - kernel_sc, - shape[0], - shape[1], - 1, - ) - model, params = _get_model( - shape, - (out_channels, shape[1]), - input_zp, - input_sc, - kernel_zp, - kernel_sc, - output_zp, - output_sc, - dtype, - ) - for npu in [False, True]: - mod = tei.make_module(model, params) - outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu)) - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize( - "shape,weight_shape,err_msg", - [ - ( - (1, 1, 1, 64), - (1, 64), - "Weights tensor must have I dimension equal to the number" - " of channels of the input tensor.;", - ), - ((1024, 64), (1, 64), "batch size=1024, batch size must = 1;"), - ], -) -def test_fullyconnected_failure(shape, weight_shape, err_msg): - """Check Fully Connected error messages.""" - np.random.seed(0) - - dtype = "uint8" - - model, _ = _get_model( - shape, - weight_shape, - 0, - 1, - 0, - 1, - 0, - 1, - dtype, - ) - model = tei.make_ethosn_composite(model, "ethos-n.qnn_fc") - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, err_msg) - - -@requires_ethosn -def test_fullyconnected_scale_out_of_range(): - """Check Fully Connected out of range scale error message.""" - np.random.seed(0) - - input_sc = 1024 - kernel_sc = 1024 - output_sc = 1 - - model, _ = _get_model( - (1, 64), - (1, 64), - 0, - input_sc, - 0, - kernel_sc, - 0, - output_sc, - "uint8", - ) - model = tei.make_ethosn_composite(model, "ethos-n.qnn_fc") - mod = tei.make_ethosn_partition(model) - 
expected_error_msg = ( - "Overall scale (of the input * weights / output) should be in the range (2^-32, 65536)" - ) - tei.test_error(mod, {}, expected_error_msg) diff --git a/tests/python/contrib/test_ethosn/test_inline_partitions.py b/tests/python/contrib/test_ethosn/test_inline_partitions.py deleted file mode 100644 index 735148bc660a..000000000000 --- a/tests/python/contrib/test_ethosn/test_inline_partitions.py +++ /dev/null @@ -1,161 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Tests for the 'InlineNonComputeIntensivePartitions' pass. -""" - -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from tvm.relay.op.contrib.ethosn import InlineNonComputeIntensivePartitions - -from . import infrastructure as tei - - -@requires_ethosn -def test_single_reshape(): - """Check that a single reshape is inlined correctly.""" - - def get_reshape(): - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - return relay.reshape(x, newshape=(2, 2, 4)) - - def before(): - reshape = get_reshape() - return tei.make_ethosn_partition(reshape) - - def expected(): - reshape = get_reshape() - mod = tvm.IRModule.from_expr(reshape) - return relay.transform.InferType()(mod) - - mod = before() - mod = InlineNonComputeIntensivePartitions()(mod) - expected_mod = expected() - tvm.ir.assert_structural_equal(mod, expected_mod) - - -@requires_ethosn -def test_multiple_non_compute_intensive_ops(): - """ - Check that a partitioned function is correctly inlined - when it contains multiple non-compute intensive operations. - """ - - def get_graph(): - x = relay.var("x", shape=(2, 2, 4), dtype="int8") - x = relay.reshape(x, newshape=(1, 2, 2, 4)) - x = relay.clip(x, 0.0, 1.0) - x = relay.reshape(x, newshape=(2, 2, 4)) - return relay.clip(x, 0.0, 1.0) - - def before(): - func = get_graph() - return tei.make_ethosn_partition(func) - - def expected(): - func = get_graph() - mod = tvm.IRModule.from_expr(func) - return relay.transform.InferType()(mod) - - mod = before() - mod = InlineNonComputeIntensivePartitions()(mod) - expected_mod = expected() - tvm.ir.assert_structural_equal(mod, expected_mod) - - -@requires_ethosn -def test_compute_intensive_ops(): - """ - Check that a partitioned function that is considered - compute intensive is not inlined. 
- """ - - def before(): - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - x = relay.nn.max_pool2d(x, layout="NHWC") - x = relay.reshape(x, newshape=(2, 2, 4)) - return tei.make_ethosn_partition(x) - - mod = before() - transformed_mod = InlineNonComputeIntensivePartitions()(mod) - for global_var in mod.get_global_vars(): - tvm.ir.assert_structural_equal(mod[global_var], transformed_mod[global_var]) - - -@requires_ethosn -def test_multiple_partitioned_functions(): - """ - Tests the pass on a number of partitioned functions. - """ - - def before(): - composite_func_name = "ethos-n_0" - inp = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - - # partitioned func 1 (non compute intensive) - x = relay.reshape(inp, newshape=(1, 2, 2, 4)) - partitioned_func_1 = tei.make_ethosn_partition(x)[composite_func_name] - gv_1 = relay.GlobalVar("ethos-n_0") - - # partitioned func 2 (compute intensive) - x = relay.nn.max_pool2d(inp, layout="NHWC") - partitioned_func_2 = tei.make_ethosn_partition(x)[composite_func_name] - gv_2 = relay.GlobalVar("ethos-n_1") - - # partitioned func 3 (non compute intensive) - x = relay.clip(inp, 0.0, 1.0) - partitioned_func_3 = tei.make_ethosn_partition(x)[composite_func_name] - gv_3 = relay.GlobalVar("ethos-n_2") - - mod = tvm.IRModule({}) - mod[gv_1] = partitioned_func_1 - mod[gv_2] = partitioned_func_2 - mod[gv_3] = partitioned_func_3 - main_expr = relay.Call(gv_1, [inp]) - main_expr = relay.Call(gv_2, [main_expr]) - main_expr = relay.Call(gv_3, [main_expr]) - mod["main"] = relay.Function([inp], main_expr) - return relay.transform.InferType()(mod) - - def expected(): - composite_func_name = "ethos-n_0" - inp = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - - # partitioned func 2 (compute intensive) - x = relay.nn.max_pool2d(inp, layout="NHWC") - partitioned_func_2 = tei.make_ethosn_partition(x)[composite_func_name] - gv_2 = relay.GlobalVar("ethos-n_1") - - mod = tvm.IRModule({}) - mod[gv_2] = partitioned_func_2 - main_expr = relay.reshape(inp, newshape=(1, 2, 2, 4)) - main_expr = relay.Call(gv_2, [main_expr]) - main_expr = relay.clip(main_expr, 0.0, 1.0) - mod["main"] = relay.Function([inp], main_expr) - return relay.transform.InferType()(mod) - - mod = before() - mod = InlineNonComputeIntensivePartitions()(mod) - expected_mod = expected() - for global_var in mod.get_global_vars(): - tvm.ir.assert_structural_equal( - mod[global_var.name_hint], - expected_mod[global_var.name_hint], - map_free_vars=True, - ) diff --git a/tests/python/contrib/test_ethosn/test_leaky_relu.py b/tests/python/contrib/test_ethosn/test_leaky_relu.py deleted file mode 100644 index baa1d34fbcaa..000000000000 --- a/tests/python/contrib/test_ethosn/test_leaky_relu.py +++ /dev/null @@ -1,97 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Integration tests for Leaky ReLU""" - -import pytest -import numpy as np - -import tvm -from tvm import relay -from tvm.testing import requires_ethosn - -from . import infrastructure as tei - - -def _get_model(shape, input_zp, input_sc, output_zp, output_sc, dtype, alpha): - x = relay.var("x", shape=shape, dtype=dtype) - x = relay.qnn.op.dequantize( - x, - input_scale=relay.const(input_sc, "float32"), - input_zero_point=relay.const(input_zp, "int32"), - ) - x = relay.nn.leaky_relu(x, alpha=alpha) - return relay.qnn.op.quantize( - x, - output_scale=relay.const(output_sc, "float32"), - output_zero_point=relay.const(output_zp, "int32"), - out_dtype=dtype, - ) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize("shape", [(1, 52, 52, 3), (1, 3, 8, 2)]) -@pytest.mark.parametrize("alpha", [0.001, 0.5678]) -def test_leaky_relu(dtype, shape, alpha): - """Compare Leaky ReLU output with TVM.""" - - np.random.seed(0) - - iinfo = np.iinfo(dtype) - zp_min = iinfo.min - zp_max = iinfo.max - input_zp = zp_min + 128 - input_sc = 0.0068132 - output_zp = zp_min + 126 # values offset more than 126 can cause saturation - output_sc = 0.0078125 - - inputs = {"x": tvm.nd.array(np.random.randint(zp_min, high=zp_max, size=shape, dtype=dtype))} - outputs = [] - for npu in [False, True]: - model = _get_model(shape, input_zp, input_sc, output_zp, output_sc, dtype, alpha) - mod = tei.make_module(model, []) - outputs.append( - tei.build_and_run( - mod, - inputs, - 1, - {}, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["int8"]) -@pytest.mark.parametrize("shape", [(1, 14, 14, 2)]) -@pytest.mark.parametrize("alpha", [-1.34, 2.32, 1, 0]) -def test_leaky_relu_unsupported_alpha(dtype, shape, alpha): - """Test unsupported values of alpha (<= 0, >= 1) in Leaky ReLU.""" - - iinfo = np.iinfo(dtype) - zp_min = iinfo.min - - err_msg = f"leaky relu alpha must be less than 1 and greater than 0, but was {alpha}" - - model = _get_model(shape, zp_min + 120, 0.0068132, zp_min + 128, 0.0078125, dtype, alpha) - model = tei.make_ethosn_composite(model, "ethos-n.qnn_leaky_relu") - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, err_msg) diff --git a/tests/python/contrib/test_ethosn/test_mean.py b/tests/python/contrib/test_ethosn/test_mean.py deleted file mode 100644 index 0ad7e17faed8..000000000000 --- a/tests/python/contrib/test_ethosn/test_mean.py +++ /dev/null @@ -1,80 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N integration mean tests""" - -import numpy as np -import pytest -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from . 
import infrastructure as tei - - -def _get_model(shape, axis, keepdims, input_zp, input_sc, output_zp, output_sc, dtype): - a = relay.var("a", shape=shape, dtype=dtype) - casted = relay.op.cast(a, "int32") - mean = relay.mean(casted, axis, keepdims) - model = relay.qnn.op.requantize( - mean, - input_scale=relay.const(input_sc, "float32"), - input_zero_point=relay.const(input_zp, "int32"), - output_scale=relay.const(output_sc, "float32"), - output_zero_point=relay.const(output_zp, "int32"), - out_dtype=dtype, - ) - return model - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize("shape", [(1, 7, 7, 2048), (1, 8, 8)]) -def test_mean(dtype, shape): - """Compare Mean output with TVM.""" - - np.random.seed(0) - - zp_min = np.iinfo(dtype).min - zp_max = np.iinfo(dtype).max - - inputs = { - "a": tvm.nd.array(np.random.randint(zp_min, high=zp_max + 1, size=shape, dtype=dtype)), - } - outputs = [] - for npu in [False, True]: - model = _get_model( - shape, [1, 2], True, zp_min + 128, 0.0784314, zp_min + 128, 0.0784314, dtype=dtype - ) - mod = tei.make_module(model, []) - outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu)) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["int8", "uint8"]) -def test_mean_non_equal_quantization(dtype): - """Test mean is not offloaded when quantization is not equal.""" - - np.random.seed(0) - - shape = (1, 7, 7, 2048) - zp_min = np.iinfo(dtype).min - - model = _get_model(shape, [1, 2], True, zp_min + 120, 0.0068132, zp_min + 128, 0.0078125, dtype) - mod = tei.make_module(model, []) - tei.build(mod, {}, npu=True, expected_host_ops=3, npu_partitions=0) diff --git a/tests/python/contrib/test_ethosn/test_multiply.py b/tests/python/contrib/test_ethosn/test_multiply.py deleted file mode 100644 index d7ebcfab40a8..000000000000 --- a/tests/python/contrib/test_ethosn/test_multiply.py +++ /dev/null @@ -1,263 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Integration tests for Multiply.""" - -import pytest -import numpy as np - -import tvm -from tvm import relay -from tvm.testing import requires_ethosn - -from . 
import infrastructure as tei - - -def _get_model( - shape, - constant_shape, - input_zp, - input_sc, - input2_zp, - input2_sc, - output_zp, - output_sc, - dtype, - reverse_inputs=False, - constant_data=None, -): - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - - x = relay.var("x", shape=shape, dtype=dtype) - if constant_data: - y_data = np.array(constant_data, dtype=dtype).reshape(constant_shape) - else: - y_data = np.random.randint(data_min, data_max + 1, size=constant_shape, dtype=dtype) - y = relay.const(y_data, dtype=dtype) - - out = relay.qnn.op.mul( - y if reverse_inputs else x, - x if reverse_inputs else y, - relay.const(input_sc, "float32"), - relay.const(input_zp, "int32"), - relay.const(input2_sc, "float32"), - relay.const(input2_zp, "int32"), - relay.const(output_sc, "float32"), - relay.const(output_zp, "int32"), - ) - params = {"y": y_data} - return out, params - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "shape,constant_shape", - [((1, 4, 4, 8), (1, 1, 1, 8)), ((1, 16, 12, 4), (4,))], -) -@pytest.mark.parametrize("reverse_inputs", [False, True]) -def test_multiply_to_depthwise(dtype, shape, constant_shape, reverse_inputs): - """Compare Multiply -> Depthwise conversion output with TVM.""" - - np.random.seed(0) - - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - input_zp = np.random.randint(data_min, data_max) - input_sc = np.random.random() * 2 - input2_zp = np.random.randint(data_min, data_max) - input2_sc = np.random.random() * 2 - output_zp, output_sc = tei.get_conv2d_qnn_params( - dtype, input_zp, input_sc, input2_zp, input2_sc, 1, 1, shape[3] - ) - - model, params = _get_model( - shape, - constant_shape, - input_zp, - input_sc, - input2_zp, - input2_sc, - output_zp, - output_sc, - dtype, - reverse_inputs, - ) - inputs = {"x": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=shape, dtype=dtype))} - outputs = [] - for npu in [False, True]: - mod = tei.make_module(model, params) - outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu)) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize( - "shape,constant_shape", [((1, 4, 5, 8), (1, 1, 1, 1)), ((1, 3, 7, 10), None)] -) -@pytest.mark.parametrize("reverse_inputs", [False, True]) -def test_multiply_to_reinterpret_quantize(shape, constant_shape, reverse_inputs): - """Compare Multiply -> Reinterpret Quantize conversion output with TVM.""" - np.random.seed(0) - - dtype = "uint8" - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - - # Multiply can only be offloaded as a reinterpret quantize operation if - # it is an identity option. We must choose the quantization and constant - # data carefully to make sure that this is the case. 
- input_zp = 0 - input_sc = 0.007814894430339336 - input2_zp = 0 - input2_sc = 0.5 - output_zp = 0 - output_sc = 0.9963990449905396 - constant_data = 255 - - model, params = _get_model( - shape, - constant_shape, - input_zp, - input_sc, - input2_zp, - input2_sc, - output_zp, - output_sc, - dtype, - reverse_inputs, - constant_data, - ) - inputs = {"x": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=shape, dtype=dtype))} - outputs = [] - for npu in [False, True]: - mod = tei.make_module(model, params) - outputs.append( - tei.build_and_run( - mod, - inputs, - 1, - params, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -def test_multiply_multiple_inputs_unsupported(): - """Check multiply operator with two inputs is not offloaded.""" - - np.random.seed(0) - - shape = (1, 4, 5, 6) - dtype = "int8" - - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - input_zp = np.random.randint(data_min, data_max) - input_sc = np.random.random() * 2 - input2_zp = np.random.randint(data_min, data_max) - input2_sc = np.random.random() * 2 - output_zp, output_sc = tei.get_conv2d_qnn_params( - dtype, input_zp, input_sc, input2_zp, input2_sc, 1, 1, shape[3] - ) - - x = relay.var("x", shape=shape, dtype=dtype) - y = relay.var("y", shape=shape, dtype=dtype) - model = relay.qnn.op.mul( - x, - y, - relay.const(input_sc, "float32"), - relay.const(input_zp, "int32"), - relay.const(input2_sc, "float32"), - relay.const(input2_zp, "int32"), - relay.const(output_sc, "float32"), - relay.const(output_zp, "int32"), - ) - - expected_host_ops = 1 - npu_partitions = 0 - for npu in [False, True]: - mod = tei.make_module(model, {}) - tei.build( - mod, - {}, - npu=npu, - expected_host_ops=expected_host_ops, - npu_partitions=npu_partitions, - ) - - -@requires_ethosn -@pytest.mark.parametrize( - "dtype,shape,constant_shape", - [ - ("int16", (1, 4, 5, 6), (1, 1, 1, 6)), - ("int8", (1, 1, 3), (1, 1, 1, 3)), - ("int8", (1, 2, 4, 8), (1, 2, 4, 8)), - ], -) -def test_multiply_unsupported(dtype, shape, constant_shape): - """Check multiply operator with unsupported attributes is not offloaded.""" - - np.random.seed(0) - - iinfo = np.iinfo(dtype) - data_min = iinfo.min - data_max = iinfo.max - input_zp = np.random.randint(data_min, data_max) - input_sc = np.random.random() * 2 - input2_zp = np.random.randint(data_min, data_max) - input2_sc = np.random.random() * 2 - output_zp, output_sc = tei.get_conv2d_qnn_params( - dtype, input_zp, input_sc, input2_zp, input2_sc, 1, 1, shape[-1] - ) - - model, params = _get_model( - shape, - constant_shape, - input_zp, - input_sc, - input2_zp, - input2_sc, - output_zp, - output_sc, - dtype, - reverse_inputs=False, - constant_data=False, - ) - - expected_host_ops = 1 - npu_partitions = 0 - for npu in [False, True]: - mod = tei.make_module(model, {}) - tei.build( - mod, - params, - npu=npu, - expected_host_ops=expected_host_ops, - npu_partitions=npu_partitions, - ) diff --git a/tests/python/contrib/test_ethosn/test_networks.py b/tests/python/contrib/test_ethosn/test_networks.py deleted file mode 100644 index dfbd262abf96..000000000000 --- a/tests/python/contrib/test_ethosn/test_networks.py +++ /dev/null @@ -1,223 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=wrong-import-position, wrong-import-order - -"""Arm(R) Ethos(TM)-N integration end-to-end network tests""" - -import pytest - -pytest.importorskip("tflite") -pytest.importorskip("tensorflow") - -import tflite.Model - -from tvm import relay -from tvm.testing import requires_ethosn -from tvm.contrib import download -import tvm.relay.testing.tf as tf_testing - -from . import infrastructure as tei - - -def _get_tflite_model(tflite_model_path, inputs_dict, dtype): - with open(tflite_model_path, "rb") as f: - tflite_model_buffer = f.read() - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buffer, 0) - shape_dict = {} - dtype_dict = {} - for value in inputs_dict: - input_shape = inputs_dict[value] - shape_dict[value] = input_shape - dtype_dict[value] = dtype - - return relay.frontend.from_tflite( - tflite_model, - shape_dict=shape_dict, - dtype_dict=dtype_dict, - ) - - -def _test_image_network( - model_url, - model_sub_path, - input_dict, - compile_hash, - output_count, - host_ops=0, - npu_partitions=1, - run=False, -): - """Test an image network. - - Parameters - ---------- - model_url : str - The URL to the model. - model_sub_path : str - The name of the model file. - input_dict : dict - The input dict. - compile_hash : str, set - The compile hash(es) to check the compilation output against. - output_count : int - The expected number of outputs. - host_ops : int - The expected number of host operators. - npu_partitions : int - The expected number of Ethos-N partitions. - run : bool - Whether or not to try running the network. If hardware isn't - available, the run will still take place but with a mocked - inference function, so the results will be incorrect. This is - therefore just to test the runtime flow is working rather than - to check the correctness/accuracy. - - """ - - def get_model(): - if model_url[-3:] in ("tgz", "zip"): - model_path = tf_testing.get_workload_official( - model_url, - model_sub_path, - ) - else: - model_path = download.download_testdata( - model_url, - model_sub_path, - ) - return _get_tflite_model(model_path, input_dict, "uint8") - - inputs = {} - for input_name in input_dict: - input_shape = input_dict[input_name] - inputs[input_name] = tei.get_real_image(input_shape[1], input_shape[2]) - - mod, params = get_model() - m = tei.build(mod, params, npu=True, expected_host_ops=host_ops, npu_partitions=npu_partitions) - tei.assert_lib_hash(m.get_lib(), compile_hash) - if run: - tei.run(m, inputs, output_count, npu=True) - - -@requires_ethosn -def test_mobilenet_v1(): - """Compare compile hashes for mobilenetv1 with an expected result.""" - # If this test is failing due to a hash mismatch, please notify @lhutton1 and - # @Leo-arm. The hash is there to catch any changes in the behaviour of the - # codegen, which could come about from either a change in Support Library - # version or a change in the Ethos-N codegen. 
To update this requires running - # on hardware that isn't available in CI. - _compile_hash = {"c37fec1f214c7f93ce49ee4e3b587969"} - _test_image_network( - model_url="https://storage.googleapis.com/download.tensorflow.org/" - "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz", - model_sub_path="mobilenet_v1_1.0_224_quant.tflite", - input_dict={"input": (1, 224, 224, 3)}, - compile_hash=_compile_hash, - output_count=1, - host_ops=3, - npu_partitions=1, - run=True, - ) - - -@requires_ethosn -def test_resnet_50_int8(): - """Compare compile hashes for resnet50 with an expected result.""" - # If this test is failing due to a hash mismatch, please notify @lhutton1 and - # @Leo-arm. The hash is there to catch any changes in the behaviour of the - # codegen, which could come about from either a change in Support Library - # version or a change in the Ethos-N codegen. To update this requires running - # on hardware that isn't available in CI. - _compile_hash = { - "f16dc9caa8e696bc5da8a5c6a644eb72", - "41acecca37b2735bd580f6ec38d8c2e0", - } - _test_image_network( - model_url="https://raw.githubusercontent.com/dmlc/web-data/main/tensorflow/" - "models/Quantized/resnet_50_quantized.tflite", - model_sub_path="resnet_50_quantized.tflite", - input_dict={"input": (1, 224, 224, 3)}, - compile_hash=_compile_hash, - output_count=1, - host_ops=10, - npu_partitions=2, - ) - - -@requires_ethosn -def test_inception_v3(): - """Compare compile hashes for inceptionv3 with an expected result.""" - # If this test is failing due to a hash mismatch, please notify @lhutton1 and - # @Leo-arm. The hash is there to catch any changes in the behaviour of the - # codegen, which could come about from either a change in Support Library - # version or a change in the Ethos-N codegen. To update this requires running - # on hardware that isn't available in CI. - _compile_hash = {"cff892eb15944756f22dad4b83c756d2"} - _test_image_network( - model_url="https://storage.googleapis.com/download.tensorflow.org/" - "models/tflite_11_05_08/inception_v3_quant.tgz", - model_sub_path="inception_v3_quant.tflite", - input_dict={"input": (1, 299, 299, 3)}, - compile_hash=_compile_hash, - output_count=1, - host_ops=0, - npu_partitions=1, - ) - - -@requires_ethosn -def test_inception_v4(): - """Compare compile hashes for inceptionv4 with an expected result.""" - # If this test is failing due to a hash mismatch, please notify @lhutton1 and - # @Leo-arm. The hash is there to catch any changes in the behaviour of the - # codegen, which could come about from either a change in Support Library - # version or a change in the Ethos-N codegen. To update this requires running - # on hardware that isn't available in CI. - _compile_hash = {"c00c119506b34c8e87f81aa009b42431"} - _test_image_network( - model_url="https://storage.googleapis.com/download.tensorflow.org/" - "models/inception_v4_299_quant_20181026.tgz", - model_sub_path="inception_v4_299_quant.tflite", - input_dict={"input": (1, 299, 299, 3)}, - compile_hash=_compile_hash, - output_count=1, - host_ops=3, - npu_partitions=1, - ) - - -@requires_ethosn -def test_ssd_mobilenet_v1(): - """Compare compile hashes for ssdmobilenetv1 with an expected result.""" - # If this test is failing due to a hash mismatch, please notify @lhutton1 and - # @Leo-arm. The hash is there to catch any changes in the behaviour of the - # codegen, which could come about from either a change in Support Library - # version or a change in the Ethos-N codegen. 
To update this requires running - # on hardware that isn't available in CI. - _compile_hash = {"04855b9b9e0ab3f3768495059e12c5cf"} - _test_image_network( - model_url="https://storage.googleapis.com/download.tensorflow.org/" - "models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip", - model_sub_path="detect.tflite", - input_dict={"normalized_input_image_tensor": (1, 300, 300, 3)}, - compile_hash=_compile_hash, - output_count=4, - host_ops=14, - npu_partitions=1, - ) diff --git a/tests/python/contrib/test_ethosn/test_pooling.py b/tests/python/contrib/test_ethosn/test_pooling.py deleted file mode 100644 index 1e0487d76778..000000000000 --- a/tests/python/contrib/test_ethosn/test_pooling.py +++ /dev/null @@ -1,125 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N integration pooling tests""" - -import numpy as np -import pytest -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from . import infrastructure as tei - - -def _get_model(shape, typef, sizes, strides, pads, layout, dtype): - """Return a model and any parameters it may have""" - req = relay.var("a", shape=shape, dtype=dtype) - if typef is relay.nn.avg_pool2d: - req = relay.cast(req, "int32") - req = typef(req, pool_size=sizes, strides=strides, padding=pads, ceil_mode=True, layout=layout) - if typef is relay.nn.avg_pool2d: - req = relay.cast(req, dtype) - return req - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "shape,typef,size,stride,pad", - [ - ((1, 8, 8, 8), relay.nn.max_pool2d, (2, 2), (2, 2), (0, 0, 0, 0)), - ((1, 9, 9, 9), relay.nn.max_pool2d, (3, 3), (2, 2), (0, 0, 0, 0)), - ((1, 8, 8, 8), relay.nn.avg_pool2d, (3, 3), (1, 1), (1, 1, 1, 1)), - ], -) -def test_pooling(dtype, shape, typef, size, stride, pad): - """Compare Pooling output with TVM.""" - np.random.seed(0) - - layout = "NHWC" - - inputs = { - "a": tvm.nd.array( - np.random.randint( - low=np.iinfo(dtype).min, high=np.iinfo(dtype).max + 1, size=shape, dtype=dtype - ) - ), - } - outputs = [] - model = _get_model(shape, typef, size, stride, pad, layout, dtype) - for npu in [False, True]: - mod = tei.make_module(model, {}) - outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu)) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize( - "shape,size,stride,layout,dtype,err_msg", - [ - ( - (2, 8, 8, 8), - (2, 2), - (2, 2), - "NHWC", - "uint8", - "batch size=2, batch size must = 1", - ), - ( - (1, 8, 8, 8), - (2, 2), - (2, 2), - "NHWC", - "int16", - "dtype='int16', dtype must be either uint8, int8 or int32", - ), - ( - (1, 8, 8, 8), - (2, 2), - (2, 2), - "NCHW", - "uint8", - "data format=NCHW, data format must = NHWC", - ), - ( - (1, 8, 8, 8), - (2, 2), - (2, 2, 2), - "NHWC", - "uint8", - 
"stride size=3, stride size must = 2", - ), - ( - (1, 8, 8, 8), - (2, 2, 2), - (2, 2), - "NHWC", - "uint8", - "dimensions=3, dimensions must = 2", - ), - ], -) -def test_pooling_failure(shape, size, stride, layout, dtype, err_msg): - """Check Pooling error messages.""" - - typef = relay.nn.max_pool2d - pad = (0, 0, 0, 0) - - model = _get_model(shape, typef, size, stride, pad, layout, dtype) - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, err_msg) diff --git a/tests/python/contrib/test_ethosn/test_relu.py b/tests/python/contrib/test_ethosn/test_relu.py deleted file mode 100644 index b1ab6ede2c42..000000000000 --- a/tests/python/contrib/test_ethosn/test_relu.py +++ /dev/null @@ -1,91 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N integration relu tests""" - -import numpy as np -import pytest -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from . import infrastructure as tei - - -def _get_model(shape, dtype, a_min, a_max): - assert a_min >= np.iinfo(dtype).min and a_max <= np.iinfo(dtype).max - a = relay.var("a", shape=shape, dtype=dtype) - relu = relay.clip(a, a_min=a_min, a_max=a_max) - return relu - - -@requires_ethosn -@pytest.mark.parametrize( - "shape,a_min,a_max,dtype", - [ - ((1, 4, 4, 4), 65, 178, "uint8"), - ((1, 8, 4, 2), 1, 254, "uint8"), - ((1, 8, 4, 2), -100, 100, "int8"), - ((1, 16), -120, -20, "int8"), - ], -) -def test_relu(dtype, shape, a_min, a_max): - """Compare Relu output with TVM.""" - np.random.seed(0) - - inputs = { - "a": tvm.nd.array( - np.random.randint( - low=np.iinfo(dtype).min, - high=np.iinfo(dtype).max + 1, - size=shape, - dtype=dtype, - ) - ), - } - outputs = [] - for npu in [False, True]: - model = _get_model(inputs["a"].shape, dtype, a_min, a_max) - mod = tei.make_module(model, {}) - outputs.append( - tei.build_and_run( - mod, - inputs, - 1, - {}, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize( - "shape,dtype,a_min,a_max,err_msg", - [ - ((1, 4, 4, 4, 4), "uint8", 65, 78, "dimensions=5, dimensions must be <= 4"), - ((1, 8, 4, 2), "int16", 1, 254, "dtype='int16', dtype must be either uint8, int8 or int32"), - ((1, 8, 4, 2), "uint8", 254, 1, "Relu has lower bound > upper bound"), - ((2, 2, 2, 2), "uint8", 1, 63, "batch size=2, batch size must = 1; "), - ], -) -def test_relu_failure(shape, dtype, a_min, a_max, err_msg): - """Check Relu error messages.""" - model = _get_model(shape, dtype, a_min, a_max) - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, err_msg) diff --git a/tests/python/contrib/test_ethosn/test_requantize.py b/tests/python/contrib/test_ethosn/test_requantize.py deleted file mode 100644 
index 315beddbe45e..000000000000 --- a/tests/python/contrib/test_ethosn/test_requantize.py +++ /dev/null @@ -1,168 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N integration requantize tests""" - -import pytest -import numpy as np -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from . import infrastructure as tei - - -def _get_model(shape, input_zp, input_sc, output_zp, output_sc, in_dtype, out_dtype): - a = relay.var("a", shape=shape, dtype=in_dtype) - model = relay.qnn.op.requantize( - data=a, - input_scale=relay.const(input_sc, "float32"), - input_zero_point=relay.const(input_zp, "int32"), - output_scale=relay.const(output_sc, "float32"), - output_zero_point=relay.const(output_zp, "int32"), - out_dtype=out_dtype, - ) - return model - - -@requires_ethosn -@pytest.mark.parametrize("in_dtype", ["int8", "uint8"]) -@pytest.mark.parametrize("out_dtype", ["int8", "uint8"]) -@pytest.mark.parametrize("shape", [(1, 52, 52, 3)]) -def test_requantize(in_dtype, out_dtype, shape): - """Compare Requantize output with TVM.""" - - np.random.seed(0) - low = 0 if in_dtype == "uint8" else -5 - high = low + 10 - input_zp = (high + low) / 2 - inputs = { - "a": tvm.nd.array(np.random.randint(low=low, high=high, size=shape, dtype=in_dtype)), - } - outputs = [] - for npu in [False, True]: - model = _get_model( - shape=shape, - input_zp=input_zp, - input_sc=0.002, - output_zp=10, - output_sc=0.008, - in_dtype=in_dtype, - out_dtype=out_dtype, - ) - mod = tei.make_module(model, []) - x = tei.build_and_run( - mod, - inputs, - 1, - {}, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - outputs.append(x) - - tei.verify(outputs, out_dtype, 1) - - -@requires_ethosn -def test_requantize_mixed_precision_with_following_op(): - """ - Checks a requantize operation that changes precision from uint8 to int8 with a - following add op. 
- """ - - np.random.seed(0) - shape = (1, 4, 6, 8) - in_sc = 0.012566 - in_zp = 131 - out_sc = 0.012566 - out_zp = 3 - in_dtype = "uint8" - out_dtype = "int8" - - def get_model(): - a = relay.var("a", shape=shape, dtype=in_dtype) - b = relay.var("b", shape=shape, dtype=out_dtype) - req = relay.qnn.op.requantize( - data=a, - input_scale=relay.const(in_sc, "float32"), - input_zero_point=relay.const(in_zp, "int32"), - output_scale=relay.const(out_sc, "float32"), - output_zero_point=relay.const(out_zp, "int32"), - out_dtype=out_dtype, - ) - req = relay.qnn.op.add( - req, - b, - lhs_scale=relay.const(out_sc, "float32"), - lhs_zero_point=relay.const(out_zp, "int32"), - rhs_scale=relay.const(out_sc, "float32"), - rhs_zero_point=relay.const(out_zp, "int32"), - output_scale=relay.const(out_sc, "float32"), - output_zero_point=relay.const(out_zp, "int32"), - ) - return req - - inputs = { - "a": tvm.nd.array( - np.random.randint( - low=np.iinfo(in_dtype).min, high=np.iinfo(in_dtype).max, size=shape, dtype=in_dtype - ) - ), - "b": tvm.nd.array( - np.random.randint( - low=np.iinfo(out_dtype).min, - high=np.iinfo(out_dtype).max, - size=shape, - dtype=out_dtype, - ) - ), - } - outputs = [] - for npu in [False, True]: - model = get_model() - mod = tei.make_module(model, {}) - x = tei.build_and_run( - mod, - inputs, - 1, - {}, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - outputs.append(x) - - tei.verify(outputs, out_dtype, 1) - - -@requires_ethosn -def test_requantize_failure(): - """Check Requantize error messages.""" - - input_sc = 0.8 - output_sc = (input_sc / 128) - 0.0001 - model = _get_model( - shape=(1, 52, 52, 3), - input_zp=0, - input_sc=input_sc, - output_zp=0, - output_sc=output_sc, - in_dtype="int8", - out_dtype="int8", - ) - model = tei.make_ethosn_composite(model, "ethos-n.qnn_requantize") - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, "Output scale must be bigger than input scale / 128") diff --git a/tests/python/contrib/test_ethosn/test_reshape.py b/tests/python/contrib/test_ethosn/test_reshape.py deleted file mode 100644 index 2c6b4fda5af5..000000000000 --- a/tests/python/contrib/test_ethosn/test_reshape.py +++ /dev/null @@ -1,109 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N integration reshape tests""" - -import numpy as np -import pytest - -import tvm -from tvm import relay -from tvm.testing import requires_ethosn - -from . 
import infrastructure as tei - - -def _get_model(input_shape, output_shape, dtype): - """Return a model and any parameters it may have""" - a = relay.var("a", shape=input_shape, dtype=dtype) - req = relay.reshape(a, output_shape) - return req, {} - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "input_shape, output_shape", - [ - ((1, 15, 4, 1), (1, 60)), - ((1, 15, 4, 1), (1, 30, 2)), - ((1, 15, 4, 1), (1, 4, 15, 1)), - ((1, 15, 4, 1), (1, 12, 5, 1)), - ((1, 15, 4, 1), (1, 0, 2, 2)), - ((1, 15, 4, 1), (1, -1, 2, 1)), - ((1, 15, 4, 1), (1, -2)), - ((1, 15, 4, 1), (1, -3, 1, 1)), - ((1, 15, 4, 1), (1, -4, 3, 5, 4)), - ((1, 15, 4, 1), (0, -1, -2)), - ((1, 15, 4, 1), (0, -1, -3, 1)), - ((1, 15, 4, 1), (1, -4, -1, 5, 4)), - ], -) -def test_reshape(dtype, input_shape, output_shape): - """Compare Reshape output with TVM.""" - - np.random.seed(0) - inputs = { - "a": tvm.nd.array( - np.random.randint( - low=np.iinfo(dtype).min, - high=np.iinfo(dtype).max + 1, - size=input_shape, - dtype=dtype, - ) - ) - } - outputs = [] - for npu in [False, True]: - model, params = _get_model(input_shape, output_shape, dtype) - mod = tei.make_module(model, params) - outputs.append( - tei.build_and_run( - mod, - inputs, - 1, - params, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize( - "input_shape, output_shape", - [ - ( - (1, 13, 13, 255), - (1, 13, 13, 3, 85), - ), - ], -) -def test_reshape_failure(input_shape, output_shape): - """Check Resize is not offloaded.""" - - model, params = _get_model(input_shape, output_shape, "int8") - mod = tei.make_module(model, params) - tei.build( - mod, - params, - expected_host_ops=1, - npu_partitions=0, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) diff --git a/tests/python/contrib/test_ethosn/test_resize.py b/tests/python/contrib/test_ethosn/test_resize.py deleted file mode 100644 index 88880d7d4a99..000000000000 --- a/tests/python/contrib/test_ethosn/test_resize.py +++ /dev/null @@ -1,109 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N integration resize tests""" - -import pytest -import numpy as np -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from . 
import infrastructure as tei - - -def _get_model( - shape, - dtype, - size, - coordinate_transformation_mode, - rounding_method, -): - x = relay.var("x", shape=shape, dtype=dtype) - return relay.image.resize2d( - data=x, - size=size, - layout="NHWC", - method="nearest_neighbor", - coordinate_transformation_mode=coordinate_transformation_mode, - rounding_method=rounding_method, - ) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "shape, size, coordinate_transformation_mode, rounding_method", - [ - ((1, 4, 4, 2), (8, 8), "half_pixel", "round_prefer_ceil"), - ((1, 4, 4, 2), (7, 7), "asymmetric", "floor"), - ((1, 4, 8, 3), (8, 16), "half_pixel", "round_prefer_ceil"), - ((1, 4, 8, 3), (7, 15), "asymmetric", "floor"), - ], -) -def test_resize(dtype, shape, size, coordinate_transformation_mode, rounding_method): - """Compare Resize output with TVM.""" - - np.random.seed(0) - zp_min = np.iinfo(dtype).min - zp_max = np.iinfo(dtype).max - inputs = { - "x": tvm.nd.array(np.random.randint(zp_min, high=zp_max + 1, size=shape, dtype=dtype)), - } - outputs = [] - for npu in [False, True]: - model = _get_model( - shape=shape, - dtype=dtype, - size=size, - coordinate_transformation_mode=coordinate_transformation_mode, - rounding_method=rounding_method, - ) - mod = tei.make_module(model, {}) - x = tei.build_and_run(mod, inputs, 1, {}, npu=npu) - outputs.append(x) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize( - "size,err_msg", - [ - ( - (30, 20), - "Requested height isn't supported", - ), - ( - (20, 30), - "Requested width isn't supported", - ), - ], -) -def test_resize_failure(size, err_msg): - """Check Resize error messages.""" - dtype = "int8" - - model = _get_model( - shape=(1, 10, 10, 1), - dtype=dtype, - size=size, - coordinate_transformation_mode="half_pixel", - rounding_method="round_prefer_ceil", - ) - model = tei.make_ethosn_composite(model, "ethos-n.qnn_resize") - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, err_msg) diff --git a/tests/python/contrib/test_ethosn/test_sigmoid.py b/tests/python/contrib/test_ethosn/test_sigmoid.py deleted file mode 100644 index bddd16049144..000000000000 --- a/tests/python/contrib/test_ethosn/test_sigmoid.py +++ /dev/null @@ -1,102 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N integration sigmoid tests""" - -import pytest -import numpy as np -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from . 
import infrastructure as tei - - -def _get_model(shape, input_zp, input_sc, output_zp, output_sc, dtype): - a = relay.var("a", shape=shape, dtype=dtype) - dequantize = relay.qnn.op.dequantize( - a, - input_scale=relay.const(input_sc, "float32"), - input_zero_point=relay.const(input_zp, "int32"), - ) - sigmoid = relay.sigmoid(dequantize) - model = relay.qnn.op.quantize( - sigmoid, - output_scale=relay.const(output_sc, "float32"), - output_zero_point=relay.const(output_zp, "int32"), - out_dtype=dtype, - ) - return model - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "shape", - [ - (1, 16, 16, 16), - (1, 8, 8), - ], -) -def test_sigmoid(dtype, shape): - """Compare Sigmoid output with TVM.""" - np.random.seed(0) - - inputs = { - "a": tvm.nd.array( - np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=shape, dtype=dtype) - ), - } - outputs = [] - for npu in [False, True]: - for _ in range(1, 2): - if dtype == "uint8": - input_zp = 0 - output_zp = 0 - else: - input_zp = 127 - output_zp = -128 - model = _get_model(shape, input_zp, 0.02, output_zp, 1.0 / 256.0, dtype) - mod = tei.make_module(model, []) - outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu)) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize( - "shape,input_zp,input_sc,output_zp,output_sc,err_msg", - [ - ((2, 4, 4, 4), 64, 0.2, 0, 1 / 256, "batch size=2, batch size must = 1"), - ( - (1, 4, 4, 4), - 64, - 0.2, - 3, - 1, - "output quantization params=(3, 1), must = (0, 1/256)", - ), - ], -) -def test_sigmoid_failure(shape, input_zp, input_sc, output_zp, output_sc, err_msg): - """Check Sigmoid error messages.""" - - dtype = "uint8" - - model = _get_model(shape, input_zp, input_sc, output_zp, output_sc, dtype) - model = tei.make_ethosn_composite(model, "ethos-n.qnn_sigmoid") - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, err_msg) diff --git a/tests/python/contrib/test_ethosn/test_split.py b/tests/python/contrib/test_ethosn/test_split.py deleted file mode 100644 index 0c13df97eef3..000000000000 --- a/tests/python/contrib/test_ethosn/test_split.py +++ /dev/null @@ -1,95 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Split tests for Arm(R) Ethos(TM)-N""" - -import numpy as np -import pytest - -import tvm -from tvm import relay -from tvm.testing import requires_ethosn - -from . 
import infrastructure as tei - - -def _get_model(shape, dtype, splits, axis): - a = relay.var("a", shape=shape, dtype=dtype) - split = relay.op.split(a, indices_or_sections=splits, axis=axis) - return split.astuple() - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "shape,splits,axis", - [ - ((1, 16, 16, 32), (2, 7, 10), 2), - ((1, 12, 8, 16), 3, 1), - ], -) -def test_split(dtype, shape, splits, axis): - """Compare Split output with TVM.""" - np.random.seed(0) - - outputs = [] - inputs = { - "a": tvm.nd.array( - np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=shape, dtype=dtype) - ) - } - for npu in [False, True]: - model = _get_model(shape, dtype, splits, axis) - mod = tei.make_module(model, {}) - output_count = splits if isinstance(splits, int) else len(splits) + 1 - outputs.append( - tei.build_and_run( - mod, - inputs, - output_count, - {}, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - - tei.verify(outputs, dtype, 0) - - -@requires_ethosn -@pytest.mark.parametrize( - "shape,dtype,splits,axis,err_msg", - [ - ((1, 4, 4, 4, 4), "uint8", 4, 2, "dimensions=5, dimensions must be <= 4;"), - ((1, 4, 4, 4), "int16", 4, 2, "dtype='int16', dtype must be either uint8, int8 or int32;"), - ((2, 4, 4, 4), "uint8", 4, 2, "batch size=2, batch size must = 1;"), - ((1, 4, 4, 4), "uint8", 1, 0, "Split cannot be performed along batch axis (axis 0);"), - ( - (1, 4, 4, 4), - "uint8", - 4, - 3, - "Split along the channels dimension (axis 3) requires all output sizes " - "(specified in splitInfo.m_Sizes) to be multiples of 16;", - ), - ], -) -def test_split_failure(shape, dtype, splits, axis, err_msg): - """Check Split error messages.""" - model = _get_model(shape, dtype, splits, axis) - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, err_msg) diff --git a/tests/python/contrib/test_ethosn/test_tanh.py b/tests/python/contrib/test_ethosn/test_tanh.py deleted file mode 100644 index 77ed33980ea5..000000000000 --- a/tests/python/contrib/test_ethosn/test_tanh.py +++ /dev/null @@ -1,97 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N NPU integration tanh tests""" - -import pytest -import numpy as np -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from . 
import infrastructure as tei - - -def _get_model(shape, input_zp, input_sc, output_zp, output_sc, dtype): - a = relay.var("a", shape=shape, dtype=dtype) - dequantize = relay.qnn.op.dequantize( - a, - input_scale=relay.const(input_sc, "float32"), - input_zero_point=relay.const(input_zp, "int32"), - ) - tanh = relay.tanh(dequantize) - model = relay.qnn.op.quantize( - tanh, - output_scale=relay.const(output_sc, "float32"), - output_zero_point=relay.const(output_zp, "int32"), - out_dtype=dtype, - ) - return model - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize("shape", [(1, 52, 52, 3)]) -def test_tanh(dtype, shape): - """Compare Tanh output with TVM.""" - zp_min = np.iinfo(dtype).min - zp_max = np.iinfo(dtype).max - - np.random.seed(0) - inputs = { - "a": tvm.nd.array(np.random.randint(zp_min, high=zp_max, size=shape, dtype=dtype)), - } - outputs = [] - for npu in [False, True]: - model = _get_model(shape, zp_min + 128, 1 / 256, zp_min + 128, 1 / 128, dtype) - mod = tei.make_module(model, []) - outputs.append( - tei.build_and_run( - mod, - inputs, - 1, - {}, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "shape, input_zp, input_sc, output_zp, output_sc, err_msg", - [ - ( - (1, 16, 16, 16), - 120, - 0.0250629, - 64, - 0.0078125, - "output quantization params=(64, 0.0078125), must = ({test_zp}, 1/256);", - ) - ], -) -def test_tanh_failure(shape, input_zp, input_sc, output_zp, output_sc, err_msg, dtype): - """Check Tanh error messages.""" - - test_zp = 0 if dtype == "int8" else 128 - model = _get_model(shape, input_zp, input_sc, output_zp, output_sc, dtype) - model = tei.make_ethosn_composite(model, "ethos-n.qnn_tanh") - mod = tei.make_ethosn_partition(model) - tei.test_error(mod, {}, err_msg.format(test_zp=test_zp)) diff --git a/tests/python/contrib/test_ethosn/test_topologies.py b/tests/python/contrib/test_ethosn/test_topologies.py deleted file mode 100644 index 78aa19a846eb..000000000000 --- a/tests/python/contrib/test_ethosn/test_topologies.py +++ /dev/null @@ -1,433 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Arm(R) Ethos(TM)-N tests for complex network topologies.""" - -import numpy as np -import pytest - -import tvm -from tvm import relay -from tvm.testing import requires_ethosn -from tvm.relay.op.contrib.ethosn import Available, ethosn_available - -from . 
import infrastructure as tei - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -def test_split_add_concat(dtype): - """Test a model with split, add and contatenate.""" - - def get_model(input_shape, dtype, var_names): - """Return a model""" - - a = relay.var(next(var_names), shape=input_shape, dtype=dtype) - split_scale = relay.const(0.25, "float32") - split_zp = relay.const(100, "int32") - add_scale = relay.const(0.75, "float32") - add_zp = relay.const(120, "int32") - axis = 2 - - split = relay.split(a, indices_or_sections=4, axis=axis) - b = relay.qnn.op.add( - split[0], - split[1], - lhs_scale=split_scale, - lhs_zero_point=split_zp, - rhs_scale=split_scale, - rhs_zero_point=split_zp, - output_scale=add_scale, - output_zero_point=add_zp, - ) - conc = relay.qnn.op.concatenate( - [b, split[2], split[3]], - input_scales=(add_scale, split_scale, split_scale), - input_zero_points=(add_zp, split_zp, split_zp), - output_scale=add_scale, - output_zero_point=add_zp, - axis=axis, - ) - return conc - - np.random.seed(0) - inputs = { - "a": tvm.nd.array( - np.random.randint( - np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=(1, 16, 16, 4), dtype=dtype - ) - ), - } - - outputs = [] - for npu in [False, True]: - model = get_model(inputs["a"].shape, dtype, iter(inputs)) - mod = tei.make_module(model, []) - - expected_host_ops = 0 - npu_partitions = 1 - - outputs.append( - tei.build_and_run( - mod, - inputs, - 1, - {}, - npu=npu, - expected_host_ops=expected_host_ops, - npu_partitions=npu_partitions, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - - if outputs: - tei.verify(outputs, dtype, 2) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -def test_multiple_command_streams(dtype): - """Check that multiple Ethos-N partitions are correctly handled. - - If there's more than one Ethos-N graph partition, more than one command - stream will be created. This should be handled correctly by both the - Ethos-N codegen and Ethos-N runtime module. This test checks against a - simple graph which creates two Ethos-N partitions and checks the result - against an 'all-CPU' run through TVM. 
- """ - - def get_model(dtype): - """ - max_pool2d - | - abs - | - max_pool2d - """ - x = relay.var("x", shape=(1, 4, 4, 4), dtype=dtype) - out = relay.nn.max_pool2d(x, (2, 2), (2, 2), layout="NHWC") # supported - out = relay.op.abs(out) # not supported - out = relay.nn.max_pool2d(out, (2, 2), (2, 2), layout="NHWC") # supported - return out - - np.random.seed(0) - inputs = { - "x": tvm.nd.array( - np.random.randint( - np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=(1, 4, 4, 4), dtype=dtype - ) - ) - } - model = get_model(dtype) - mod = tei.make_module(model, {}) - - # Mock inference is only supported when the whole graph is offloaded to the NPU - if ethosn_available() == Available.SW_ONLY: - tei.build(mod, {}, npu=True, expected_host_ops=1, npu_partitions=2) - else: - tei.build_and_run(mod, inputs, 1, {}, npu=True, expected_host_ops=1, npu_partitions=2) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -def test_output_order(dtype): - """Test the output order.""" - - def get_model(input_shape, dtype, var_names): - """Return a model""" - - min_value = np.iinfo(dtype).min - max_value = np.iinfo(dtype).max - a = relay.var(next(var_names), shape=input_shape, dtype=dtype) - - op_z = relay.op.clip(a, min_value, max_value) - op_b = relay.op.clip(op_z, min_value, min_value + 15) - op_c = relay.op.clip(op_z, min_value + 16, min_value + 31) - op_d = relay.op.clip(op_z, min_value + 32, min_value + 47) - op_e = relay.op.clip(op_z, min_value + 48, min_value + 63) - op_f = relay.op.clip(op_z, min_value + 64, min_value + 79) - op_g = relay.op.clip(op_z, min_value + 80, min_value + 95) - op_h = relay.op.clip(op_z, min_value + 96, min_value + 111) - op_i = relay.op.clip(op_z, min_value + 112, max_value) - return relay.Tuple((op_d, op_c, op_e, op_f, op_i, op_b, op_h, op_g)) - - np.random.seed(0) - inputs = { - "a": tvm.nd.array( - np.random.randint( - np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=(1, 16, 16, 4), dtype=dtype - ) - ), - } - - outputs = [] - for npu in [False, True]: - model = get_model(inputs["a"].shape, dtype, iter(inputs)) - mod = tei.make_module(model, []) - outputs.append( - tei.build_and_run( - mod, - inputs, - 8, - {}, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -def test_output_order_different_sizes(dtype): - """ - Test the output order when there are multiple outputs of different sizes. 
- """ - - np.random.seed(0) - input_name = "a" - input_shape = (1, 8, 8, 4) - dtype_min = np.iinfo(dtype).min - dtype_max = np.iinfo(dtype).max - - def get_model(): - var = relay.var(input_name, shape=input_shape, dtype=dtype) - clip = relay.op.clip(var, dtype_min, dtype_max) - max_pool = relay.nn.max_pool2d(clip, (2, 2), (2, 2), ceil_mode=True, layout="NHWC") - mean = relay.op.cast(clip, "int32") - mean = relay.mean(mean, axis=[1, 2], keepdims=True) - mean = relay.qnn.op.requantize( - mean, - input_scale=relay.const(0.0784314, "float32"), - input_zero_point=relay.const(dtype_min + 128, "int32"), - output_scale=relay.const(0.0784314, "float32"), - output_zero_point=relay.const(dtype_min + 128, "int32"), - out_dtype=dtype, - ) - - return relay.Tuple((mean, max_pool, clip)) - - inputs = { - input_name: tvm.nd.array( - np.random.randint(dtype_min, dtype_max + 1, size=input_shape, dtype=dtype) - ), - } - - outputs = [] - for npu in [False, True]: - model = get_model() - mod = tei.make_module(model, []) - outputs.append( - tei.build_and_run(mod, inputs, 3, {}, npu=npu, expected_host_ops=0, npu_partitions=1) - ) - - tei.verify(outputs, dtype, 1) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -@pytest.mark.parametrize( - "shape,splits,axis", - [ - ((1, 16, 16, 32), (2, 7, 10), 2), - ], -) -def test_split_with_asym_concats(dtype, shape, splits, axis): - """Test a model with split and contatenates.""" - np.random.seed(0) - - def get_model(shape, dtype, splits, axis): - a = relay.var("a", shape=shape, dtype=dtype) - split = relay.op.split(a, indices_or_sections=splits, axis=axis) - zeroi = relay.const(1, "int32") - zerof = relay.const(0.5, "float32") - con1 = relay.qnn.op.concatenate( - [split[0], split[1]], - input_scales=[zerof] * 2, - input_zero_points=[zeroi] * 2, - output_scale=zerof, - output_zero_point=zeroi, - axis=axis, - ) - con2 = relay.qnn.op.concatenate( - [split[2], split[3]], - input_scales=[zerof] * 2, - input_zero_points=[zeroi] * 2, - output_scale=zerof, - output_zero_point=zeroi, - axis=axis, - ) - return relay.Tuple((con2, con1)) - - outputs = [] - inputs = { - "a": tvm.nd.array( - np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=shape, dtype=dtype) - ) - } - for npu in [False, True]: - model = get_model(shape, dtype, splits, axis) - mod = tei.make_module(model, {}) - - expected_host_ops = 0 - npu_partitions = 1 - - # Mock inference is only supported when the whole graph is offloaded to the NPU - if ethosn_available() == Available.SW_ONLY: - tei.build( - mod, - {}, - npu=npu, - expected_host_ops=expected_host_ops, - npu_partitions=npu_partitions, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - else: - outputs.append( - tei.build_and_run( - mod, - inputs, - 2, - {}, - npu=npu, - expected_host_ops=expected_host_ops, - npu_partitions=npu_partitions, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - - if outputs: - tei.verify(outputs, dtype, 0) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -def test_output_tuple_propagation(dtype): - """This tests the case where the output tuple must be inferred - as having dummy tensor information.""" - - def get_model(dtype): - a = relay.var("a", shape=(1, 4, 4, 16), dtype=dtype) - split = relay.op.split(a, indices_or_sections=4, axis=2) - return relay.Tuple((split[0], split[1], split[2], split[3])) - - np.random.seed(0) - outputs = [] - inputs = { - "a": tvm.nd.array( - np.random.randint( - 
np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=(1, 4, 4, 16), dtype=dtype - ) - ) - } - for npu in [False, True]: - model = get_model(dtype) - mod = tei.make_module(model, {}) - outputs.append( - tei.build_and_run( - mod, - inputs, - 4, - {}, - npu=npu, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - ) - - tei.verify(outputs, dtype, 0) - - -@requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) -def test_input_tuples(dtype): - """Test a model with a tuple as input.""" - - def get_model(shapes, dtype, axis): - tup = [] - for i, shape in enumerate(shapes): - a = relay.var("in" + str(i), shape=shape, dtype=dtype) - tup.append(a) - - zeroi = relay.const(1, "int32") - zerof = relay.const(0.5, "float32") - con = relay.qnn.op.concatenate( - tup, - input_scales=[zerof] * len(shapes), - input_zero_points=[zeroi] * len(shapes), - output_scale=zerof, - output_zero_point=zeroi, - axis=axis, - ) - - return con - - np.random.seed(0) - inputs = { - "in0": tvm.nd.array( - np.random.randint( - np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=(1, 4), dtype=dtype - ) - ), - "in1": tvm.nd.array( - np.random.randint( - np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=(1, 6), dtype=dtype - ) - ), - } - outputs = [] - for npu in [False, True]: - model = get_model([(1, 4), (1, 6)], dtype, 1) - if not npu: - mod = tei.make_module(model, {}) - else: - mod = tei.make_ethosn_partition(model) - lib = tei.build( - mod, - {}, - npu=False, - additional_config_args={"inline_non_compute_intensive_partitions": False}, - ) - outputs.append(tei.run(lib, inputs, 1, npu=npu)) - - tei.verify(outputs, dtype, 0) - - -@requires_ethosn -def test_inline_non_compute_intensive_operations(): - """Tests the case when a subgraph is unpartitioned.""" - np.random.seed(0) - dtype = "int8" - shape = (1, 2, 2, 4) - - inp = relay.var("x", shape=shape, dtype=dtype) - reshape = relay.reshape(inp, newshape=(1, 1, 4, 4)) - - inputs = { - "x": tvm.nd.array( - np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=shape, dtype=dtype) - ), - } - outputs = [] - - for npu in [False, True]: - mod = tei.make_module(reshape, {}) - outputs.append( - tei.build_and_run(mod, inputs, 1, {}, npu=npu, expected_host_ops=1, npu_partitions=0) - ) - - tei.verify(outputs, dtype, 0) diff --git a/tests/python/contrib/test_ethosu/__init__.py b/tests/python/contrib/test_ethosu/__init__.py deleted file mode 100644 index e23e5fc926b2..000000000000 --- a/tests/python/contrib/test_ethosu/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""Test infrastructure for Arm(R) Ethos(TM)-U NPU related tests""" diff --git a/tests/python/contrib/test_ethosu/cascader/__init__.py b/tests/python/contrib/test_ethosu/cascader/__init__.py deleted file mode 100644 index 5d43783197e5..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Test infrastructure for the NPU cascader""" diff --git a/tests/python/contrib/test_ethosu/cascader/conftest.py b/tests/python/contrib/test_ethosu/cascader/conftest.py deleted file mode 100644 index 74063ba3433e..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/conftest.py +++ /dev/null @@ -1,626 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -ethosu_enabled = True -try: - import ethosu.vela -except ImportError: - ethosu_enabled = False - -import tvm.contrib.ethosu.cascader as cs - - -@pytest.fixture -def FLASH(): - return cs.MemoryRegion( - name="FLASH", - size=10**7, - read_bandwidth=4, - write_bandwidth=4, - read_latency=0, - write_latency=0, - burst_length=1, - ) - - -@pytest.fixture -def DRAM(): - return cs.MemoryRegion( - name="DRAM", - size=10**9, - read_bandwidth=8, - write_bandwidth=8, - read_latency=0, - write_latency=0, - burst_length=1, - ) - - -@pytest.fixture -def SRAM(): - return cs.MemoryRegion( - name="SRAM", - size=10**6, - read_bandwidth=16, - write_bandwidth=16, - read_latency=0, - write_latency=0, - burst_length=1, - ) - - -if ethosu_enabled: - import tvm - from tvm import relay - from tvm.relay.testing import run_opt_pass - - from .infra import create_te_graph - from ..infra import ( - make_ethosu_conv2d, - make_ethosu_depthwise_conv2d, - make_ethosu_binary_elementwise, - ) - - def make_TwoConv2DTE(): - def _get_func(): - ifm = relay.var("ifm", shape=(1, 12, 12, 8), dtype="int8") - conv1 = make_ethosu_conv2d( - ifm=ifm, - ifm_channels=8, - ofm_channels=32, - kernel_shape=(1, 1), - padding=(0, 0), - strides=(1, 1), - dilation=(1, 1), - activation="NONE", - ifm_layout="NHWC", - ofm_layout="NHCWB16", - ) - conv2 = make_ethosu_conv2d( - ifm=conv1, - ifm_channels=32, - ofm_channels=16, - kernel_shape=(3, 3), - padding=(1, 1), - strides=(1, 1), - dilation=(1, 1), - activation="NONE", - ifm_layout="NHCWB16", - ofm_layout="NHWC", - ) - func = relay.Function(relay.analysis.free_vars(conv2), conv2) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - func = _get_func() - te_graph, const_dict = create_te_graph(func) - sch = tvm.te.create_schedule([t.op for t in te_graph.outputs]) - return sch, te_graph, const_dict - - @pytest.fixture - def TwoConv2DTE(): - return make_TwoConv2DTE() - - @pytest.fixture - def TwoConv2DGraph(): - _, te_graph, const_dict = make_TwoConv2DTE() - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - return cs.create_cascader_graph(te_graph, const_dict, device_config) - - def make_TwoConv2DWithSliceTE(): - def _get_func(): - ifm = relay.var("ifm", shape=(1, 12, 12, 8), dtype="int8") - conv1 = make_ethosu_conv2d( - ifm=ifm, - ifm_channels=8, - ofm_channels=64, - kernel_shape=(1, 1), - padding=(0, 0), - strides=(1, 1), - dilation=(1, 1), - activation="NONE", - ifm_layout="NHWC", - ofm_layout="NHWC", - ) - strided_slice = relay.strided_slice(conv1, [0, 0, 0, 0], [1, 6, 6, 128]) - conv2 = make_ethosu_conv2d( - ifm=strided_slice, - ifm_channels=64, - ofm_channels=16, - kernel_shape=(3, 3), - padding=(1, 1), - strides=(1, 1), - dilation=(1, 1), - activation="NONE", - ifm_layout="NHWC", - ofm_layout="NHCWB16", - ) - func = relay.Function(relay.analysis.free_vars(conv2), conv2) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - func = _get_func() - te_graph, const_dict = create_te_graph(func) - sch = tvm.te.create_schedule([t.op for t in te_graph.outputs]) - return sch, te_graph, const_dict - - @pytest.fixture - def TwoConv2DWithSliceTE(): - return make_TwoConv2DWithSliceTE() - - @pytest.fixture - def TwoConv2DWithSliceGraph(): - _, te_graph, const_dict = make_TwoConv2DWithSliceTE() - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - return cs.create_cascader_graph(te_graph, const_dict, device_config) - - def make_MobileNetv2DiamondTE(): - def _get_func(): - ifm = relay.var("ifm", shape=(1, 56, 56, 96), dtype="int8") - conv1 = 
make_ethosu_conv2d( - ifm=ifm, - ifm_channels=96, - ofm_channels=24, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - conv2 = make_ethosu_conv2d( - ifm=conv1, - ifm_channels=24, - ofm_channels=144, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth1 = make_ethosu_depthwise_conv2d( - ifm=conv2, - channels=144, - kernel_shape=(3, 3), - padding=(1, 1, 1, 1), - strides=(1, 1), - dilation=(1, 1), - ) - conv3 = make_ethosu_conv2d( - ifm=depth1, - ifm_channels=144, - ofm_channels=24, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - add1 = make_ethosu_binary_elementwise( - ifm=conv1, - ifm2=conv3, - ifm_channels=24, - ifm2_channels=24, - operator_type="ADD", - ofm_dtype="int8", - ) - func = relay.Function(relay.analysis.free_vars(add1), add1) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - func = _get_func() - te_graph, const_dict = create_te_graph(func) - sch = tvm.te.create_schedule([t.op for t in te_graph.outputs]) - return sch, te_graph, const_dict - - @pytest.fixture - def MobileNetv2DiamondTE(): - return make_MobileNetv2DiamondTE() - - @pytest.fixture - def MobileNetv2DiamondGraph(): - _, te_graph, const_dict = make_MobileNetv2DiamondTE() - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - return cs.create_cascader_graph(te_graph, const_dict, device_config) - - def make_BinaryTE(): - def _get_func(): - ifm_a = relay.var("ifm_a", shape=(1, 8, 8, 8), dtype="int8") - ifm_b = relay.var("ifm_b", shape=(1, 8, 8, 8), dtype="int8") - conv1 = make_ethosu_conv2d( - ifm=ifm_a, - ifm_channels=8, - ofm_channels=8, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - conv2 = make_ethosu_conv2d( - ifm=ifm_b, - ifm_channels=8, - ofm_channels=8, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - add1 = make_ethosu_binary_elementwise( - ifm=conv1, - ifm2=conv2, - ifm_channels=8, - ifm2_channels=8, - operator_type="ADD", - ofm_dtype="int8", - ) - func = relay.Function(relay.analysis.free_vars(add1), add1) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - func = _get_func() - te_graph, const_dict = create_te_graph(func) - sch = tvm.te.create_schedule([t.op for t in te_graph.outputs]) - return sch, te_graph, const_dict - - @pytest.fixture - def BinaryTE(): - return make_BinaryTE() - - @pytest.fixture - def BinaryGraph(): - _, te_graph, const_dict = make_BinaryTE() - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - return cs.create_cascader_graph(te_graph, const_dict, device_config) - - def make_MobileNetv1StartTE(): - def _get_func(): - ifm = relay.var("ifm", shape=(1, 224, 224, 3), dtype="int8") - conv1 = make_ethosu_conv2d( - ifm=ifm, - ifm_channels=3, - ofm_channels=32, - kernel_shape=(3, 3), - padding=(0, 0, 1, 1), - strides=(2, 2), - dilation=(1, 1), - ) - depth1 = make_ethosu_depthwise_conv2d( - ifm=conv1, - channels=32, - kernel_shape=(3, 3), - padding=(1, 1, 1, 1), - strides=(1, 1), - dilation=(1, 1), - ) - conv2 = make_ethosu_conv2d( - ifm=depth1, - ifm_channels=32, - ofm_channels=64, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth2 = make_ethosu_depthwise_conv2d( - ifm=conv2, - channels=64, - kernel_shape=(3, 3), - padding=(0, 0, 1, 1), - strides=(2, 2), - dilation=(1, 1), - ) - conv3 = make_ethosu_conv2d( - ifm=depth2, - ifm_channels=64, - ofm_channels=128, - kernel_shape=(1, 1), - 
padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth3 = make_ethosu_depthwise_conv2d( - ifm=conv3, - channels=128, - kernel_shape=(3, 3), - padding=(1, 1, 1, 1), - strides=(1, 1), - dilation=(1, 1), - ) - conv4 = make_ethosu_conv2d( - ifm=depth3, - ifm_channels=128, - ofm_channels=128, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth4 = make_ethosu_depthwise_conv2d( - ifm=conv4, - channels=128, - kernel_shape=(3, 3), - padding=(0, 0, 1, 1), - strides=(2, 2), - dilation=(1, 1), - ) - func = relay.Function(relay.analysis.free_vars(depth4), depth4) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - func = _get_func() - te_graph, const_dict = create_te_graph(func) - sch = tvm.te.create_schedule([t.op for t in te_graph.outputs]) - return sch, te_graph, const_dict - - @pytest.fixture - def MobileNetv1StartTE(): - return make_MobileNetv1StartTE() - - @pytest.fixture - def MobileNetv1StartGraph(): - _, te_graph, const_dict = make_MobileNetv1StartTE() - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - return cs.create_cascader_graph(te_graph, const_dict, device_config) - - def make_MobileNetv1TE(): - def _get_func(): - ifm = relay.var("ifm", shape=(1, 224, 224, 3), dtype="int8") - conv1 = make_ethosu_conv2d( - ifm=ifm, - ifm_channels=3, - ofm_channels=32, - kernel_shape=(3, 3), - padding=(0, 0, 1, 1), - strides=(2, 2), - dilation=(1, 1), - ) - depth1 = make_ethosu_depthwise_conv2d( - ifm=conv1, - channels=32, - kernel_shape=(3, 3), - padding=(1, 1, 1, 1), - strides=(1, 1), - dilation=(1, 1), - ) - conv2 = make_ethosu_conv2d( - ifm=depth1, - ifm_channels=32, - ofm_channels=64, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth2 = make_ethosu_depthwise_conv2d( - ifm=conv2, - channels=64, - kernel_shape=(3, 3), - padding=(0, 0, 1, 1), - strides=(2, 2), - dilation=(1, 1), - ) - conv3 = make_ethosu_conv2d( - ifm=depth2, - ifm_channels=64, - ofm_channels=128, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth3 = make_ethosu_depthwise_conv2d( - ifm=conv3, - channels=128, - kernel_shape=(3, 3), - padding=(1, 1, 1, 1), - strides=(1, 1), - dilation=(1, 1), - ) - conv4 = make_ethosu_conv2d( - ifm=depth3, - ifm_channels=128, - ofm_channels=128, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth4 = make_ethosu_depthwise_conv2d( - ifm=conv4, - channels=128, - kernel_shape=(3, 3), - padding=(0, 0, 1, 1), - strides=(2, 2), - dilation=(1, 1), - ) - conv5 = make_ethosu_conv2d( - ifm=depth4, - ifm_channels=128, - ofm_channels=256, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth5 = make_ethosu_depthwise_conv2d( - ifm=conv5, - channels=256, - kernel_shape=(3, 3), - padding=(1, 1, 1, 1), - strides=(1, 1), - dilation=(1, 1), - ) - conv6 = make_ethosu_conv2d( - ifm=depth5, - ifm_channels=256, - ofm_channels=256, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth6 = make_ethosu_depthwise_conv2d( - ifm=conv6, - channels=256, - kernel_shape=(3, 3), - padding=(0, 0, 1, 1), - strides=(2, 2), - dilation=(1, 1), - ) - conv7 = make_ethosu_conv2d( - ifm=depth6, - ifm_channels=256, - ofm_channels=512, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth7 = make_ethosu_depthwise_conv2d( - ifm=conv7, - channels=512, - kernel_shape=(3, 3), - padding=(1, 1, 1, 1), - 
strides=(1, 1), - dilation=(1, 1), - ) - conv8 = make_ethosu_conv2d( - ifm=depth7, - ifm_channels=512, - ofm_channels=512, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth8 = make_ethosu_depthwise_conv2d( - ifm=conv8, - channels=512, - kernel_shape=(3, 3), - padding=(1, 1, 1, 1), - strides=(1, 1), - dilation=(1, 1), - ) - conv9 = make_ethosu_conv2d( - ifm=depth8, - ifm_channels=512, - ofm_channels=512, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth9 = make_ethosu_depthwise_conv2d( - ifm=conv9, - channels=512, - kernel_shape=(3, 3), - padding=(1, 1, 1, 1), - strides=(1, 1), - dilation=(1, 1), - ) - conv10 = make_ethosu_conv2d( - ifm=depth9, - ifm_channels=512, - ofm_channels=512, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth10 = make_ethosu_depthwise_conv2d( - ifm=conv10, - channels=512, - kernel_shape=(3, 3), - padding=(1, 1, 1, 1), - strides=(1, 1), - dilation=(1, 1), - ) - conv11 = make_ethosu_conv2d( - ifm=depth10, - ifm_channels=512, - ofm_channels=512, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth11 = make_ethosu_depthwise_conv2d( - ifm=conv11, - channels=512, - kernel_shape=(3, 3), - padding=(1, 1, 1, 1), - strides=(1, 1), - dilation=(1, 1), - ) - conv12 = make_ethosu_conv2d( - ifm=depth11, - ifm_channels=512, - ofm_channels=512, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth12 = make_ethosu_depthwise_conv2d( - ifm=conv12, - channels=512, - kernel_shape=(3, 3), - padding=(0, 0, 1, 1), - strides=(2, 2), - dilation=(1, 1), - ) - conv13 = make_ethosu_conv2d( - ifm=depth12, - ifm_channels=512, - ofm_channels=1024, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - depth13 = make_ethosu_depthwise_conv2d( - ifm=conv13, - channels=1024, - kernel_shape=(3, 3), - padding=(1, 1, 1, 1), - strides=(1, 1), - dilation=(1, 1), - ) - conv14 = make_ethosu_conv2d( - ifm=depth13, - ifm_channels=1024, - ofm_channels=1024, - kernel_shape=(1, 1), - padding=(0, 0, 0, 0), - strides=(1, 1), - dilation=(1, 1), - ) - func = relay.Function(relay.analysis.free_vars(conv14), conv14) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - func = _get_func() - te_graph, const_dict = create_te_graph(func) - sch = tvm.te.create_schedule([t.op for t in te_graph.outputs]) - return sch, te_graph, const_dict - - @pytest.fixture - def MobileNetv1TE(): - return make_MobileNetv1TE() - - @pytest.fixture - def MobileNetv1Graph(): - _, te_graph, const_dict = make_MobileNetv1TE() - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - return cs.create_cascader_graph(te_graph, const_dict, device_config) diff --git a/tests/python/contrib/test_ethosu/cascader/infra.py b/tests/python/contrib/test_ethosu/cascader/infra.py deleted file mode 100644 index cfda1df72161..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/infra.py +++ /dev/null @@ -1,167 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -ethosu_enabled = True -try: - import ethosu.vela -except ImportError: - ethosu_enabled = False - -import tvm -from tvm import relay -import tvm.contrib.ethosu.cascader as cs -import numpy as np - - -def make_options( - cascade_region: cs.MemoryRegion, - max_proposals: int = 1, - stripe_factors: int = 1, - max_plan_size: int = 1, - max_open_plans: int = 8, - max_closed_plans: int = 32, - always_copy_size: int = 1024, - disable_pareto_plans: bool = False, - disable_pareto_proposals: bool = False, - enable_striping: bool = True, -): - return cs.CascaderOptions( - cascade_region=cascade_region, - max_proposals=max_proposals, - stripe_factors=stripe_factors, - max_plan_size=max_plan_size, - max_open_plans=max_open_plans, - max_closed_plans=max_closed_plans, - always_copy_size=always_copy_size, - disable_pareto_plans=disable_pareto_plans, - disable_pareto_proposals=disable_pareto_proposals, - enable_striping=enable_striping, - ) - - -def make_simple_home_map(graph, var_region, const_region): - home_map = {} - for tensor in graph.tensor_order: - if tensor.is_constant: - home_map[tensor] = [const_region] - else: - home_map[tensor] = [var_region] - - return home_map - - -if ethosu_enabled: - from tvm.relay.backend.contrib.ethosu.tir.compiler import extract_constants, lower_to_te - from tvm.relay.backend.contrib.ethosu.te.common import get_layout_transform_matrices - - def create_te_graph(func): - func, consts = extract_constants(func) - mod = tvm.IRModule.from_expr(func) - func = relay.transform.InferType()(mod)["main"] - te_graph = lower_to_te(func) - return te_graph, consts - - def make_matrices( - op_type, - kernel, - stride, - padding, - ifm_layout, - ofm_layout, - dilation=(1, 1), - ifm_channels=1, - ofm_channels=1, - ): - kernel_h, kernel_w = kernel - stride_h, stride_w = stride - dilation_h, dilation_w = dilation - dilated_kernel_h = (kernel_h - 1) * dilation_h + 1 - dilated_kernel_w = (kernel_w - 1) * dilation_w + 1 - - nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(ofm_channels) - - if op_type == "ethosu_conv2d": - ifm_matrix = [ - [1, 0, 0, 0, 0], - [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)], - [0, 0, stride_w, 0, (dilated_kernel_w - stride_w)], - [0, 0, 0, 0, ifm_channels], - [0, 0, 0, 0, 1], - ] - weight_matrix = [ - [0, 0, 0, 1, 0], - [0, 0, 0, 0, kernel_h], - [0, 0, 0, 0, kernel_w], - [0, 0, 0, 0, ifm_channels], - [0, 0, 0, 0, 1], - ] - elif op_type == "ethosu_depthwise_conv2d": - ifm_matrix = [ - [1, 0, 0, 0, 0], - [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)], - [0, 0, stride_w, 0, (dilated_kernel_w - stride_w)], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], - ] - weight_matrix = [ - [0, 0, 0, 1, 0], - [0, 0, 0, 0, kernel_h], - [0, 0, 0, 0, kernel_w], - [0, 0, 0, 0, 1], - [0, 0, 0, 0, 1], - ] - elif op_type == "ethosu_pooling": - ifm_matrix = [ - [1, 0, 0, 0, 0], - [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)], - [0, 0, stride_w, 0, (dilated_kernel_w - stride_w)], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], - ] - weight_matrix = [ - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - ] - scale_bias_matrix = [ 
- [0, 0, 0, 1, 0], - [0, 0, 0, 0, 10], - [0, 0, 0, 0, 1], - ] - if ofm_layout == "NHCWB16": - ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist() - weight_matrix = np.matmul(weight_matrix, nhcwb16_to_nhwc).tolist() - scale_bias_matrix = np.matmul(scale_bias_matrix, nhcwb16_to_nhwc).tolist() - if ifm_layout == "NHCWB16": - ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist() - - ifm_offset = ( - [0, -padding[0], -padding[1], 0] - if ifm_layout == "NHWC" - else [0, -padding[0], 0, -padding[1], 0] - ) - weight_offset = [0, 0, 0, 0] - scale_bias_offset = [0, 0] - return ( - ifm_matrix, - ifm_offset, - weight_matrix, - weight_offset, - scale_bias_matrix, - scale_bias_offset, - ) diff --git a/tests/python/contrib/test_ethosu/cascader/test_calculate_memory_pressure.py b/tests/python/contrib/test_ethosu/cascader/test_calculate_memory_pressure.py deleted file mode 100644 index 255ec4bba892..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_calculate_memory_pressure.py +++ /dev/null @@ -1,186 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=wrong-import-position - -""" -Test memory pressure is calculated correctly from used memory annotations. 
-""" - -import pytest - -pytest.importorskip("ethosu.vela") - -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu.codegen import _calculate_memory_pressure -from tvm.contrib.ethosu.cascader.scheduler import extract_memory_info -from tvm import WorkspacePoolInfo, PoolInfoProperties - - -def _npu_and_non_npu_functions(): - mod = tvm.IRModule({}) - - # NPU function 1 - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - max_pool = relay.nn.max_pool2d(x) - composite_func = relay.Function([x], max_pool) - composite_func = composite_func.with_attr("Composite", "ethos-u.pooling") - inp = relay.var("input", shape=(1, 2, 2, 4), dtype="int8") - compiler_func = relay.Function([inp], composite_func) - compiler_func = compiler_func.with_attr("used_memory", [32]) - npu_compiler_func1 = compiler_func.with_attr("Compiler", "ethos-u") - g1 = relay.GlobalVar("g1") - mod[g1] = npu_compiler_func1 - - # Non-NPU function - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - max_pool = relay.abs(x) - composite_func = relay.Function([x], max_pool) - composite_func = composite_func.with_attr("Composite", "foo.unary_elementwise") - inp = relay.var("input", shape=(1, 2, 2, 4), dtype="int8") - compiler_func = relay.Function([inp], composite_func) - compiler_func = compiler_func.with_attr("used_memory", [32]) - non_npu_compiler_func = compiler_func.with_attr("Compiler", "foo") - g2 = relay.GlobalVar("g2") - mod[g2] = non_npu_compiler_func - - # NPU function 2 - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - max_pool = relay.abs(x) - composite_func = relay.Function([x], max_pool) - composite_func = composite_func.with_attr("Composite", "ethos-u.unary_elementwise") - inp = relay.var("input", shape=(1, 2, 2, 4), dtype="int8") - compiler_func = relay.Function([inp], composite_func) - compiler_func = compiler_func.with_attr("used_memory", [32]) - npu_compiler_func2 = compiler_func.with_attr("Compiler", "ethos-u") - g3 = relay.GlobalVar("g3") - mod[g3] = npu_compiler_func2 - - # Main - inp = relay.var("main_input", shape=(1, 2, 2, 4), dtype="int8") - call1 = relay.Call(g1, [inp]) - call2 = relay.Call(g2, [call1]) - call3 = relay.Call(g3, [call2]) - main_func = relay.Function([inp], call3) - main_func = main_func.with_attr("io_used_memory", 32) - mod["main"] = main_func - return mod - - -def _parallel_npu_functions(): - mod = tvm.IRModule({}) - - # NPU function 1 - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - max_pool = relay.nn.max_pool2d(x) - composite_func = relay.Function([x], max_pool) - composite_func = composite_func.with_attr("Composite", "ethos-u.pooling") - inp = relay.var("input", shape=(1, 2, 2, 4), dtype="int8") - compiler_func = relay.Function([inp], composite_func) - compiler_func = compiler_func.with_attr("used_memory", [32]) - npu_compiler_func1 = compiler_func.with_attr("Compiler", "ethos-u") - g1 = relay.GlobalVar("g1") - mod[g1] = npu_compiler_func1 - - # NPU function 2 - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - abs_op = relay.abs(x) - composite_func = relay.Function([x], abs_op) - composite_func = composite_func.with_attr("Composite", "ethos-u.unary_elementwise") - inp = relay.var("input", shape=(1, 2, 2, 4), dtype="int8") - compiler_func = relay.Function([inp], composite_func) - compiler_func = compiler_func.with_attr("used_memory", [32 + 16]) - npu_compiler_func2 = compiler_func.with_attr("Compiler", "ethos-u") - g2 = relay.GlobalVar("g2") - mod[g2] = npu_compiler_func2 - - # Main - inp = relay.var("main_input", shape=(1, 2, 2, 4), 
dtype="int8") - call1 = relay.Call(g1, [inp]) - call2 = relay.Call(g2, [inp]) - concat = relay.concatenate([call1, call2], axis=3) - main_func = relay.Function([inp], concat) - main_func = main_func.with_attr("io_used_memory", 32) - mod["main"] = main_func - return mod - - -def _full_offload(): - mod = tvm.IRModule({}) - - # NPU function - x = relay.var("x", shape=(1, 4, 4, 16), dtype="int8") - max_pool = relay.nn.max_pool2d(x) - composite_func = relay.Function([x], max_pool) - composite_func = composite_func.with_attr("Composite", "ethos-u.pooling") - inp = relay.var("input", shape=(1, 4, 4, 16), dtype="int8") - compiler_func = relay.Function([inp], composite_func) - compiler_func = compiler_func.with_attr("used_memory", [256 + 256]) - npu_compiler_func = compiler_func.with_attr("Compiler", "ethos-u") - g1 = relay.GlobalVar("g1") - mod[g1] = npu_compiler_func - - # Main - inp = relay.var("main_input", shape=(1, 4, 4, 16), dtype="int8") - call = relay.Call(g1, [inp]) - main_func = relay.Function([inp], call) - main_func = main_func.with_attr("io_used_memory", 256 + 256) - mod["main"] = main_func - return mod - - -@pytest.mark.parametrize( - "model_func,use_workspace_io,expected_memory_pressure", - [ - (_npu_and_non_npu_functions, True, (16 + 16) + (16 + 16)), - (_npu_and_non_npu_functions, False, (16 + 16) + (16 + 16) - (16 + 16)), - (_parallel_npu_functions, True, (16 + 16) + (16 + 16 + 16)), - (_parallel_npu_functions, False, (16 + 16) + (16 + 16 + 16) - (16 + 16)), - (_full_offload, True, (256 + 256)), - (_full_offload, False, (256 + 256) - (256 + 256)), - ], -) -def test_calculate_memory_pressure_pass(model_func, use_workspace_io, expected_memory_pressure): - """ - Test that memory pressure is correctly calculated for NPU external functions. - """ - - mod = model_func() - with tvm.transform.PassContext(config={"tir.usmp.use_workspace_io": use_workspace_io}): - memory_pressure = _calculate_memory_pressure(mod) - assert memory_pressure == expected_memory_pressure - - -def test_extract_memory_info(): - """ - Test memory pressure value correctly reduces the workspace size. - """ - initial_pool_size = 2000 - memory_pressure = 500 - memory_pool = WorkspacePoolInfo( - "SRAM", - [tvm.target.Target("c"), tvm.target.Target("ethos-u")], - PoolInfoProperties( - size_hint_bytes=initial_pool_size, - read_bandwidth_bytes_per_cycle=16, - write_bandwidth_bytes_per_cycle=16, - target_burst_bytes={tvm.target.Target("ethos-u"): 1}, - ), - ) - - sram = extract_memory_info(memory_pool, memory_pressure) - assert sram.size == initial_pool_size - memory_pressure diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_binary_elementwise_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_binary_elementwise_matcher.py deleted file mode 100644 index 6a91e893820b..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_binary_elementwise_matcher.py +++ /dev/null @@ -1,181 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest - -pytest.importorskip("ethosu.vela") - -import numpy as np -import math - -from tvm import te -import tvm.contrib.ethosu.cascader as cs -from tvm.relay.backend.contrib.ethosu.te.binary_elementwise import ( - match_ethosu_binary_elementwise, - binary_elementwise_compute, -) -from tvm.relay.backend.contrib.ethosu.te.common import get_layout_transform_matrices - - -def _make_matrices(broadcast, ifm_layout, ifm2_layout, ofm_layout, ofm_channels): - broadcast_h, broadcast_w, broadcast_c = broadcast - nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(ofm_channels) - ifm_matrix = [ - [1, 0, 0, 0, 0], - [0, 1, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], - ] - ifm2_matrix = [ - [1, 0, 0, 0, 0], - [0, (1 - broadcast_h), 0, 0, broadcast_h], - [0, 0, (1 - broadcast_w), 0, broadcast_w], - [0, 0, 0, (1 - broadcast_c), broadcast_c], - [0, 0, 0, 0, 1], - ] - if ofm_layout == "NHCWB16": - ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist() - ifm2_matrix = np.matmul(ifm2_matrix, nhcwb16_to_nhwc).tolist() - if ifm_layout == "NHCWB16": - ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist() - if ifm2_layout == "NHCWB16": - ifm2_matrix = np.matmul(nhwc_to_nhcwb16, ifm2_matrix).tolist() - - return (ifm_matrix, ifm2_matrix) - - -@pytest.mark.parametrize( - "ofm_shape", - [ - [1, 12, 15, 128], - [1, 16, 16, 16], - [1, 1, 1, 1024], - [1, 73, 51, 20], - [1, 124, 172, 5], - ], -) -@pytest.mark.parametrize("ifm2_broadcast", [[0, 0, 0], [1, 0, 0], [0, 1, 0], [1, 1, 0]]) -@pytest.mark.parametrize("ifm_layout", ["NHWC", "NHCWB16"]) -@pytest.mark.parametrize("ifm2_layout", ["NHWC", "NHCWB16"]) -@pytest.mark.parametrize("ofm_layout", ["NHWC", "NHCWB16"]) -@pytest.mark.parametrize("op_type", ["MUL", "ADD", "MIN"]) -def test_ethosu_binary_elementwise_matcher( - ofm_shape, ifm2_broadcast, ifm_layout, ifm2_layout, ofm_layout, op_type -): - ifm_shape = ofm_shape.copy() - ifm2_shape = [1] + [1 if (b == 1) else a for a, b in zip(ofm_shape[1:], ifm2_broadcast)] - ifm_channels = ifm_shape[3] - ifm2_channels = ifm2_shape[3] - ofm_channels = ofm_shape[3] - nhwc_to_nhcwb16, _ = get_layout_transform_matrices(ofm_channels) - broadcast = [1 if a == 1 else 0 for a in ifm2_shape[1:]] - if ifm_layout == "NHCWB16": - ifm_shape = [ - int(math.ceil(n)) - for n in np.matmul( - nhwc_to_nhcwb16, - ifm_shape - + [ - 1, - ], - ).tolist()[:-1] - ] - if ifm2_layout == "NHCWB16": - ifm2_shape = [ - int(math.ceil(n)) - for n in np.matmul( - nhwc_to_nhcwb16, - ifm2_shape - + [ - 1, - ], - ).tolist()[:-1] - ] - if ofm_layout == "NHCWB16": - ofm_shape = [ - int(math.ceil(n)) - for n in np.matmul( - nhwc_to_nhcwb16, - ofm_shape - + [ - 1, - ], - ).tolist()[:-1] - ] - order = [1, 2, 4, 3, 0] - else: - order = [1, 2, 3, 4] - - ifm = te.placeholder(ifm_shape, dtype="int8") - ifm2 = te.placeholder(ifm2_shape, dtype="int8") - lut = te.placeholder((), dtype="uint8") - out = binary_elementwise_compute( - ifm=ifm, - ifm2=ifm2, - lut=lut, - operator_type=op_type, - ifm_scale=1, - ifm_zero_point=0, - ifm2_scale=1, - ifm2_zero_point=0, - ofm_scale=1, - ofm_zero_point=0, - 
ifm_channels=ifm_channels, - ifm2_channels=ifm2_channels, - reversed_operands=False, - activation="NONE", - clip_min=0, - clip_max=0, - rounding_mode="TFL", - ifm_layout=ifm_layout, - ifm2_layout=ifm2_layout, - ofm_layout=ofm_layout, - ofm_dtype="int8", - use_rescale=False, - rescale_scale=0, - rescale_shift=0, - ) - ifm_propagator = out.op.attrs["ifm_propagator"] - ifm2_propagator = out.op.attrs["ifm2_propagator"] - - offset = [0] * len(ofm_shape) - stripes = [0] * len(ofm_shape) - output_stripe_config = cs.StripeConfig(ofm_shape, ofm_shape, ofm_shape, order, stripes, offset) - - (ifm_transform, ifm2_transform) = _make_matrices( - broadcast, ifm_layout, ifm2_layout, ofm_layout, ofm_channels - ) - - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - part = match_ethosu_binary_elementwise(out, device_config) - - assert isinstance(part, cs.EthosuPart) - assert len(part.propagators) == 2 - assert part.propagators[0].transform == ifm_transform - assert part.propagators[1].transform == ifm2_transform - - propagated_ifm = ifm_propagator.propagate(output_stripe_config).shape - propagated_ifm2 = ifm2_propagator.propagate(output_stripe_config).shape - - # The layout transforms that have the exact number of output channels in them - # will lose no information about the number of channels - assert ifm_shape == propagated_ifm - assert ifm2_shape == propagated_ifm2 - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_block_config.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_block_config.py deleted file mode 100644 index 8b372e45c37f..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_block_config.py +++ /dev/null @@ -1,460 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") - -import numpy as np -import math - -import tvm -import tvm.contrib.ethosu.cascader as cs -from tvm.relay.backend.contrib.ethosu.te.common import get_layout_transform_matrices - -from .infra import make_matrices - - -@pytest.mark.parametrize( - "test_id, op_type, activation, kernel, stride, dilation, padding, in_shape, out_shape", - [ - # Conv2D - ( - 0, - "ethosu_conv2d", - "NONE", - (34, 19), - (2, 2), - (1, 1), - (0, 0, 0, 0), - (1, 266, 111, 15), - (1, 117, 47, 15), - ), - ( - 1, - "ethosu_conv2d", - "NONE", - (14, 14), - (1, 1), - (1, 1), - (0, 0, 0, 0), - (1, 125, 63, 64), - (1, 112, 50, 128), - ), - ( - 2, - "ethosu_conv2d", - "NONE", - (7, 1), - (2, 1), - (1, 1), - (0, 0, 0, 0), - (1, 13, 4, 12), - (1, 4, 4, 511), - ), - ( - 3, - "ethosu_conv2d", - "NONE", - (5, 5), - (1, 1), - (1, 1), - (0, 0, 0, 0), - (1, 96, 16, 276), - (1, 92, 12, 16), - ), - ( - 4, - "ethosu_conv2d", - "NONE", - (5, 5), - (1, 1), - (1, 1), - (0, 0, 0, 0), - (1, 96, 16, 276), - (1, 92, 12, 1), - ), - ( - 5, - "ethosu_conv2d", - "NONE", - (3, 3), - (1, 1), - (2, 2), - (0, 0, 0, 0), - (1, 62, 94, 32), - (1, 58, 90, 16), - ), - # Depthwise Conv2D - ( - 6, - "ethosu_depthwise_conv2d", - "NONE", - (3, 5), - (1, 1), - (1, 1), - (0, 0, 0, 0), - (1, 77, 23, 18), - (1, 75, 19, 18), - ), - ( - 7, - "ethosu_depthwise_conv2d", - "NONE", - (3, 3), - (2, 2), - (1, 1), - (1, 1, 1, 1), - (1, 25, 10, 276), - (1, 13, 5, 276), - ), - # Pooling - ( - 8, - "ethosu_pooling", - "NONE", - (13, 5), - (1, 1), - (1, 1), - (0, 0, 0, 0), - (1, 13, 5, 276), - (1, 1, 1, 276), - ), - ( - 9, - "ethosu_pooling", - "NONE", - (7, 3), - (2, 1), - (1, 1), - (0, 0, 0, 0), - (1, 317, 14, 21), - (1, 156, 12, 21), - ), - ], -) -@pytest.mark.parametrize( - "layouts", - [ - ("NHWC", "NHWC"), - ("NHCWB16", "NHCWB16"), - ("NHWC", "NHCWB16"), - ("NHCWB16", "NHWC"), - ], -) -@pytest.mark.parametrize( - "acc_config, expected_block_configs", - [ - ( - "ethos-u55-32", - [ - # Conv2D - ((1, 8, 4, 16), (1, 8, 1, 4, 16)), - ((1, 6, 5, 16), (1, 6, 1, 5, 16)), - ((1, 4, 4, 96), (1, 4, 6, 4, 16)), - ((1, 8, 4, 16), (1, 8, 1, 4, 16)), - ((1, 10, 6, 4), (1, 5, 1, 12, 4), (1, 8, 1, 4, 16)), - ((1, 6, 5, 16), (1, 6, 1, 5, 16)), - # Depthwise Conv2D - ((1, 6, 10, 16), (1, 4, 1, 12, 16)), - ((1, 8, 5, 16), (1, 6, 1, 5, 16)), - # Pooling - ((1, 1, 1, 128), (1, 1, 4, 1, 16)), - ((1, 9, 6, 16), (1, 8, 1, 4, 16)), - ], - ), - ( - "ethos-u55-64", - [ - # Conv2D - ((1, 8, 4, 16), (1, 8, 1, 4, 16)), - ((1, 6, 5, 16), (1, 6, 1, 5, 16)), - ((1, 4, 4, 96), (1, 4, 6, 4, 16)), - ((1, 8, 4, 16), (1, 8, 1, 4, 16)), - ((1, 10, 6, 8), (1, 8, 1, 4, 16)), - ((1, 6, 5, 16), (1, 6, 1, 5, 16)), - # Depthwise Conv2D - ((1, 6, 10, 16), (1, 4, 1, 12, 16)), - ((1, 8, 5, 16), (1, 6, 1, 5, 16)), - # Pooling - ((1, 1, 1, 128), (1, 1, 4, 1, 16)), - ((1, 9, 6, 16), (1, 8, 1, 4, 16)), - ], - ), - ( - "ethos-u55-128", - [ - # Conv2D - ((1, 7, 6, 16), (1, 7, 1, 6, 16)), - ((1, 5, 8, 16), (1, 5, 1, 8, 16)), - ((1, 4, 4, 128), (1, 4, 8, 4, 16)), - ((1, 16, 4, 16), (1, 16, 1, 4, 16)), - ((1, 8, 12, 8), (1, 10, 1, 6, 16)), - ((1, 10, 6, 16), (1, 10, 1, 6, 16), (1, 6, 1, 6, 16)), - # Depthwise Conv2D - ((1, 7, 10, 16), (1, 7, 1, 10, 16), (1, 6, 1, 10, 16)), - ((1, 10, 6, 16), (1, 10, 1, 6, 16), (1, 6, 1, 6, 16)), - # Pooling - # ((1, 1, 2, 16), (1, 1, 1, 2, 16)), - ((1, 1, 2, 128), (1, 1, 4, 2, 16)), - ((1, 10, 6, 16), (1, 9, 1, 6, 16)), - ], - ), - ( - "ethos-u55-256", - [ - # Conv2D - ((1, 14, 8, 16), (1, 14, 1, 8, 16)), - ((1, 16, 8, 16), (1, 16, 1, 8, 
16)), - ((1, 4, 4, 128), (1, 4, 8, 4, 16)), - ((1, 32, 4, 16), (1, 10, 12, 16), (1, 32, 1, 4, 16), (1, 10, 1, 12, 16)), - ((1, 20, 12, 8), (1, 10, 1, 12, 16)), - ((1, 12, 10, 16), (1, 12, 1, 10, 16)), - # Depthwise Conv2D - ((1, 8, 20, 16), (1, 6, 1, 20, 16), (1, 6, 2, 20, 16)), - ((1, 14, 6, 16), (1, 12, 1, 6, 16)), - # Pooling - # ((1, 2, 2, 16), (1, 2, 1, 2, 16)), - ((1, 2, 2, 128), (1, 2, 6, 2, 16)), - ((1, 10, 12, 16), (1, 10, 1, 12, 16)), - ], - ), - ], -) -def test_best_block_config( - test_id, - op_type, - activation, - kernel, - stride, - dilation, - padding, - in_shape, - out_shape, - layouts, - acc_config, - expected_block_configs, -): - ofm_channels = out_shape[3] - ifm_channels = in_shape[3] - - nhwc_to_nhcwb16, _ = get_layout_transform_matrices(ofm_channels) - - ifm_matrix, ifm_offset, weight_matrix, weight_offset, _, _ = make_matrices( - op_type, - kernel, - stride, - padding, - layouts[0], - layouts[1], - dilation, - ifm_channels, - ofm_channels, - ) - - if layouts[0] == "NHCWB16": - in_shape = [ - int(math.ceil(n)) for n in np.matmul(nhwc_to_nhcwb16, in_shape + (1,)).tolist()[:-1] - ] - if layouts[1] == "NHCWB16": - out_shape = [ - int(math.ceil(n)) for n in np.matmul(nhwc_to_nhcwb16, out_shape + (1,)).tolist()[:-1] - ] - - propagator = cs.Propagator(ifm_matrix, ifm_offset) - weight_propagator = cs.Propagator(weight_matrix, weight_offset) - - subkernels = ((kernel[0] + 7) // 8) * ((kernel[1] + 7) // 8) - - op_attrs = { - "op": op_type, - "activation": activation, - "stride_h": stride[0], - "stride_w": stride[1], - "dilation_h": dilation[0], - "dilation_w": dilation[1], - } - - device_config = cs.EthosuDeviceConfig(acc_config) - block_configs = device_config.get_valid_block_configs( - propagator, - op_attrs, - out_shape, - ofm_channels, - ifm_channels, - layouts[1], - layouts[0], - "int8", - "int8", - kernel[0], - kernel[1], - ) - - output_quantum = [1, 1, 2, 8] - if layouts[1] == "NHCWB16": - output_quantum = [1, 1, 1, 2, 8] - - # Create EthosUPart - te_subgraph = cs.TESubgraph([], None) - part = cs.EthosuPart( - te_subgraph, - [propagator, weight_propagator], - output_quantum, - subkernels, - block_configs, - 1, - ) - # Add tensors - input_tensor = cs.Tensor(in_shape, "int8") - part.set_input(0, input_tensor) - if op_type == "ethosu_conv2d": - weight_tensor = cs.Tensor([ofm_channels, kernel[0], kernel[1], ifm_channels], "int8") - part.set_input(1, weight_tensor) - elif op_type == "ethosu_depthwise_conv2d": - weight_tensor = cs.Tensor([ofm_channels, kernel[0], kernel[1], 1], "int8") - part.set_input(1, weight_tensor) - - output_tensor = cs.Tensor(out_shape, "int8") - part.set_output(output_tensor) - - order = [1, 2, 3, 4] if layouts[1] == "NHCWB16" else [1, 2, 4, 3, 0] - stripes = [1] * len(output_quantum) - offset = [0] * len(output_quantum) - - stripe_config = cs.StripeConfig(out_shape, out_shape, out_shape, order, stripes, offset) - - block = part.get_block_config(stripe_config) - block_shape = tuple(int(a) for a in block.output_shape) - - assert block_shape in expected_block_configs[test_id] - - -@pytest.mark.parametrize( - "ofm_layout, block_config_str, expected_block_shape", - [ - ("NHWC", "4x4x8", [1, 4, 4, 8]), - ("NHCWB16", "4x4x8", [1, 4, 1, 4, 16]), - ("NHCWB16", "4x4x24", [1, 4, 2, 4, 16]), - ], -) -def test_force_block_config_kernelwise(ofm_layout, block_config_str, expected_block_shape): - op_type = "ethosu_pooling" - activation = "NONE" - kernel = (2, 2) - stride = (2, 2) - padding = (0, 0) - dilation = (1, 1) - ifm_channels = 32 - out_shape = (1, 8, 10, 16) 
- - ifm_matrix, ifm_offset, _, _, _, _ = make_matrices( - op_type, kernel, stride, padding, "NHWC", ofm_layout, dilation, ifm_channels - ) - - ofm_channels = out_shape[3] - - propagator = cs.Propagator(ifm_matrix, ifm_offset) - - op_attrs = { - "op": op_type, - "activation": activation, - "stride_h": stride[0], - "stride_w": stride[1], - "dilation_h": dilation[0], - "dilation_w": dilation[1], - } - - config = { - "enable_cascader": True, - "dev_force_block_config": block_config_str, - } - with tvm.transform.PassContext(config={"relay.ext.ethos-u.options": config}): - device_config = cs.EthosuDeviceConfig("ethos-u55-128") - block_configs = device_config.get_valid_block_configs( - propagator, - op_attrs, - out_shape, - ofm_channels, - ifm_channels, - ofm_layout, - "NHWC", - "int8", - "int8", - kernel[0], - kernel[1], - ) - - assert len(block_configs) == 1 - assert block_configs[0].output_shape == expected_block_shape - - -@pytest.mark.parametrize( - "ofm_layout, block_config_str, expected_block_shape", - [ - ("NHWC", "4x4x8", [1, 4, 4, 8]), - ("NHCWB16", "4x4x8", [1, 4, 1, 4, 16]), - ("NHCWB16", "4x4x24", [1, 4, 2, 4, 16]), - ], -) -def test_force_block_config_elementwise(ofm_layout, block_config_str, expected_block_shape): - op_type = "ethosu_elementwise_unary" - op_str = "ABS" - activation = "NONE" - ofm_shape = (1, 8, 10, 16) - ifm_matrix = [ - [1, 0, 0, 0, 0], - [0, 1, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], - ] - ifm_offset = [0, 0, 0, 0] - - propagator = cs.Propagator(ifm_matrix, ifm_offset) - - op_attrs = { - "op": op_type, - "operator_type": op_str, - "activation": activation, - "clip_min": 0, - "clip_max": 0, - "rounding_mode": "TFL", - } - - config = { - "enable_cascader": True, - "dev_force_block_config": block_config_str, - } - with tvm.transform.PassContext(config={"relay.ext.ethos-u.options": config}): - device_config = cs.EthosuDeviceConfig("ethos-u55-128") - block_configs = device_config.get_elementwise_block_config( - propagator, - None, - op_attrs, - ofm_shape, - ofm_layout, - "NWHC", - None, - "int8", - "int8", - ) - - assert len(block_configs) == 1 - assert block_configs[0].output_shape == expected_block_shape - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_conv2d_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_conv2d_matcher.py deleted file mode 100644 index f9f2312ba7a9..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_conv2d_matcher.py +++ /dev/null @@ -1,182 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") - -from tvm import te -import tvm.contrib.ethosu.cascader as cs -from tvm.relay.backend.contrib.ethosu.te.convolution import match_ethosu_conv2d, conv2d_compute - -from .infra import make_matrices - - -@pytest.mark.parametrize("kernel", [(3, 3), (2, 1), (3, 5)]) -@pytest.mark.parametrize("stride", [(1, 1), (2, 1), (3, 2)]) -@pytest.mark.parametrize("dilation", [(1, 1), (2, 1), (3, 2)]) -@pytest.mark.parametrize("padding", [(0, 0, 0, 0), (3, 2, 3, 2), (2, 1, 0, 1)]) -@pytest.mark.parametrize("ifm_channels", [8, 57]) -@pytest.mark.parametrize("ifm_layout", ["NHWC", "NHCWB16"]) -@pytest.mark.parametrize("ofm_layout", ["NHWC", "NHCWB16"]) -def test_ethosu_conv2d_matcher( - kernel, stride, dilation, padding, ifm_channels, ifm_layout, ofm_layout -): - if ifm_layout == "NHWC": - ifm_shape = (1, 12, 15, ifm_channels) - else: - ifm_shape = (1, 12, 1 + ((ifm_channels - 1) // 16), 15, 16) - ofm_channels = 8 - kernel_h, kernel_w = kernel - ifm = te.placeholder(ifm_shape, dtype="int8") - weight = te.placeholder((ofm_channels, kernel_h, kernel_w, ifm_channels), dtype="int8") - scale_bias = te.placeholder((ofm_channels, 10), dtype="uint8") - lut = te.placeholder((), dtype="uint8") - out = conv2d_compute( - ifm=ifm, - weight=weight, - scale_bias=scale_bias, - lut=lut, - ifm_scale=1, - ifm_zero_point=0, - ofm_scale=1, - ofm_zero_point=0, - weight_zero_point=0, - strides=stride, - padding=padding, - dilation=dilation, - activation="NONE", - clip_min=0, - clip_max=0, - upscale="NONE", - rounding_mode="TFL", - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - ( - ifm_transform, - ifm_offset, - weight_transform, - weight_offset, - scale_bias_transform, - scale_bias_offset, - ) = make_matrices( - "ethosu_conv2d", - kernel, - stride, - padding, - ifm_layout, - ofm_layout, - dilation, - ifm_channels, - ofm_channels, - ) - - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - part = match_ethosu_conv2d(out, device_config) - - assert isinstance(part, cs.EthosuPart) - assert len(part.propagators) == 3 - assert part.propagators[0].transform == ifm_transform - assert part.propagators[0].offset == ifm_offset - assert part.propagators[1].transform == weight_transform - assert part.propagators[1].offset == weight_offset - assert part.propagators[2].transform == scale_bias_transform - assert part.propagators[2].offset == scale_bias_offset - - -@pytest.mark.parametrize( - "ifm_layout, ofm_layout, ifm_channels, expected_cycles", - [ - ("NHWC", "NHWC", 24, 2304), - ("NHCWB16", "NHWC", 12, 2352), - ("NHWC", "NHCWB16", 38, 7056), - ("NHCWB16", "NHCWB16", 55, 4608), - ], -) -def test_ethosu_conv2d_block_config_from_matcher( - ifm_layout, ofm_layout, ifm_channels, expected_cycles -): - ofm_channels = 10 - ifm_height = 123 - ifm_width = 155 - - ifm_shape = ( - (1, ifm_height, ifm_width, ifm_channels) - if ifm_layout == "NHWC" - else (1, ifm_height, 1 + ((ifm_channels - 1) // 16), ifm_width, 16) - ) - weight_shape = (ofm_channels, 3, 3, ifm_channels) - scale_bias_shape = (ofm_channels, 10) - - ifm = te.placeholder(ifm_shape, dtype="int8") - weight = te.placeholder(weight_shape, dtype="int8") - scale_bias = te.placeholder(scale_bias_shape, dtype="uint8") - lut = te.placeholder((), dtype="uint8") - out = conv2d_compute( - ifm=ifm, - weight=weight, - scale_bias=scale_bias, - lut=lut, - ifm_scale=1, - ifm_zero_point=0, - ofm_scale=1, - ofm_zero_point=0, - weight_zero_point=0, - strides=(1, 1), - padding=(0, 0, 0, 0), - dilation=(1, 1), - activation="NONE", - clip_min=0, - 
clip_max=0, - upscale="NONE", - rounding_mode="TFL", - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - part = match_ethosu_conv2d(out, device_config) - - ofm_shape = [int(i) for i in part.subgraph.output_tensor.shape] - - # Add inputs and outputs to the part - input_tensor = cs.Tensor(ifm_shape, "int8") - part.set_input(0, input_tensor) - weight_tensor = cs.Tensor(weight_shape, "int8") - part.set_input(1, weight_tensor) - scale_bias_tensor = cs.Tensor(scale_bias_shape, "int8") - part.set_input(2, scale_bias_tensor) - output_tensor = cs.Tensor(ofm_shape, "int8") - part.set_output(output_tensor) - - # Create a stripe of a size of the output tensor - order = [1, 2, 3, 4] if ofm_layout == "NHWC" else [1, 2, 4, 3, 0] - stripes = [1] * len(order) - offset = [0] * len(order) - - stripe_config = cs.StripeConfig(ofm_shape, ofm_shape, ofm_shape, order, stripes, offset) - - block = part.get_block_config(stripe_config) - - # Since we dont know the values of the variables we passed to the get_valid_block_configs in - # the matcher, best we can do is to verify the compute cycle count since the channels have a - # significant effect on it - assert block.compute_cycles == expected_cycles - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_depthwise2d_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_depthwise2d_matcher.py deleted file mode 100644 index 8625a4844405..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_depthwise2d_matcher.py +++ /dev/null @@ -1,103 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") - -import numpy as np - -from tvm import te -import tvm.contrib.ethosu.cascader as cs -from tvm.relay.backend.contrib.ethosu.te.depthwise import ( - match_ethosu_depthwise_conv2d, - depthwise_conv2d_compute, -) -from .infra import make_matrices - - -@pytest.mark.parametrize("kernel", [(3, 3), (2, 1), (3, 5)]) -@pytest.mark.parametrize("stride", [(1, 1), (2, 1), (3, 2)]) -@pytest.mark.parametrize("dilation", [(1, 1), (2, 1), (3, 2)]) -@pytest.mark.parametrize("padding", [(0, 0, 0, 0), (3, 2, 3, 2), (2, 1, 0, 1)]) -@pytest.mark.parametrize("ifm_layout", ["NHWC", "NHCWB16"]) -@pytest.mark.parametrize("ofm_layout", ["NHWC", "NHCWB16"]) -def test_ethosu_depthwise2d_matcher(kernel, stride, dilation, padding, ifm_layout, ofm_layout): - ofm_channels = 57 - if ifm_layout == "NHWC": - ifm_shape = (1, 12, 15, ofm_channels) - else: - ifm_shape = (1, 12, 1 + ((ofm_channels - 1) // 16), 15, 16) - kernel_h, kernel_w = kernel - ifm = te.placeholder(ifm_shape, dtype="int8") - weight = te.placeholder((ofm_channels, kernel_h, kernel_w, 1), dtype="int8") - scale_bias = te.placeholder((ofm_channels, 10), dtype="uint8") - lut = te.placeholder((), dtype="uint8") - out = depthwise_conv2d_compute( - ifm=ifm, - weight=weight, - scale_bias=scale_bias, - lut=lut, - ifm_scale=1, - ifm_zero_point=0, - ofm_scale=1, - ofm_zero_point=0, - weight_zero_point=0, - strides=stride, - padding=padding, - dilation=dilation, - activation="NONE", - clip_min=0, - clip_max=0, - rounding_mode="TFL", - upscale="NONE", - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ofm_dtype=ifm.dtype, - ) - ( - ifm_transform, - ifm_offset, - weight_transform, - weight_offset, - scale_bias_transform, - scale_bias_offset, - ) = make_matrices( - "ethosu_depthwise_conv2d", - kernel, - stride, - padding, - ifm_layout, - ofm_layout, - dilation, - ofm_channels=ofm_channels, - ) - - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - part = match_ethosu_depthwise_conv2d(out, device_config) - - assert isinstance(part, cs.EthosuPart) - assert len(part.propagators) == 3 - assert part.propagators[0].transform == ifm_transform - assert part.propagators[0].offset == ifm_offset - assert part.propagators[1].transform == weight_transform - assert part.propagators[1].offset == weight_offset - assert part.propagators[2].transform == scale_bias_transform - assert part.propagators[2].offset == scale_bias_offset - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_identity_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_identity_matcher.py deleted file mode 100644 index 11d76ab2b8dd..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_identity_matcher.py +++ /dev/null @@ -1,59 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest - -pytest.importorskip("ethosu.vela") - -import numpy as np - -from tvm import te -import tvm.contrib.ethosu.cascader as cs -from tvm.relay.backend.contrib.ethosu.te.identity import match_ethosu_identity, identity_compute -from .infra import make_matrices - - -def test_ethosu_identity_matcher(): - ofm_channels = 21 - ifm_shape = (1, 12, 15, ofm_channels) - ifm = te.placeholder(ifm_shape, dtype="int8") - lut = te.placeholder((), dtype="uint8") - out = identity_compute( - ifm=ifm, - lut=lut, - ifm_scale=1, - ifm_zero_point=0, - ofm_scale=1, - ofm_zero_point=0, - activation="NONE", - rounding_mode="TFL", - ) - - length = len(ifm.shape) - ifm_transform = np.identity(length + 1).tolist() - ifm_offset = np.zeros(length, dtype="int64").tolist() - - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - part = match_ethosu_identity(out, device_config) - - assert isinstance(part, cs.EthosuPart) - assert len(part.propagators) == 1 - assert part.propagators[0].transform == ifm_transform - assert part.propagators[0].offset == ifm_offset - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_inline_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_inline_matcher.py deleted file mode 100644 index ff5530d433f6..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_inline_matcher.py +++ /dev/null @@ -1,50 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") - -from tvm import te -from tvm.topi.transform import reshape -import tvm.contrib.ethosu.cascader as cs -from tvm.relay.backend.contrib.ethosu.te.inline import match_ethosu_inline - - -def test_ethosu_inline_matcher(): - ifm_shape = (2, 5, 6) - new_shape = (2, 30) - ifm = te.placeholder(ifm_shape, dtype="int8") - out = reshape(ifm, new_shape) - ifm_transform = [ - [0, 0, ifm_shape[0]], - [0, 0, ifm_shape[1]], - [0, 0, ifm_shape[2]], - [0, 0, 1], - ] - ifm_offset = [0, 0, 0] - - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - part = match_ethosu_inline(out, device_config) - - assert isinstance(part, cs.InlinePart) - assert len(part.propagators) == 1 - assert part.propagators[0].transform == ifm_transform - assert part.propagators[0].offset == ifm_offset - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_part.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_part.py deleted file mode 100644 index 22196e237e3d..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_part.py +++ /dev/null @@ -1,60 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") - -import tvm.contrib.ethosu.cascader as cs -from tvm.contrib.ethosu.cascader.graph import BufferMode -from tvm.contrib.ethosu.cascader.parts import EthosuPart - - -def test_ethosu_part(): - te_subgraph = cs.TESubgraph([], None) - output_quantum = [1, 2, 2, 8] - propagator = cs.Propagator( - [[1, 0, 0, 0, 2], [0, 1, 0, 0, 2], [0, 0, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1]], - [0, 0, 0, 0], - ) - stripe_config = cs.StripeConfig( - [1, 4, 4, 16], [1, 64, 72, 96], [1, 4, 4, 16], [1, 2, 3, 4], [1, 16, 13, 6], [0, 0, 0, 0] - ) - subkernels = 3 - - valid_block_configs = [cs.BlockConfig([1, 2, 4, 16], [1, 2, 4, 16], 15000, 7500)] - - part = EthosuPart( - te_subgraph, - [propagator], - output_quantum, - subkernels, - valid_block_configs, - 1, - ) - input_tensor = cs.Tensor(shape=[1, 66, 74, 16], dtype="int8") - part.set_input(0, input_tensor) - output_tensor = cs.Tensor(shape=[1, 66, 74, 16], dtype="int8") - part.set_output(output_tensor) - - assert part.get_stripe_align_hint() == output_quantum - # Check that the performance model runs, don't verify output - part.get_performance_info(stripe_config, BufferMode.ROLLING) - part.get_performance_info(stripe_config, BufferMode.RECOMPUTE) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_part_performance.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_part_performance.py deleted file mode 100644 index f68e29559743..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_part_performance.py +++ /dev/null @@ -1,234 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") - -from functools import reduce -import numpy as np -import math - -import tvm.contrib.ethosu.cascader as cs -from tvm.contrib.ethosu.cascader.device_config import _Shape - -from .infra import make_matrices - - -@pytest.mark.parametrize( - "acc_config, expected", - [ - ("ethos-u55-256", (1, 0.125, 0.75, 0.375, 0.75)), - ("ethos-u55-128", (1, 0.25, 1.5, 0.75, 0.75)), - ("ethos-u55-64", (1, 0.5, 3, 1.5, 1.5)), - ("ethos-u55-32", (2, 1, 6, 3, 3)), - ], -) -def test_device_config_cycles(acc_config, expected): - device_config = cs.EthosuDeviceConfig(acc_config) - - conv_type = "ethosu_conv2d" - conv_str = None - conv_ifm_dtype = "int8" - conv_ofm_dtype = "int8" - conv_activation = "LUT" - conv_cycles = device_config._get_output_cycles( - conv_type, conv_str, conv_ifm_dtype, conv_ofm_dtype, conv_activation - ) - assert conv_cycles == expected[0] - - pool_type = "ethosu_pooling" - pool_str = "MAX" - pool_ifm_dtype = "int8" - pool_ofm_dtype = "int8" - pool_activation = "NONE" - pool_cycles = device_config._get_output_cycles( - pool_type, pool_str, pool_ifm_dtype, pool_ofm_dtype, pool_activation - ) - assert pool_cycles == expected[1] - - add_type = "ethosu_binary_elementwise" - add_str = "ADD" - add_ifm_dtype = "int8" - add_ofm_dtype = "int8" - add_activation = "NONE" - add_cycles = device_config._get_output_cycles( - add_type, add_str, add_ifm_dtype, add_ofm_dtype, add_activation - ) - assert add_cycles == expected[2] - - mul_type = "ethosu_binary_elementwise" - mul_str = "MUL" - mul_ifm_dtype = "int8" - mul_ofm_dtype = "int8" - mul_activation = "NONE" - mul_cycles = device_config._get_output_cycles( - mul_type, mul_str, mul_ifm_dtype, mul_ofm_dtype, mul_activation - ) - assert mul_cycles == expected[3] - - mul_32_type = "ethosu_binary_elementwise" - mul_32_str = "MUL" - mul_32_ifm_dtype = "int8" - mul_32_ofm_dtype = "int32" - mul_32_activation = "NONE" - mul_32_cycles = device_config._get_output_cycles( - mul_32_type, mul_32_str, mul_32_ifm_dtype, mul_32_ofm_dtype, mul_32_activation - ) - assert mul_32_cycles == expected[4] - - -@pytest.mark.parametrize( - "accelerator, op_type, activation, kernel, stride, dilation, padding, in_shape, out_shape, block_shape, input_block_shape, expected", - [ - ( - "ethos-u55-128", - "ethosu_conv2d", - "NONE", - (3, 3), - (1, 1), - (1, 1), - (0, 0, 0, 0), - (1, 16, 16, 96), - (1, 16, 16, 96), - (1, 8, 8, 16), - (1, 10, 10, 32), - 167733, - ), - ( - "ethos-u55-128", - "ethosu_conv2d", - "NONE", - (10, 4), - (2, 1), - (1, 1), - (0, 0, 0, 0), - (1, 58, 13, 1), - (1, 25, 10, 276), - (1, 6, 10, 32), - (1, 18, 14, 8), - 174105, - ), - ( - "ethos-u55-128", - "ethosu_depthwise_conv2d", - "NONE", - (3, 3), - (2, 2), - (1, 1), - (1, 1, 1, 1), - (1, 25, 10, 276), - (1, 13, 5, 276), - (1, 7, 6, 16), - (1, 15, 14, 16), - 17590, - ), - ( - "ethos-u55-128", - "ethosu_depthwise_conv2d", - "NONE", - (4, 9), - (1, 1), - (1, 1), - (0, 0, 0, 0), - (1, 28, 81, 42), - (1, 25, 73, 41), - (1, 4, 16, 16), - (1, 7, 24, 16), - 173414, - ), - ], -) -def test_conv_performance( - accelerator, - op_type, - activation, - kernel, - stride, - dilation, - padding, - in_shape, - out_shape, - block_shape, - input_block_shape, - expected, -): - ifm_channels = in_shape[3] - ifm_matrix, ifm_offset, weight_matrix, weight_offset, _, _ = make_matrices( - op_type, - kernel, - stride, - padding, - "NHWC", - "NHWC", - dilation, - ifm_channels, - ) - - propagator = cs.Propagator(ifm_matrix, ifm_offset) - weight_propagator = cs.Propagator(weight_matrix, 
weight_offset) - - subkernels = ((kernel[0] + 7) // 8) * ((kernel[1] + 7) // 8) - - device_config = cs.EthosuDeviceConfig(accelerator) - - output_cycles = device_config._get_output_cycles(op_type, "", "int8", "int8", activation) - output_cycles *= reduce(lambda a, b: a * b, block_shape, 1) - is_partkernel = device_config.is_partkernel( - op_type, ifm_channels, "int8", kernel[0] * kernel[1] - ) - compute_cycles = device_config._estimate_compute_cycles_per_block( - op_type, - _Shape(block_shape), - _Shape(input_block_shape), - kernel[0], - kernel[1], - ifm_channels, - "int8", - is_partkernel, - ) - block_configs = [ - cs.BlockConfig(input_block_shape, block_shape, compute_cycles, int(output_cycles)) - ] - - output_quantum = [1, 1, 2, 8] - te_subgraph = cs.TESubgraph([], None) - part = cs.EthosuPart( - te_subgraph, - [propagator, weight_propagator], - output_quantum, - subkernels, - block_configs, - 1, - ) - part.set_input(0, cs.Tensor(in_shape, "int8")) - part.set_input(1, cs.Tensor([ifm_channels, kernel[0], kernel[1], out_shape[-1]], "int8")) - part.set_output(cs.Tensor(out_shape, "int8")) - - stripes = [1] * len(output_quantum) - offset = [0] * len(output_quantum) - order = [1, 2, 3, 4] - - stripe_config = cs.StripeConfig(out_shape, out_shape, out_shape, order, stripes, offset) - - compute_cycles = part.get_performance_info(stripe_config, cs.BufferMode.ROLLING).compute_cycles - tolerance = expected * 0.1 - - assert expected - tolerance <= compute_cycles <= expected + tolerance - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_pooling_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_pooling_matcher.py deleted file mode 100644 index 1faec87ba2aa..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_pooling_matcher.py +++ /dev/null @@ -1,83 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") - -import numpy as np - -from tvm import te -import tvm.contrib.ethosu.cascader as cs -from tvm.relay.backend.contrib.ethosu.te.pooling import match_ethosu_pooling, pooling_compute -from .infra import make_matrices - - -@pytest.mark.parametrize("pool_shape", [(3, 3), (2, 1), (3, 5)]) -@pytest.mark.parametrize("stride", [(1, 1), (2, 1), (3, 2)]) -@pytest.mark.parametrize("padding", [(0, 0, 0, 0), (3, 2, 3, 2), (2, 1, 0, 1)]) -@pytest.mark.parametrize("ifm_layout", ["NHWC", "NHCWB16"]) -@pytest.mark.parametrize("ofm_layout", ["NHWC", "NHCWB16"]) -def test_ethosu_pooling_matcher(pool_shape, stride, padding, ifm_layout, ofm_layout): - ofm_channels = 21 - if ifm_layout == "NHWC": - ifm_shape = (1, 12, 15, ofm_channels) - else: - ifm_shape = (1, 12, 1 + ((ofm_channels - 1) // 16), 15, 16) - ifm = te.placeholder(ifm_shape, dtype="int8") - lut = te.placeholder((), dtype="uint8") - out = pooling_compute( - ifm=ifm, - lut=lut, - pooling_type="MAX", - ifm_scale=1, - ifm_zero_point=0, - ofm_scale=1, - ofm_zero_point=0, - pool_shape=pool_shape, - ofm_channels=ofm_channels, - ofm_dtype="int8", - strides=stride, - padding=padding, - activation="NONE", - clip_min=0, - clip_max=0, - rounding_mode="TFL", - upscale="NONE", - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - (ifm_transform, ifm_offset, _, _, _, _) = make_matrices( - "ethosu_pooling", - pool_shape, - stride, - padding, - ifm_layout, - ofm_layout, - ofm_channels=ofm_channels, - ) - - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - part = match_ethosu_pooling(out, device_config) - - assert isinstance(part, cs.EthosuPart) - assert len(part.propagators) == 1 - assert part.propagators[0].transform == ifm_transform - assert part.propagators[0].offset == ifm_offset - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_unary_elementwise_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_unary_elementwise_matcher.py deleted file mode 100644 index e79c75c00cb0..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_unary_elementwise_matcher.py +++ /dev/null @@ -1,134 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") - -import numpy as np -import math - -from tvm import te -import tvm.contrib.ethosu.cascader as cs -from tvm.relay.backend.contrib.ethosu.te.unary_elementwise import ( - match_ethosu_unary_elementwise, - unary_elementwise_compute, -) -from tvm.relay.backend.contrib.ethosu.te.common import get_layout_transform_matrices - - -def _make_matrices(ifm_layout, ofm_layout, ofm_channels): - nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(ofm_channels) - ifm_matrix = [ - [1, 0, 0, 0, 0], - [0, 1, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1], - ] - if ofm_layout == "NHCWB16": - ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist() - if ifm_layout == "NHCWB16": - ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist() - - return ifm_matrix - - -@pytest.mark.parametrize( - "ofm_shape", - [ - [1, 12, 15, 128], - [1, 16, 16, 16], - [1, 1, 1, 1024], - [1, 53, 91, 7], - [1, 182, 12, 72], - ], -) -@pytest.mark.parametrize("ifm_layout", ["NHWC", "NHCWB16"]) -@pytest.mark.parametrize("ofm_layout", ["NHWC", "NHCWB16"]) -@pytest.mark.parametrize("op_type", ["ABS", "CLZ"]) -def test_ethosu_unary_elementwise_matcher(ofm_shape, ifm_layout, ofm_layout, op_type): - ifm_shape = ofm_shape.copy() - ofm_channels = ofm_shape[3] - nhwc_to_nhcwb16, _ = get_layout_transform_matrices(ofm_channels) - if ifm_layout == "NHCWB16": - ifm_shape = [ - int(math.ceil(n)) - for n in np.matmul( - nhwc_to_nhcwb16, - ifm_shape - + [ - 1, - ], - ).tolist()[:-1] - ] - if ofm_layout == "NHCWB16": - ofm_shape = [ - int(math.ceil(n)) - for n in np.matmul( - nhwc_to_nhcwb16, - ofm_shape - + [ - 1, - ], - ).tolist()[:-1] - ] - order = [1, 2, 4, 3, 0] - else: - order = [1, 2, 3, 4] - - ifm = te.placeholder(ifm_shape, dtype="int8") - lut = te.placeholder((), dtype="uint8") - out = unary_elementwise_compute( - ifm=ifm, - lut=lut, - operator_type=op_type, - ifm_scale=1, - ifm_zero_point=0, - ofm_scale=1, - ofm_zero_point=0, - ofm_channels=ofm_channels, - activation="NONE", - clip_min=0, - clip_max=0, - rounding_mode="TFL", - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - ifm_propagator = out.op.attrs["ifm_propagator"] - - offset = [0] * len(ofm_shape) - stripes = [0] * len(ofm_shape) - output_stripe_config = cs.StripeConfig(ofm_shape, ofm_shape, ofm_shape, order, stripes, offset) - - ifm_transform = _make_matrices(ifm_layout, ofm_layout, ofm_channels) - - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - part = match_ethosu_unary_elementwise(out, device_config) - - assert isinstance(part, cs.EthosuPart) - assert len(part.propagators) == 1 - assert part.propagators[0].transform == ifm_transform - - propagated_ifm = ifm_propagator.propagate(output_stripe_config).shape - - # The layout transforms that have the exact number of output channels in them - # will lose no information about the number of channels - assert ifm_shape == propagated_ifm - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_graph.py b/tests/python/contrib/test_ethosu/cascader/test_graph.py deleted file mode 100644 index c3d5c0fd0061..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_graph.py +++ /dev/null @@ -1,204 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest - -pytest.importorskip("ethosu.vela") - -import tvm.contrib.ethosu.cascader as cs - - -def test_tensor(): - shape = [1, 2, 3] - dtype = "uint8" - is_constant = True - compression_ratio = 0.5 - size = 6 - tensor = cs.Tensor(shape, dtype, is_constant, compression_ratio) - assert tensor.shape == shape - assert tensor.dtype == dtype - assert tensor.is_constant == is_constant - assert tensor.compression_ratio == compression_ratio - assert tensor.size == size - - -def test_inline_part(): - subgraph = cs.TESubgraph([], None) - part = cs.InlinePart( - subgraph, - [ - cs.Propagator( - [[0, 1, 0], [1, 0, 0], [0, 0, 1]], - [0, 0], - ), - ], - ) - output_stripe_config = cs.StripeConfig([2, 4], [8, 8], [2, 4], [1, 2], [4, 2], [0, 0]) - input_stripe_config = cs.StripeConfig([4, 2], [8, 8], [4, 2], [2, 1], [2, 4], [0, 0]) - - assert part.input_tensors == [None] - assert part.output_tensor == None - assert len(part.propagators) == 1 - assert part.in_line == True - assert part.get_stripe_align_hint() == [1, 1] - performance_info = part.get_performance_info(output_stripe_config, cs.BufferMode.RECOMPUTE) - assert performance_info.compute_cycles == 0 - assert performance_info.read_bytes == [0] - assert performance_info.write_bytes == 0 - input_stripe_configs = part.calculate_input_stripe_configs(output_stripe_config) - assert len(input_stripe_configs) == 1 - assert input_stripe_configs[0] == input_stripe_config - - -def test_small_graph(): - subgraph = cs.TESubgraph([], None) - part_a = cs.InlinePart( - subgraph, - [ - cs.Propagator( - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], - [0, 0], - ), - cs.Propagator( - [[0, 1, 0], [1, 0, 0], [0, 0, 1]], - [-1, -1], - ), - ], - ) - part_b = cs.InlinePart( - subgraph, - [ - cs.Propagator( - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], - [0, 0], - ), - ], - ) - tensor_1 = cs.Tensor([10, 10], "uint8") - tensor_2 = cs.Tensor([9, 9], "uint8") - tensor_3 = cs.Tensor([10, 10], "uint8") - tensor_4 = cs.Tensor([10, 10], "uint8") - - part_a.set_input(0, tensor_1) - part_a.set_input(1, tensor_2) - part_a.set_output(tensor_3) - tensor_1.add_consumer(part_a) - tensor_2.add_consumer(part_a) - tensor_3.add_producer(part_a) - part_b.set_input(0, tensor_3) - part_b.set_output(tensor_4) - tensor_3.add_consumer(part_b) - tensor_4.add_producer(part_b) - - assert part_a.input_tensors == [tensor_1, tensor_2] - assert part_a.output_tensor == tensor_3 - assert part_b.input_tensors == [tensor_3] - assert part_b.output_tensor == tensor_4 - - assert tensor_1.producers == [] - assert tensor_1.consumers == [part_a] - assert tensor_2.producers == [] - assert tensor_2.consumers == [part_a] - assert tensor_3.producers == [part_a] - assert tensor_3.consumers == [part_b] - assert tensor_4.producers == [part_b] - assert tensor_4.consumers == [] - - graph = cs.CascaderGraph([tensor_1, tensor_2], [tensor_4]) - assert graph.input_tensors == [tensor_1, tensor_2] - assert graph.output_tensors == [tensor_4] - assert graph.part_order == 
[part_b, part_a] - for i, part in enumerate(graph.part_order): - assert graph.get_part_id(part) == i - - -def test_create_cascader_graph(TwoConv2DWithSliceTE): - _, te_graph, const_dict = TwoConv2DWithSliceTE - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - graph = cs.create_cascader_graph(te_graph, const_dict, device_config) - - output_tensor = graph.output_tensors[0] - assert output_tensor.shape == [1, 6, 1, 6, 16] - assert len(output_tensor.producers) == 1 - assert not output_tensor.is_constant - - conv2_part = output_tensor.producers[0] - assert isinstance(conv2_part, cs.EthosuPart) - assert len(conv2_part.input_tensors) == 3 - - assert conv2_part.input_tensors[0].shape == [1, 6, 6, 64] - assert len(conv2_part.input_tensors[0].producers) == 1 - assert not conv2_part.input_tensors[0].is_constant - - assert conv2_part.input_tensors[1].shape == [16, 3, 3, 64] - assert len(conv2_part.input_tensors[1].producers) == 0 - assert conv2_part.input_tensors[1].is_constant - - assert conv2_part.input_tensors[2].shape == [16, 10] - assert len(conv2_part.input_tensors[2].producers) == 0 - assert conv2_part.input_tensors[2].is_constant - - slice_part = conv2_part.input_tensors[0].producers[0] - assert isinstance(slice_part, cs.InlinePart) - assert len(slice_part.input_tensors) == 1 - - assert slice_part.input_tensors[0].shape == [1, 12, 12, 64] - assert len(slice_part.input_tensors[0].producers) == 1 - assert not slice_part.input_tensors[0].is_constant - - conv1_part = slice_part.input_tensors[0].producers[0] - assert isinstance(conv1_part, cs.EthosuPart) - assert len(conv1_part.input_tensors) == 3 - - assert conv1_part.input_tensors[0].shape == [1, 12, 12, 8] - assert len(conv1_part.input_tensors[0].producers) == 0 - assert not conv1_part.input_tensors[0].is_constant - - assert conv1_part.input_tensors[1].shape == [64, 1, 1, 8] - assert len(conv1_part.input_tensors[1].producers) == 0 - assert conv1_part.input_tensors[1].is_constant - - assert conv1_part.input_tensors[2].shape == [64, 10] - assert len(conv1_part.input_tensors[2].producers) == 0 - assert conv1_part.input_tensors[2].is_constant - - -def test_create_diamond_graph(MobileNetv2DiamondTE): - _, te_graph, const_dict = MobileNetv2DiamondTE - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - graph = cs.create_cascader_graph(te_graph, const_dict, device_config) - - output_tensor = graph.output_tensors[0] - assert output_tensor.shape == [1, 56, 56, 24] - assert len(output_tensor.producers) == 1 - assert not output_tensor.is_constant - - add1_part = output_tensor.producers[0] - assert isinstance(add1_part, cs.EthosuPart) - assert len(add1_part.input_tensors) == 2 - assert graph.get_part_id(add1_part) == 0 - - assert add1_part.input_tensors[0].shape == [1, 56, 56, 24] - assert len(add1_part.input_tensors[0].producers) == 1 - assert not add1_part.input_tensors[0].is_constant - - assert add1_part.input_tensors[1].shape == [1, 56, 56, 24] - assert len(add1_part.input_tensors[0].producers) == 1 - assert not add1_part.input_tensors[0].is_constant - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_integration.py b/tests/python/contrib/test_ethosu/cascader/test_integration.py deleted file mode 100644 index 14cc8fbc61cf..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_integration.py +++ /dev/null @@ -1,143 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=wrong-import-position,invalid-name - -""" -Test the cascader in the compilation flow. -""" - -import pytest - -pytest.importorskip("ethosu.vela") - -import numpy as np - -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu.codegen import _create_cascader -from tvm.relay.backend.contrib.ethosu.tir.compiler import _lower_to_tir -from tvm.contrib.ethosu.cascader import MemoryRegion, EthosuDeviceConfig - -from .. import infra as test_infra -from . import infra as cascader_test_infra - - -def _ethos_u55_cascader(): - sram = MemoryRegion( - name="SRAM", - size=10**6, - read_bandwidth=16, - write_bandwidth=16, - read_latency=0, - write_latency=0, - burst_length=1, - ) - flash = MemoryRegion(name="FLASH", size=10**7, read_bandwidth=4, write_bandwidth=4) - - device_config = EthosuDeviceConfig("ethos-u55-256") - cascader_options = cascader_test_infra.make_options( - cascade_region=sram, - max_proposals=64, - stripe_factors=4, - max_plan_size=10, - max_open_plans=8, - max_closed_plans=32, - always_copy_size=1024, - disable_pareto_plans=False, - disable_pareto_proposals=False, - enable_striping=False, - ) - return _create_cascader( - options=cascader_options, - io_region=sram, - constant_region=flash, - working_regions=[sram], - device_config=device_config, - ) - - -def _compile_model(relay_function): - mod = tvm.IRModule() - mod["main"] = relay_function - mod = relay.transform.InferType()(mod) - tir_mod = _lower_to_tir(mod["main"], _ethos_u55_cascader())[0] - return tir_mod["main"] - - -def _create_single_conv2d(): - ifm = relay.var("x", shape=(1, 8, 8, 4), dtype="int8") - conv1 = test_infra.make_ethosu_conv2d(ifm, 4, 4, (3, 3), (1, 1), (1, 1), (1, 1)) - func = relay.Function(relay.analysis.free_vars(conv1), conv1) - return func - - -def _create_double_conv2d(): - ifm = relay.var("x", shape=(1, 8, 8, 4), dtype="int8") - conv1 = test_infra.make_ethosu_conv2d(ifm, 4, 4, (3, 3), (1, 1), (1, 1), (1, 1)) - conv2 = test_infra.make_ethosu_conv2d(conv1, 4, 4, (1, 3), (1, 1), (1, 1), (1, 1)) - func = relay.Function(relay.analysis.free_vars(conv2), conv2) - return func - - -def _create_scalar_add(): - ifm = relay.var("x", shape=(1, 5, 4, 3), dtype="int8") - ifm2 = relay.const(np.ones((1, 1, 1, 1)), dtype="int8") - add = test_infra.make_ethosu_binary_elementwise( - ifm, ifm2, ifm_channels=3, ifm2_channels=1, operator_type="ADD", ofm_dtype="int8" - ) - func = relay.Function(relay.analysis.free_vars(add), add) - return func - - -def test_single_conv_compute_cycles_hint(): - """ - Check the "compute_cycles_hint" annotation remains in the lowering flow - for single convolution. 
- """ - primfunc = _compile_model(_create_single_conv2d()) - ops = primfunc.body.body.seq - compute_cycles_hints = [2944, 320] - for op, compute_cycle_hint in zip(ops, compute_cycles_hints): - assert op.attr_key == "pragma_compute_cycles_hint" - assert op.value == compute_cycle_hint - - -def test_double_conv_compute_cycles_hint(): - """ - Check the "compute_cycles_hint" annotation remains in the lowering flow - for double convolution. - """ - primfunc = _compile_model(_create_double_conv2d()) - ops = primfunc.body.body.body.body.seq - compute_cycles_hints = [2944, 1408, 320, 240] - for op, compute_cycle_hint in zip(ops, compute_cycles_hints): - assert op.attr_key == "pragma_compute_cycles_hint" - assert op.value == compute_cycle_hint - - -def test_scalar_add_compute_cycles_hint(): - """ - Check the "compute_cycles_hint" annotation remains in the lowering flow - for add with scalar values. - """ - primfunc = _compile_model(_create_scalar_add()) - ops = primfunc.body.body.seq - - compute_cycles_hints = [16, 24] - for op, compute_cycle_hint in zip(ops, compute_cycles_hints): - assert op.attr_key == "pragma_compute_cycles_hint" - assert op.value == compute_cycle_hint diff --git a/tests/python/contrib/test_ethosu/cascader/test_memory_reduction.py b/tests/python/contrib/test_ethosu/cascader/test_memory_reduction.py deleted file mode 100644 index 99238fa59337..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_memory_reduction.py +++ /dev/null @@ -1,393 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -import pytest - -pytest.importorskip("ethosu.vela") - -import numpy as np -import tensorflow as tf -import tflite.Model -from tvm import relay -from tvm.relay.backend import Executor, Runtime -from tvm.micro import model_library_format as mlf -from tvm.relay.op.contrib.ethosu import partition_for_ethosu -import tvm -from tvm import WorkspaceMemoryPools, WorkspacePoolInfo, PoolInfoProperties -from tvm.relay.backend.contrib.ethosu.codegen import extract_memory_info - -from .. 
import infra - - -def _get_compilation_config(accel_type, enable_cascader, enable_striping): - enable_usmp = True - - target = tvm.target.Target("c") - ethosu_target = tvm.target.Target("ethos-u") - runtime = Runtime("crt") - - executor = Executor( - "aot", - { - "workspace-byte-alignment": 16, - "interface-api": "c", - "unpacked-api": True, - }, - ) - pass_config = { - "tir.disable_vectorize": True, - "relay.ext.ethos-u.options": { - "accelerator_config": accel_type, - "enable_cascader": enable_cascader, - "enable_striping": enable_striping, - }, - "tir.usmp.enable": enable_usmp, - "tir.usmp.algorithm": "hill_climb", - "tir.disable_storage_rewrite": enable_usmp, - } - - return target, ethosu_target, runtime, executor, pass_config - - -def _get_ethosu_workspace_size( - mod, params, accel_type, pool_size, enable_cascader, enable_striping -): - - target, ethosu_target, runtime, executor, pass_config = _get_compilation_config( - accel_type, enable_cascader, enable_striping - ) - - workspace_memory_pools = WorkspaceMemoryPools( - [ - WorkspacePoolInfo( - "SRAM", - [target, ethosu_target], - PoolInfoProperties( - size_hint_bytes=pool_size, - read_bandwidth_bytes_per_cycle=16, - write_bandwidth_bytes_per_cycle=16, - target_burst_bytes={ethosu_target: 1}, - ), - ), - ] - ) - - with tvm.transform.PassContext(opt_level=3, config=pass_config): - lib = tvm.relay.build( - mod, - target, - executor=executor, - runtime=runtime, - workspace_memory_pools=workspace_memory_pools, - params=params, - ) - - mlf_memory_map = mlf._build_function_memory_map(lib.function_metadata) - return mlf_memory_map["main"][0]["workspace_size_bytes"] - - -@pytest.mark.parametrize( - "accel_type, expected_ws_size_without_striping, expected_ws_size_with_striping", - [ - ("ethos-u55-256", 1067520, 14208), - ("ethos-u55-128", 1067520, 4080), - ("ethos-u55-64", 1067520, 4080), - ("ethos-u55-32", 1067504, 4064), - ], -) -def test_double_conv2d( - accel_type, expected_ws_size_without_striping, expected_ws_size_with_striping -): - np.random.seed(1) - ifm_shape = (1, 321, 212, 6) - - @tf.function - def tf_graph(x): - ofm_channels = 10 - conv2d = tf.nn.conv2d( - x, - filters=tf.constant( - np.random.uniform(size=[3, 2, ifm_shape[3], ofm_channels]), # HWIO - dtype=tf.float32, - ), - strides=(1, 1), - padding="VALID", - dilations=(2, 1), - ) - conv2d = tf.nn.conv2d( - conv2d, - filters=tf.constant( - np.random.uniform(size=(1, 1, ofm_channels, 3)), # HWIO - dtype=tf.float32, - ), - strides=(3, 2), - padding="SAME", - dilations=(1, 1), - ) - - return conv2d - - _, tflite_graph = infra.get_tflite_graph(tf_graph, [ifm_shape]) - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - relay_module, params = relay.frontend.from_tflite(tflite_model) - mod = partition_for_ethosu(relay_module, params) - - # Run the graph without the cascader, with lots of memory - pool_size = 2000000 - workspace_size_cascader_disabled = _get_ethosu_workspace_size( - mod, params, accel_type, pool_size, enable_cascader=False, enable_striping=False - ) - workspace_size_cascader_enabled_striping_disabled = _get_ethosu_workspace_size( - mod, params, accel_type, pool_size, enable_cascader=True, enable_striping=False - ) - # if striping is not done, it should be same as cacader disabled - assert workspace_size_cascader_disabled == workspace_size_cascader_enabled_striping_disabled - - # Run the same graph with the cascader, giving it less memory to persuade cascder to cascade - pool_size = 600000 - workspace_size_cascader_enabled_striping_enabled = 
_get_ethosu_workspace_size( - mod, params, accel_type, pool_size, enable_cascader=True, enable_striping=True - ) - - assert workspace_size_cascader_disabled == expected_ws_size_without_striping - assert workspace_size_cascader_enabled_striping_enabled == expected_ws_size_with_striping - - -@pytest.mark.parametrize( - "accel_type, expected_ws_size_without_striping, expected_ws_size_with_striping", - [ - ("ethos-u55-256", 180288, 15200), - ("ethos-u55-128", 180288, 15200), - ("ethos-u55-64", 180288, 14432), - ("ethos-u55-32", 180272, 14416), - ], -) -def test_depthwise2d_conv2d_pooling( - accel_type, expected_ws_size_without_striping, expected_ws_size_with_striping -): - np.random.seed(2) - ifm_shape = (1, 80, 75, 3) - - @tf.function - def tf_graph(x): - # This graph will execute as one cascade - ofm_channels = 7 - conv2d = tf.nn.conv2d( - x, - filters=tf.constant( - np.random.uniform(size=[3, 2, ifm_shape[3], ofm_channels]), # HWIO - dtype=tf.float32, - ), - strides=(1, 1), - padding="VALID", - dilations=(1, 1), - ) - depthwise2d = tf.nn.depthwise_conv2d( - conv2d, - tf.constant(np.random.uniform(size=(3, 3, ofm_channels, 1)), dtype=tf.float32), # HWC1 - strides=(1, 1, 1, 1), - padding="VALID", - dilations=(1, 1), - ) - relu = tf.nn.relu(depthwise2d) - conv2d = tf.nn.conv2d( - relu, - filters=tf.constant( - np.random.uniform(size=[3, 2, ofm_channels, 2]), # HWIO - dtype=tf.float32, - ), - strides=(1, 1), - padding="SAME", - dilations=(1, 1), - ) - max_pool = tf.nn.max_pool(conv2d, (3, 3), (1, 1), "SAME") - - return max_pool - - _, tflite_graph = infra.get_tflite_graph(tf_graph, [ifm_shape]) - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - relay_module, params = relay.frontend.from_tflite(tflite_model) - mod = partition_for_ethosu(relay_module, params) - - # Run the graph without the cascader, with lots of memory - pool_size = 10**6 - workspace_size_cascader_disabled = _get_ethosu_workspace_size( - mod, params, accel_type, pool_size, enable_cascader=False, enable_striping=False - ) - workspace_size_cascader_enabled_striping_disabled = _get_ethosu_workspace_size( - mod, params, accel_type, pool_size, enable_cascader=True, enable_striping=False - ) - # if striping is not done, it should be same as cacader disabled - assert workspace_size_cascader_disabled == workspace_size_cascader_enabled_striping_disabled - - # Run the same graph with the cascader, giving it less memory to persuade cascder to cascade - pool_size = 50000 - workspace_size_cascader_enabled_striping_enabled = _get_ethosu_workspace_size( - mod, params, accel_type, pool_size, enable_cascader=True, enable_striping=True - ) - - assert workspace_size_cascader_disabled == expected_ws_size_without_striping - assert workspace_size_cascader_enabled_striping_enabled == expected_ws_size_with_striping - - -def test_multiple_memory_pools(): - """ - The cascader does not support multiple workspace memory - pools. Check the correct error is thrown. 
- """ - np.random.seed(2) - ifm_shape = (1, 80, 75, 3) - - target, ethosu_target, runtime, executor, pass_config = _get_compilation_config( - "ethos-u55-256", True, True - ) - workspace_memory_pools = WorkspaceMemoryPools( - [ - WorkspacePoolInfo( - "SRAM", - [target, ethosu_target], - PoolInfoProperties( - size_hint_bytes=1, - read_bandwidth_bytes_per_cycle=16, - write_bandwidth_bytes_per_cycle=16, - target_burst_bytes={ethosu_target: 1}, - ), - ), - WorkspacePoolInfo( - "SRAM", - [target, ethosu_target], - PoolInfoProperties( - size_hint_bytes=1, - read_bandwidth_bytes_per_cycle=16, - write_bandwidth_bytes_per_cycle=16, - target_burst_bytes={ethosu_target: 1}, - ), - ), - ] - ) - - @tf.function - def tf_graph(x): - return tf.nn.max_pool(x, (3, 3), (1, 1), "SAME") - - _, tflite_graph = infra.get_tflite_graph(tf_graph, [ifm_shape]) - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - relay_module, params = relay.frontend.from_tflite(tflite_model) - mod = partition_for_ethosu(relay_module, params) - - with pytest.raises(ValueError) as e: - with tvm.transform.PassContext(opt_level=3, config=pass_config): - tvm.relay.build( - mod, - target, - executor=executor, - runtime=runtime, - workspace_memory_pools=workspace_memory_pools, - params=params, - ) - - expected_reason = "Exactly one workspace pool needs to be provided for the U55 cascader" - on_error = "A ValueError was caught but its reason is not the expected one." - assert expected_reason in str(e.value), on_error - - -def test_missing_memory_pools(): - """ - The cascader requires memory pools to be present, check the correct error - is thrown when there aren't any. - """ - np.random.seed(2) - ifm_shape = (1, 80, 75, 3) - - target, _, runtime, executor, pass_config = _get_compilation_config("ethos-u55-256", True, True) - - @tf.function - def tf_graph(x): - return tf.nn.max_pool(x, (3, 3), (1, 1), "SAME") - - _, tflite_graph = infra.get_tflite_graph(tf_graph, [ifm_shape]) - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - relay_module, params = relay.frontend.from_tflite(tflite_model) - mod = partition_for_ethosu(relay_module, params) - - with pytest.raises(ValueError) as e: - with tvm.transform.PassContext(opt_level=3, config=pass_config): - tvm.relay.build( - mod, - target, - executor=executor, - runtime=runtime, - workspace_memory_pools=None, - params=params, - ) - - expected_reason = "Workspace memory pool needs to be provided for the U55 cascader" - on_error = "A ValueError was caught but its reason is not the expected one." - assert expected_reason in str(e.value), on_error - - -def test_invalid_accelerator(): - """ - Check an error is thrown when an unsupported accelerator configuration - is used. 
- """ - np.random.seed(2) - ifm_shape = (1, 80, 75, 3) - - target, ethosu_target, runtime, executor, pass_config = _get_compilation_config( - "ethos-u65-256", True, True - ) - workspace_memory_pools = WorkspaceMemoryPools( - [ - WorkspacePoolInfo( - "SRAM", - [target, ethosu_target], - PoolInfoProperties( - size_hint_bytes=1, - read_bandwidth_bytes_per_cycle=16, - write_bandwidth_bytes_per_cycle=16, - target_burst_bytes={ethosu_target: 1}, - ), - ), - ] - ) - - @tf.function - def tf_graph(x): - return tf.nn.max_pool(x, (3, 3), (1, 1), "SAME") - - _, tflite_graph = infra.get_tflite_graph(tf_graph, [ifm_shape]) - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - relay_module, params = relay.frontend.from_tflite(tflite_model) - mod = partition_for_ethosu(relay_module, params) - - with pytest.raises(ValueError) as e: - with tvm.transform.PassContext(opt_level=3, config=pass_config): - tvm.relay.build( - mod, - target, - executor=executor, - runtime=runtime, - workspace_memory_pools=workspace_memory_pools, - params=params, - ) - - expected_reason = "Cascading is not supported for the U65 accelerator" - on_error = "A ValueError was caught but its reason is not the expected one." - assert expected_reason in str(e.value), on_error diff --git a/tests/python/contrib/test_ethosu/cascader/test_pareto.py b/tests/python/contrib/test_ethosu/cascader/test_pareto.py deleted file mode 100644 index 65d3619c64bb..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_pareto.py +++ /dev/null @@ -1,149 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-from tvm.tir import IntImm -from tvm.contrib.ethosu.cascader.pareto import ( - _get_pareto_frontier, - _thin_vector, - _pareto_cull_plans, -) -from tvm.contrib.ethosu.cascader import ( - Plan, - StripeConfig, - TensorConfig, - TensorConfigState, - BufferMode, - Tensor, -) - -import pytest -import numpy as np - - -def _ref_get_pareto_frontier(costs): - is_efficient = np.ones(costs.shape[0], dtype=bool) - for i, c in enumerate(costs): - if is_efficient[i]: - is_efficient[is_efficient] = np.any( - costs[is_efficient] < c, axis=1 - ) # Keep any point with a lower cost - is_efficient[i] = True # And keep self - return is_efficient - - -def _ref_thin_vector(vec, max_size): - if max_size < 1: - return [] - if len(vec) <= max_size or len(vec) == 0: - return vec - if max_size == 1: - return [vec[0]] - samples = np.linspace(0, len(vec), max_size - 1, endpoint=False).astype(int) - samples = np.append(samples, len(vec) - 1) - return vec[samples] - - -def _ref_pareto_cull_plans(plans, points): - if len(plans) <= points: - return plans - plans = np.array(sorted(plans, key=lambda x: x.memory_usage)) - costs = [] - for plan in plans: - costs.append(np.array([plan.memory_usage, plan.cycles])) - is_efficient = _ref_get_pareto_frontier(np.array(costs)) - culled_plans = plans[is_efficient] - thinned_plans = ( - culled_plans - if len(culled_plans) <= points - else _ref_thin_vector(np.array(culled_plans), points) - ) - return thinned_plans - - -@pytest.mark.parametrize("num_costs", [1, 10, 30, 100, 300, 1000]) -def test_get_pareto_frontier(num_costs): - cost_low = 1 - cost_high = 100 - dims = 2 - costs = [] - for i in range(num_costs): - costs.append(list(np.random.randint(cost_low, cost_high, size=(dims,)))) - reference = list(_ref_get_pareto_frontier(np.array(costs))) - result = _get_pareto_frontier(costs) - assert result == reference - - -@pytest.mark.parametrize("vec_length", [0, 1, 10, 25, 100]) -@pytest.mark.parametrize("max_size", [0, 1, 2, 5, 11, 51]) -def test_thin_vector(vec_length, max_size): - def _make_vector(length): - vector = [] - for i in range(length): - obj = IntImm("int32", i) - vector.append(obj) - - return vector - - vector = _make_vector(vec_length) - reference = list(_ref_thin_vector(np.array(vector), max_size)) - result = _thin_vector(vector, max_size) - assert result == reference - - -@pytest.mark.parametrize("num_plans", [0, 1, 10, 25, 100]) -@pytest.mark.parametrize("max_plans", [0, 1, 2, 5, 11, 51]) -def test_pareto_cull_plans(num_plans, max_plans, SRAM): - memory_usage_low = 1 - memory_usage_high = 1000 - cycles_low = 100 - cycles_high = 10000 - - def _make_plan(memory_usage, cycles): - output_config = TensorConfig( - tensor=Tensor([1], "int8"), - home_region=SRAM, - state=TensorConfigState.BOUNDARY, - buffer_mode=BufferMode.RECOMPUTE, - stripe_configs=[StripeConfig([1], [1], [1], [1], [1], [0])], - ) - return Plan( - tensor_configs={}, - open_configs=[], - output_config=output_config, - part_group=[], - interior_region=SRAM, - memory_usage=memory_usage, - cycles=cycles, - ) - - def _make_plans(num): - plans = [] - for _ in range(num): - memory_usage = np.random.randint(memory_usage_low, memory_usage_high) - cycles = np.random.randint(cycles_low, cycles_high) - plan = _make_plan(memory_usage, cycles) - plans.append(plan) - - return plans - - plans = _make_plans(num_plans) - reference = list(_ref_pareto_cull_plans(plans, max_plans)) - result = _pareto_cull_plans(plans, max_plans, False) - assert result == reference - - -if __name__ == "__main__": - tvm.testing.main() diff --git 
a/tests/python/contrib/test_ethosu/cascader/test_plan.py b/tests/python/contrib/test_ethosu/cascader/test_plan.py deleted file mode 100644 index 0d33743cd945..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_plan.py +++ /dev/null @@ -1,244 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import tvm.contrib.ethosu.cascader as cs - -import pytest - - -def test_plan(DRAM, SRAM): - subgraph = cs.TESubgraph([], None) - part = cs.InlinePart( - subgraph, - [ - cs.Propagator( - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], - [0, 0], - ), - ], - ) - tensor_1 = cs.Tensor([10, 10], "uint8") - tensor_2 = cs.Tensor([10, 10], "uint8") - - part.set_input(0, tensor_1) - part.set_output(tensor_2) - tensor_1.add_consumer(part) - tensor_2.add_producer(part) - - output_stripe_config = cs.StripeConfig( - shape=[5, 5], - extent=[10, 10], - strides=[5, 5], - order=[1, 2], - stripes=[2, 2], - offset=[0, 0], - ) - tensor_config_out = cs.TensorConfig( - tensor=tensor_2, - home_region=DRAM, - state=cs.TensorConfigState.BOUNDARY, - buffer_mode=cs.BufferMode.RECOMPUTE, - stripe_configs=[output_stripe_config], - copy_tensor=False, - ) - input_stripe_config = part.calculate_input_stripe_configs(output_stripe_config)[0] - tensor_config_in = cs.TensorConfig( - tensor=tensor_1, - home_region=DRAM, - state=cs.TensorConfigState.INTERIOR, - buffer_mode=cs.BufferMode.ROLLING, - stripe_configs=[input_stripe_config], - copy_tensor=False, - ) - tensor_configs = {tensor_1: tensor_config_in, tensor_2: tensor_config_out} - open_configs = frozenset([tensor_config_in]) - part_group = frozenset([part]) - interior_region = SRAM - memory_usage = 100 - cycles = 20 - plan = cs.Plan( - tensor_configs=tensor_configs, - open_configs=open_configs, - output_config=tensor_config_out, - part_group=part_group, - interior_region=interior_region, - memory_usage=memory_usage, - cycles=cycles, - ) - - assert plan.tensor_configs == tensor_configs - assert plan.open_configs == open_configs - assert plan.output_config == tensor_config_out - assert plan.part_group == part_group - assert plan.interior_region == interior_region - assert plan.memory_usage == memory_usage - assert plan.cycles == cycles - - -def test_plan_merge(DRAM, SRAM): - subgraph = cs.TESubgraph([], None) - part_1 = cs.InlinePart( - subgraph, - [ - cs.Propagator( - [[2, 0, 0], [0, 2, 0], [0, 0, 1]], - [0, 0], - ), - ], - ) - part_2 = cs.InlinePart( - subgraph, - [ - cs.Propagator( - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], - [0, 0], - ), - cs.Propagator( - [[0, 0, 6], [0, 0, 6], [0, 0, 1]], - [0, 0], - ), - cs.Propagator( - [[1, 0], [0, 1]], - [0], - ), - ], - ) - tensor_1 = cs.Tensor([20, 20], "uint8") - tensor_2 = cs.Tensor([10, 10], "uint8") - tensor_3 = cs.Tensor([6, 6], "uint8") - tensor_4 = cs.Tensor([10], "uint8") - tensor_5 = cs.Tensor([10, 
10], "uint8") - - part_1.set_input(0, tensor_1) - part_1.set_output(tensor_2) - tensor_1.add_consumer(part_1) - tensor_2.add_producer(part_1) - - part_2.set_input(0, tensor_2) - part_2.set_input(1, tensor_3) - part_2.set_input(2, tensor_4) - part_2.set_output(tensor_5) - tensor_2.add_consumer(part_2) - tensor_3.add_consumer(part_2) - tensor_4.add_consumer(part_2) - tensor_5.add_producer(part_2) - - output_stripe_config = cs.StripeConfig( - shape=[5, 5], - extent=[10, 10], - strides=[5, 5], - order=[1, 2], - stripes=[2, 2], - offset=[0, 0], - ) - tensor_config_5 = cs.TensorConfig( - tensor=tensor_5, - home_region=DRAM, - state=cs.TensorConfigState.BOUNDARY, - buffer_mode=cs.BufferMode.RECOMPUTE, - stripe_configs=[output_stripe_config], - copy_tensor=False, - ) - input_stripe_configs = part_2.calculate_input_stripe_configs(output_stripe_config) - tensor_config_4 = cs.TensorConfig( - tensor=tensor_4, - home_region=DRAM, - state=cs.TensorConfigState.BOUNDARY, - buffer_mode=cs.BufferMode.RECOMPUTE, - stripe_configs=[input_stripe_configs[2]], - copy_tensor=False, - ) - tensor_config_3 = cs.TensorConfig( - tensor=tensor_3, - home_region=SRAM, - state=cs.TensorConfigState.INTERIOR, - buffer_mode=cs.BufferMode.RECOMPUTE, - stripe_configs=[input_stripe_configs[1]], - copy_tensor=False, - ) - tensor_config_2 = cs.TensorConfig( - tensor=tensor_2, - home_region=SRAM, - state=cs.TensorConfigState.INTERIOR, - buffer_mode=cs.BufferMode.ROLLING, - stripe_configs=[input_stripe_configs[0]], - copy_tensor=False, - ) - input_stripe_config = part_1.calculate_input_stripe_configs(input_stripe_configs[0])[0] - tensor_config_1 = cs.TensorConfig( - tensor=tensor_1, - home_region=DRAM, - state=cs.TensorConfigState.BOUNDARY, - buffer_mode=cs.BufferMode.ROLLING, - stripe_configs=[input_stripe_config], - copy_tensor=False, - ) - tensor_configs = {tensor_1: tensor_config_1, tensor_2: tensor_config_2} - open_configs = frozenset([tensor_config_2]) - part_group = frozenset([part_1]) - interior_region = SRAM - memory_usage = 100 - cycles = 20 - plan_1 = cs.Plan( - tensor_configs=tensor_configs, - open_configs=open_configs, - output_config=tensor_config_2, - part_group=part_group, - interior_region=interior_region, - memory_usage=memory_usage, - cycles=cycles, - ) - - tensor_configs = { - tensor_2: tensor_config_2, - tensor_3: tensor_config_3, - tensor_4: tensor_config_4, - tensor_5: tensor_config_5, - } - open_configs = frozenset([tensor_config_2, tensor_config_3]) - part_group = frozenset([part_2]) - interior_region = SRAM - memory_usage = 200 - cycles = 30 - plan_2 = cs.Plan( - tensor_configs=tensor_configs, - open_configs=open_configs, - output_config=tensor_config_5, - part_group=part_group, - interior_region=interior_region, - memory_usage=memory_usage, - cycles=cycles, - ) - - merged_plan = plan_1.merge(plan_2) - - assert merged_plan.tensor_configs == { - tensor_1: tensor_config_1, - tensor_2: tensor_config_2, - tensor_3: tensor_config_3, - tensor_4: tensor_config_4, - tensor_5: tensor_config_5, - } - assert merged_plan.open_configs == frozenset([tensor_config_3]) - assert merged_plan.output_config == tensor_config_5 - assert merged_plan.part_group == frozenset([part_1, part_2]) - assert merged_plan.interior_region == interior_region - assert merged_plan.memory_usage == plan_1.memory_usage + plan_2.memory_usage - assert merged_plan.cycles == plan_1.cycles + plan_2.cycles - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_plan_generator.py 
b/tests/python/contrib/test_ethosu/cascader/test_plan_generator.py deleted file mode 100644 index c0d2a2f0c944..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_plan_generator.py +++ /dev/null @@ -1,305 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest - -import tvm.contrib.ethosu.cascader as cs -from .infra import make_simple_home_map, make_options, ethosu_enabled - -from tvm.contrib.ethosu.cascader.plan_generator import ( - _generate_output_stripe_configs, - _generate_single_plans, - _generate_graph_plans, -) - - -@pytest.mark.parametrize("stripe_factors", [3, 4, 8, 16, 10]) -def test_generate_output_stripe_configs_disable_striping(stripe_factors): - subgraph = cs.TESubgraph([], None) - part_1 = cs.InlinePart( - subgraph, - [ - cs.Propagator( - [[2, 0, 0], [0, 2, 0], [0, 0, 1]], - [0, 0], - ), - ], - ) - tensor_1 = cs.Tensor([800, 800], "uint8") - tensor_2 = cs.Tensor([400, 400], "uint8") - - part_1.set_input(0, tensor_1) - part_1.set_output(tensor_2) - tensor_1.add_consumer(part_1) - tensor_2.add_producer(part_1) - - assert ( - len( - _generate_output_stripe_configs( - part_1, stripe_factors, enable_striping=False, multi_dimensional=False - ) - ) - == 1 - ) - - -def test_generate_output_stripe_configs_multi_dimensional(): - stripe_factors = 3 - subgraph = cs.TESubgraph([], None) - part_1 = cs.InlinePart( - subgraph, - [ - cs.Propagator( - [[2, 0, 0], [0, 2, 0], [0, 0, 1]], - [0, 0], - ), - ], - ) - tensor_1 = cs.Tensor([800, 800], "uint8") - tensor_2 = cs.Tensor([400, 400], "uint8") - - part_1.set_input(0, tensor_1) - part_1.set_output(tensor_2) - tensor_1.add_consumer(part_1) - tensor_2.add_producer(part_1) - - expected_stripe_configs = { - cs.StripeConfig([1, 1], [400, 400], [1, 1], [1, 2], [400, 400], [0, 0]), - cs.StripeConfig([1, 1], [400, 400], [1, 1], [2, 1], [400, 400], [0, 0]), - cs.StripeConfig([200, 1], [400, 400], [200, 1], [1, 2], [2, 400], [0, 0]), - cs.StripeConfig([200, 1], [400, 400], [200, 1], [2, 1], [2, 400], [0, 0]), - cs.StripeConfig([400, 1], [400, 400], [400, 1], [2, 1], [1, 400], [0, 0]), - cs.StripeConfig([1, 200], [400, 400], [1, 200], [1, 2], [400, 2], [0, 0]), - cs.StripeConfig([1, 200], [400, 400], [1, 200], [2, 1], [400, 2], [0, 0]), - cs.StripeConfig([200, 200], [400, 400], [200, 200], [2, 1], [2, 2], [0, 0]), - cs.StripeConfig([200, 200], [400, 400], [200, 200], [1, 2], [2, 2], [0, 0]), - cs.StripeConfig([400, 200], [400, 400], [400, 200], [2, 1], [1, 2], [0, 0]), - cs.StripeConfig([1, 400], [400, 400], [1, 400], [1, 2], [400, 1], [0, 0]), - cs.StripeConfig([200, 400], [400, 400], [200, 400], [1, 2], [2, 1], [0, 0]), - cs.StripeConfig([400, 400], [400, 400], [400, 400], [1, 2], [1, 1], [0, 0]), - } - - output_stripe_configs = _generate_output_stripe_configs( - part=part_1, 
stripe_factors=stripe_factors, enable_striping=True, multi_dimensional=True - ) - - assert len(output_stripe_configs) == len(expected_stripe_configs) - assert set(output_stripe_configs) == expected_stripe_configs - - -def test_generate_output_stripe_configs_uncascadable_axis(): - stripe_factors = 3 - subgraph = cs.TESubgraph([], None) - part_1 = cs.InlinePart( - subgraph, - [ - cs.Propagator( - [[2, 0, 0], [0, 0, 200], [0, 0, 1]], - [0, 0], - ), - ], - ) - tensor_1 = cs.Tensor([800, 200], "uint8") - tensor_2 = cs.Tensor([400, 400], "uint8") - - part_1.set_input(0, tensor_1) - part_1.set_output(tensor_2) - tensor_1.add_consumer(part_1) - tensor_2.add_producer(part_1) - - expected_stripe_configs = { - cs.StripeConfig([1, 400], [400, 400], [1, 400], [1, 2], [400, 1], [0, 0]), - cs.StripeConfig([200, 400], [400, 400], [200, 400], [1, 2], [2, 1], [0, 0]), - cs.StripeConfig([400, 400], [400, 400], [400, 400], [1, 2], [1, 1], [0, 0]), - } - - output_stripe_configs = _generate_output_stripe_configs( - part=part_1, stripe_factors=stripe_factors, enable_striping=True, multi_dimensional=True - ) - - assert len(output_stripe_configs) == len(expected_stripe_configs) - assert set(output_stripe_configs) == expected_stripe_configs - - -def test_generate_output_stripe_configs_single_dimension(): - stripe_factors = 3 - subgraph = cs.TESubgraph([], None) - part_1 = cs.InlinePart( - subgraph, - [ - cs.Propagator( - [[2, 0, 0], [0, 2, 0], [0, 0, 1]], - [0, 0], - ), - ], - ) - tensor_1 = cs.Tensor([800, 800], "uint8") - tensor_2 = cs.Tensor([400, 400], "uint8") - - part_1.set_input(0, tensor_1) - part_1.set_output(tensor_2) - tensor_1.add_consumer(part_1) - tensor_2.add_producer(part_1) - - expected_stripe_configs = { - cs.StripeConfig([400, 1], [400, 400], [400, 1], [2, 1], [1, 400], [0, 0]), - cs.StripeConfig([400, 200], [400, 400], [400, 200], [2, 1], [1, 2], [0, 0]), - cs.StripeConfig([1, 400], [400, 400], [1, 400], [1, 2], [400, 1], [0, 0]), - cs.StripeConfig([200, 400], [400, 400], [200, 400], [1, 2], [2, 1], [0, 0]), - cs.StripeConfig([400, 400], [400, 400], [400, 400], [1, 2], [1, 1], [0, 0]), - } - - output_stripe_configs = _generate_output_stripe_configs( - part=part_1, stripe_factors=stripe_factors, enable_striping=True, multi_dimensional=False - ) - - assert len(output_stripe_configs) == len(expected_stripe_configs) - assert set(output_stripe_configs) == expected_stripe_configs - - -def test_generate_single_plans(SRAM, DRAM): - subgraph = cs.TESubgraph([], None) - part_1 = cs.InlinePart( - subgraph, - [ - cs.Propagator( - [[2, 0, 0], [0, 2, 0], [0, 0, 1]], - [0, 0], - ), - ], - ) - tensor_1 = cs.Tensor([800, 800], "int8") - tensor_2 = cs.Tensor([400, 400], "int8") - - part_1.set_input(0, tensor_1) - part_1.set_output(tensor_2) - tensor_1.add_consumer(part_1) - tensor_2.add_producer(part_1) - - home_map = { - tensor_1: [SRAM, DRAM], - tensor_2: [SRAM], - } - options = make_options(cascade_region=SRAM, stripe_factors=1) - output_stripe_configs = _generate_output_stripe_configs( - part_1, - options.stripe_factors, - enable_striping=True, - multi_dimensional=True, - ) - plans = _generate_single_plans(part_1, output_stripe_configs, home_map, options) - for plan in plans: - assert plan.interior_region == SRAM - assert plan.part_group == frozenset([part_1]) - assert set(plan.tensor_configs.keys()) == set([tensor_1, tensor_2]) - for open_config in plan.open_configs: - assert open_config.state == cs.TensorConfigState.INTERIOR - - -def test_generate_graph_plans(SRAM, DRAM): - num_part_groups = 3 - 
stripe_factors = 4 - max_plan_size = 10 - subgraph = cs.TESubgraph([], None) - part_a = cs.InlinePart( - subgraph, - [ - cs.Propagator( - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], - [0, 0], - ), - cs.Propagator( - [[0, 1, 0], [1, 0, 0], [0, 0, 1]], - [-1, -1], - ), - ], - ) - part_b = cs.InlinePart( - subgraph, - [ - cs.Propagator( - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], - [0, 0], - ), - ], - ) - tensor_1 = cs.Tensor([10, 10], "int8") - tensor_2 = cs.Tensor([9, 9], "int8") - tensor_3 = cs.Tensor([10, 10], "int8") - tensor_4 = cs.Tensor([10, 10], "int8") - - part_a.set_input(0, tensor_1) - part_a.set_input(1, tensor_2) - part_a.set_output(tensor_3) - tensor_1.add_consumer(part_a) - tensor_2.add_consumer(part_a) - tensor_3.add_producer(part_a) - part_b.set_input(0, tensor_3) - part_b.set_output(tensor_4) - tensor_3.add_consumer(part_b) - tensor_4.add_producer(part_b) - - graph = cs.CascaderGraph([tensor_1, tensor_2], [tensor_4]) - home_map = { - tensor_1: [SRAM, DRAM], - tensor_2: [SRAM], - tensor_3: [SRAM], - tensor_4: [SRAM, DRAM], - } - - options = make_options( - cascade_region=SRAM, - stripe_factors=stripe_factors, - max_plan_size=max_plan_size, - ) - closed_plans = _generate_graph_plans(graph, home_map, options) - - assert len(closed_plans) == num_part_groups - - -if ethosu_enabled: - - def test_plan_generator_two_conv2d(FLASH, SRAM, TwoConv2DGraph): - num_part_groups = 3 - graph = TwoConv2DGraph - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - stripe_factors=4, - max_plan_size=10, - ) - - closed_plans = _generate_graph_plans(graph, home_map, options) - - assert len(closed_plans) == num_part_groups - - def test_plan_generator_two_conv2d_with_slice(FLASH, SRAM, TwoConv2DWithSliceGraph): - num_part_groups = 4 # Note this is not 6 because 'slice' has an opaque Propagator - graph = TwoConv2DWithSliceGraph - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - stripe_factors=4, - max_plan_size=10, - ) - - closed_plans = _generate_graph_plans(graph, home_map, options) - - assert len(closed_plans) == num_part_groups - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_propagator.py b/tests/python/contrib/test_ethosu/cascader/test_propagator.py deleted file mode 100644 index 9712d00e52a9..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_propagator.py +++ /dev/null @@ -1,136 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
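The propagator tests that follow rely on an affine transform in homogeneous form: each input-stripe dimension is a linear combination of the output-stripe dimensions plus a constant column. A simplified sketch of the shape mapping only (the real Propagator also transforms extents, strides, order and offsets; the helper name below is an assumption for illustration):

import numpy as np
from math import ceil

def propagate_shape(transform, offset, output_shape):
    """Map an output stripe shape to the required input stripe shape."""
    out = np.array(list(output_shape) + [1], dtype=float)  # homogeneous vector
    in_dims = np.array(transform, dtype=float) @ out
    # Drop the homogeneous row and round partial elements up to whole elements.
    return [int(ceil(d)) for d in in_dims[:-1]], list(offset)

# A 2x downscaling part: producing a 5x5 output stripe needs a 10x10 input stripe.
shape, offset = propagate_shape([[2, 0, 0], [0, 2, 0], [0, 0, 1]], [0, 0], [5, 5])
print(shape, offset)  # [10, 10] [0, 0]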
-import pytest - -pytest.importorskip("ethosu.vela") - -from math import isclose -from tvm.contrib.ethosu.cascader import StripeConfig, Propagator - - -def test_propagator(): - transform = [ - [1, 0, 0, 0], - [0, 1 / 2, 0, 0], - [0, 0, -1, 0], - [0, 0, 0, 1], - ] - offset = [-1, 1, 2] - propagator = Propagator( - transform=transform, - offset=offset, - ) - assert list(propagator.offset) == offset - for i, row in enumerate(transform): - for j, value in enumerate(row): - assert isclose(propagator.transform[i][j], value) - - -@pytest.mark.parametrize( - ["propagator", "input_stripe_config", "output_stripe_config"], - [ - ( - Propagator( - transform=[ - [1, 0, 0, 0, 0], - [0, 1, 0, 0, 0], - [0, 0, 0, 1 / 16, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 0, 16], - [0, 0, 0, 0, 1], - ], - offset=[0, 0, 0, 0, 0], - ), - StripeConfig( - shape=[1, 12, 14, 36], - extent=[1, 24, 18, 72], - strides=[1, 12, 14, 36], - order=[1, 2, 3, 4], - stripes=[1, 2, 2, 2], - offset=[0, 0, 0, 0], - ), - StripeConfig( - shape=[1, 12, 3, 14, 16], - extent=[1, 24, 5, 18, 16], - strides=[1, 12, 2.25, 14, 0], - order=[1, 2, 4, 3, 0], - stripes=[1, 2, 2, 2, 1], - offset=[0, 0, 0, 0, 0], - ), - ), - ( - Propagator( - transform=[ - [0.5, 0, 0], - [0, 0.5, 0], - [0, 0, 1], - ], - offset=[0, 0], - ), - StripeConfig( - shape=[3, 5], - extent=[27, 50], - strides=[3, 5], - order=[1, 2], - stripes=[9, 10], - offset=[0, 0], - ), - StripeConfig( - shape=[2, 3], - extent=[14, 25], - strides=[1.5, 2.5], - order=[1, 2], - stripes=[9, 10], - offset=[0, 0], - ), - ), - ( - Propagator( - transform=[ - [2, 0, 0, 4], - [0, 1, 0, 2], - [0, 0, 0, 8], - [0, 0, 0, 1], - ], - offset=[-2, -1, 0], - ), - StripeConfig( - shape=[4, 6, 32], - extent=[48, 60, 64], - strides=[4, 6, 32], - order=[1, 2, 3], - stripes=[12, 10, 2], - offset=[0, 0, 0], - ), - StripeConfig( - shape=[12, 8, 8], - extent=[100, 62, 8], - strides=[8, 6, 0], - order=[1, 2, 0], - stripes=[12, 10, 1], - offset=[-2, -1, 0], - ), - ), - ], -) -def test_propagate(propagator, input_stripe_config, output_stripe_config): - result_stripe_config = propagator.propagate(input_stripe_config) - assert result_stripe_config == output_stripe_config - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py b/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py deleted file mode 100644 index 8a573c05fa2a..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py +++ /dev/null @@ -1,160 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest -from tvm.contrib.ethosu.cascader.proposal_generator import generate_proposals - -from .infra import make_simple_home_map, make_options, ethosu_enabled - - -if ethosu_enabled: - - def test_generate_proposals(FLASH, SRAM, TwoConv2DGraph): - graph = TwoConv2DGraph - min_sram = 3700 - max_sram = 11700 - input_configs = 1 - parts = 2 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=32, - stripe_factors=4, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert min_sram < proposal.memory_usage < max_sram - assert proposal.cycles > 0 - - def test_generate_proposals_binary(FLASH, SRAM, BinaryGraph): - graph = BinaryGraph - input_configs = 2 - parts = 3 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=32, - stripe_factors=4, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert proposal.cycles > 0 - - def test_generate_proposals_mobilenetv1_start(FLASH, SRAM, MobileNetv1StartGraph): - graph = MobileNetv1StartGraph - min_sram = 200000 - max_sram = 1300000 - input_configs = 1 - parts = 8 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=32, - stripe_factors=5, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert min_sram < proposal.memory_usage < max_sram - assert proposal.cycles > 0 - - def test_generate_proposals_mobilenetv1(FLASH, SRAM, MobileNetv1Graph): - graph = MobileNetv1Graph - min_sram = 200000 - max_sram = 1300000 - input_configs = 1 - parts = 27 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=32, - stripe_factors=5, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert min_sram < proposal.memory_usage < max_sram - assert proposal.cycles > 0 - - def test_generate_proposals_mobilenetv2diamond(FLASH, SRAM, MobileNetv2DiamondGraph): - graph = MobileNetv2DiamondGraph - min_sram = 370000 - max_sram = 990000 - input_configs = 1 - parts = 5 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=64, - stripe_factors=5, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert min_sram < proposal.memory_usage < max_sram - assert proposal.cycles > 0 - - def test_generate_proposals_mobilenetv1_disable_striping(FLASH, SRAM, MobileNetv1Graph): - graph = MobileNetv1Graph - home_map = 
make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=32, - stripe_factors=5, - max_plan_size=10, - enable_striping=False, - ) - - proposals = generate_proposals(graph, home_map, options) - assert len(proposals) == 1 - proposal = proposals[0] - for plan in proposal.plans: - for stripe_config in plan.output_config.stripe_configs: - for shape_dim, stride_dim in list(zip(stripe_config.shape, stripe_config.strides)): - # The striding and shape sizes in each dimension should be the same - # if striping is disabled - assert int(shape_dim) == int(stride_dim) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_scheduler.py b/tests/python/contrib/test_ethosu/cascader/test_scheduler.py deleted file mode 100644 index 417aeb9ed67f..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_scheduler.py +++ /dev/null @@ -1,83 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=wrong-import-position, invalid-name - -import pytest - -pytest.importorskip("ethosu.vela") - -import tvm.contrib.ethosu.cascader as cs - -from . 
import infra - - -def test_cascade(SRAM, FLASH, TwoConv2DWithSliceTE, TwoConv2DTE, MobileNetv1StartTE, MobileNetv1TE): - fixtures = [ - TwoConv2DTE, - TwoConv2DWithSliceTE, - MobileNetv1StartTE, - MobileNetv1TE, - ] - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - for sch, te_graph, const_dict in fixtures: - options = infra.make_options( - cascade_region=SRAM, - max_proposals=64, - stripe_factors=4, - max_plan_size=10, - max_open_plans=8, - max_closed_plans=32, - always_copy_size=1024, - disable_pareto_plans=False, - disable_pareto_proposals=False, - ) - cs.cascade(sch, te_graph, const_dict, options, SRAM, FLASH, [SRAM], device_config) - - -def test_compute_cycles_annotation(SRAM, FLASH, TwoConv2DTE): - device_config = cs.EthosuDeviceConfig("ethos-u55-256") - options = infra.make_options( - cascade_region=SRAM, - max_proposals=64, - stripe_factors=4, - max_plan_size=10, - max_open_plans=8, - max_closed_plans=32, - always_copy_size=1024, - disable_pareto_plans=False, - disable_pareto_proposals=False, - enable_striping=False, - ) - sch, te_graph, const_dict = TwoConv2DTE - cs.cascade(sch, te_graph, const_dict, options, SRAM, FLASH, [SRAM], device_config) - - # Stages that should have compute cycle annotations - # [copy, copy, conv2d, copy, conv2d] - stages = [6, 8, 9, 18, 19] - # Expected hints for each operation - compute_cycles_hints = [4096, 5120, 1440, 2560, 3072] - - for stage, compute_cycles_hint in zip(stages, compute_cycles_hints): - op = sch.stages[stage] - op_iter_vars = op.leaf_iter_vars[0] - op_attrs = op.iter_var_attrs[op_iter_vars] - assert op_attrs.pragma_keys[0] == "compute_cycles_hint" - assert op_attrs.pragma_values[0] == compute_cycles_hint - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_stripe_config.py b/tests/python/contrib/test_ethosu/cascader/test_stripe_config.py deleted file mode 100644 index f0142167e76a..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_stripe_config.py +++ /dev/null @@ -1,215 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
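The stripe-counting parametrizations that follow can look opaque; the counts simply record how many stripes of each clipped shape tile the tensor extent. A simplified sketch for the zero-offset, non-sliding-window case (it reproduces the {(3, 3): 16, ...} entry below; offsets and the sliding-window mode handled by count_stripes are ignored here):

from collections import Counter

def count_clipped_stripes(shape, extent, strides, stripes):
    """Count stripes of each clipped shape over a 2-D extent (no offset)."""
    per_axis = []
    for dim_shape, dim_extent, stride, n in zip(shape, extent, strides, stripes):
        # Stripe i starts at i*stride and is clipped at the tensor edge.
        per_axis.append([min(dim_shape, dim_extent - i * stride) for i in range(n)])
    counts = Counter()
    for h in per_axis[0]:
        for w in per_axis[1]:
            counts[(h, w)] += 1
    return dict(counts)

# 3x3 stripes, stride 2, 5 stripes per axis over a 10x10 extent:
# the last stripe on each axis is clipped to 2 elements.
print(count_clipped_stripes([3, 3], [10, 10], [2, 2], [5, 5]))
# {(3, 3): 16, (3, 2): 4, (2, 3): 4, (2, 2): 1}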
-import pytest - -pytest.importorskip("ethosu.vela") - -from tvm.contrib.ethosu.cascader.stripe_config import StripeConfig, count_stripes - - -def test_stripe_config(): - shape = [1, 2, 3] - extent = [2, 3, 4] - strides = [3, 4, 5] - order = [4, 5, 6] - stripes = [5, 6, 7] - offset = [6, 7, 8] - hash_value = 3107995860559090954 - stripe_config = StripeConfig( - shape=shape, - extent=extent, - strides=strides, - order=order, - stripes=stripes, - offset=offset, - ) - assert stripe_config.shape == shape - assert stripe_config.extent == extent - assert stripe_config.strides == strides - assert stripe_config.order == order - assert stripe_config.stripes == stripes - assert stripe_config.offset == offset - assert hash(stripe_config) == hash_value - - -@pytest.mark.parametrize( - "mismatch", [None, "shape", "extent", "strides", "order", "stripes", "offset"] -) -def test_stripe_config_equal(mismatch): - init_dict = { - "shape": [1, 2, 3], - "extent": [2, 3, 4], - "strides": [3, 4, 5], - "order": [4, 5, 6], - "stripes": [5, 6, 7], - "offset": [6, 7, 8], - } - stripe_config_a = StripeConfig(**init_dict) - if mismatch: - init_dict[mismatch] = [1, 1, 1] - stripe_config_b = StripeConfig(**init_dict) - if not mismatch: - assert stripe_config_a == stripe_config_b - else: - assert stripe_config_a != stripe_config_b - - -@pytest.mark.parametrize( - ["stripe_config", "expected_stripe_counts"], - [ - ( - StripeConfig( - shape=[3, 3, 3], - extent=[9, 9, 9], - strides=[3, 3, 3], - order=[1, 2, 3], - stripes=[3, 3, 3], - offset=[0, 0, 0], - ), - { - (3, 3, 3): 27, - }, - ), - ( - StripeConfig( - shape=[3, 3], - extent=[10, 10], - strides=[2, 2], - order=[1, 2], - stripes=[5, 5], - offset=[0, 0], - ), - { - (3, 3): 16, - (2, 3): 4, - (3, 2): 4, - (2, 2): 1, - }, - ), - ( - StripeConfig( - shape=[3, 3, 9], - extent=[9, 9, 9], - strides=[3, 3, 0], - order=[1, 2, 3], - stripes=[3, 3, 1], - offset=[0, 0, 0], - ), - { - (3, 3, 9): 9, - }, - ), - ( - StripeConfig( - shape=[5, 5], - extent=[8, 8], - strides=[5, 5], - order=[1, 2], - stripes=[2, 2], - offset=[0, 0], - ), - { - (5, 5): 1, - (3, 5): 1, - (5, 3): 1, - (3, 3): 1, - }, - ), - ( - StripeConfig( - shape=[5, 5], - extent=[8, 8], - strides=[5, 5], - order=[1, 2], - stripes=[2, 2], - offset=[-1, -2], - ), - { - (4, 3): 2, - (4, 5): 2, - }, - ), - ( - StripeConfig( - shape=[13, 7], - extent=[128, 73], - strides=[13, 7], - order=[1, 2], - stripes=[11, 12], - offset=[-10, -5], - ), - { - (3, 1): 1, - (3, 2): 1, - (8, 7): 10, - (8, 2): 1, - (13, 7): 90, - (13, 1): 9, - (8, 1): 1, - (3, 7): 10, - (13, 2): 9, - }, - ), - ], -) -def test_count_stripes(stripe_config, expected_stripe_counts): - assert count_stripes(stripe_config) == expected_stripe_counts - - -@pytest.mark.parametrize( - ["stripe_config", "expected_stripe_counts"], - [ - ( - StripeConfig( - shape=[4, 4], - extent=[16, 16], - strides=[2, 2], - order=[1, 2], - stripes=[7, 7], - offset=[0, 0], - ), - { - (4, 4): 1, - (2, 4): 6, - (4, 2): 6, - (2, 2): 36, - }, - ), - ( - StripeConfig( - shape=[4, 4], - extent=[8, 8], - strides=[2, 2], - order=[1, 2], - stripes=[6, 3], - offset=[-5, 0], - ), - { - (1, 4): 2, - (2, 4): 3, - (2, 2): 6, - (1, 2): 4, - }, - ), - ], -) -def test_count_stripes_sliding_window(stripe_config, expected_stripe_counts): - assert count_stripes(stripe_config, enable_sliding_window=True) == expected_stripe_counts - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/cascader/test_tensor_config.py 
b/tests/python/contrib/test_ethosu/cascader/test_tensor_config.py deleted file mode 100644 index eaab420fbfba..000000000000 --- a/tests/python/contrib/test_ethosu/cascader/test_tensor_config.py +++ /dev/null @@ -1,110 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from tvm.contrib.ethosu.cascader import ( - StripeConfig, - Tensor, - MemoryRegion, - TensorConfig, - TensorConfigState, - BufferMode, -) - -import pytest - - -def test_tensor_config(DRAM, SRAM): - stripe_config = StripeConfig( - shape=[1, 2, 3], - extent=[2, 3, 4], - strides=[3, 4, 5], - order=[4, 5, 6], - stripes=[5, 6, 7], - offset=[6, 7, 8], - ) - tensor = Tensor( - shape=[10, 10, 10], - dtype="int8", - ) - home_region = DRAM - state = TensorConfigState.BOUNDARY - buffer_mode = BufferMode.ROLLING - copy_tensor = True - copy_region = SRAM - tensor_config = TensorConfig( - tensor=tensor, - home_region=home_region, - state=state, - buffer_mode=buffer_mode, - stripe_configs=[stripe_config], - copy_tensor=copy_tensor, - copy_region=copy_region, - ) - - assert tensor_config.tensor == tensor - assert tensor_config.home_region == home_region - assert tensor_config.state == state - assert tensor_config.buffer_mode == buffer_mode - assert tensor_config.stripe_configs == [stripe_config] - assert tensor_config.copy_tensor == copy_tensor - assert tensor_config.copy_region == copy_region - assert hash(tensor_config) != 0 - - -def test_get_rolling_buffer(DRAM): - stripe_config = StripeConfig( - shape=[9, 4, 7], - extent=[9, 16, 21], - strides=[3, 5, 7], - order=[1, 3, 2], - stripes=[1, 3, 3], - offset=[0, 0, 0], - ) - tensor = Tensor(shape=[9, 16, 21], dtype="int32", compression_ratio=0.5) - tensor_config = TensorConfig( - tensor=tensor, - home_region=DRAM, - state=TensorConfigState.BOUNDARY, - buffer_mode=BufferMode.ROLLING, - stripe_configs=[stripe_config], - ) - - assert tensor_config.get_buffer_size() == 2016 - - -def test_get_recompute_buffer(DRAM): - stripe_config = StripeConfig( - shape=[4, 5, 7], - extent=[6, 7, 14], - strides=[2, 3, 7], - order=[1, 3, 2], - stripes=[2, 2, 2], - offset=[0, 0, 0], - ) - tensor = Tensor(shape=[6, 7, 14], dtype="int32", compression_ratio=0.5) - tensor_config = TensorConfig( - tensor=tensor, - home_region=DRAM, - state=TensorConfigState.BOUNDARY, - buffer_mode=BufferMode.RECOMPUTE, - stripe_configs=[stripe_config], - ) - - assert tensor_config.get_buffer_size() == 280 - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/infra.py b/tests/python/contrib/test_ethosu/infra.py deleted file mode 100644 index 71e7e029c148..000000000000 --- a/tests/python/contrib/test_ethosu/infra.py +++ /dev/null @@ -1,791 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -This module provides infrastructure to verify the correctness of -the command stream produced. -Currently it will invoke vela to generate a vela-optimized tflite -in which the command stream is contained as a custom operator. -This class include methods to parse the custom operator to extract -the command stream and perform an equivalency check for single operator -test cases. -""" -from typing import List - -import os -import struct -import numpy as np -import math -from enum import IntEnum -import tensorflow as tf - -from ethosu.vela.register_command_stream_generator import CmdMode -from ethosu.vela.register_command_stream_generator import cmd0 -from ethosu.vela.register_command_stream_generator import cmd1 - -import tvm -from tvm import relay -import tvm.relay.backend.contrib.ethosu.op as ethosu_ops -from tvm.topi.nn.utils import get_pad_tuple -from tvm.relay.expr_functor import ExprMutator -from tvm.relay.op.annotation import compiler_begin, compiler_end -from tvm.relay.backend.contrib.ethosu import preprocess -import tvm.relay.testing.tf as tf_testing -from tvm import WorkspaceMemoryPools, WorkspacePoolInfo, PoolInfoProperties - -from tvm.relay.op.contrib.ethosu import partition_for_ethosu -from tvm.testing.aot import ( - AOTCompiledTestModel, - AOTDataLinkage, - AOTTestModel, - AOTTestRunner, - compile_models, - run_and_check, -) - - -class AttachType(IntEnum): - kGroupRoot = 1 - kInline = 2 - kInlinedAlready = 3 - kScope = 4 - kScanUpdate = 5 - - -def print_payload(payload): - cmds = deserialize_command_stream(payload) - for cmd_val in cmds: - cmd, val = parse_cmd(cmd_val) - s = str(cmd) - s = s.ljust(40) - s += str(val) - print(s) - - -def parse_cmd(binary_cmd): - code = binary_cmd[0] & 0x0000FFFF # lower 16 bits - param = binary_cmd[0] >> 16 # higher 16 bits - payload_mode = CmdMode(code & CmdMode.Mask) - if payload_mode == CmdMode.Payload32: - command = cmd1(code & CmdMode.CmdOpMask) - value = binary_cmd[1] - else: - command = cmd0(code & CmdMode.CmdOpMask) - value = param - return command, value - - -def deserialize_command_stream(blob): - assert isinstance(blob, bytes) - payload_bytes = struct.unpack("<{0}I".format(len(blob) // 4), blob) - cmms = [] - # remove_header - payload_bytes = payload_bytes[8:] - idx = 0 - while idx < len(payload_bytes): - cmd = [] - code = payload_bytes[idx] - idx += 1 - cmd.append(code) - payload_mode = CmdMode(code & CmdMode.Mask) - if payload_mode == CmdMode.Payload32: - value = payload_bytes[idx] - idx += 1 - cmd.append(value) - cmms.append(cmd) - return cmms - - -def _get_workspace_size_define_macro(pool_name: str, model_name="default") -> str: - """This function converts pool names to compiler generated - workspace pool size macros""" - - prefix = "TVMGEN_" + model_name.upper() + "_" - postfix = "_WORKSPACE_POOL_SIZE" - return prefix + pool_name.upper() + postfix 
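As a concrete reading of the helper above: for the pool name used elsewhere in this test infrastructure ("my_memory_pool") and the default model name, the macro it produces follows directly from the string concatenation in the function body.

# Assuming the _get_workspace_size_define_macro helper defined above:
assert (
    _get_workspace_size_define_macro("my_memory_pool")
    == "TVMGEN_DEFAULT_MY_MEMORY_POOL_WORKSPACE_POOL_SIZE"
)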
- - -def create_test_runner( - accel="ethos-u55-256", - enable_usmp=True, - enable_cascader=False, - enable_striping=False, - workspace_pools=None, -): - - file_dir = os.path.dirname(os.path.abspath(__file__)) - test_root = os.path.join(file_dir, "reference_system") - _, ethosu_variant, ethosu_macs = accel.split("-") - ethosu_variant = ethosu_variant.upper() - - prologue = """ - UartStdOutInit(); - EthosuInit(); - - struct ethosu_driver* ethos_u = ethosu_reserve_driver(); - """ - - if workspace_pools: - for pool in workspace_pools.pools: - prologue = ( - prologue - + f""" - #ifdef {_get_workspace_size_define_macro(pool.pool_name)} - __attribute__((section(".bss.noinit.tvm"), aligned(16))) - static uint8_t {pool.pool_name}[{_get_workspace_size_define_macro(pool.pool_name)}]; - #endif - - """ - ) - - return AOTTestRunner( - makefile="corstone300", - prologue=prologue, - epilogue=""" - ethosu_release_driver(ethos_u); - """, - includes=["uart_stdout.h", "ethosu_55.h", "ethosu_mod.h", "hard_fault.h"], - parameters={ - "ETHOSU_TEST_ROOT": test_root, - "NPU_MACS": ethosu_macs, - "NPU_VARIANT": ethosu_variant, - }, - pass_config={ - "relay.ext.ethos-u.options": { - "accelerator_config": accel, - "enable_cascader": enable_cascader, - "enable_striping": enable_striping, - }, - "tir.usmp.enable": enable_usmp, - "tir.usmp.algorithm": "hill_climb", - "tir.disable_storage_rewrite": enable_usmp, - }, - ) - - -def build_source( - module, - inputs, - outputs, - test_runner, - output_tolerance=0, - workspace_pools=None, -): - return compile_models( - models=AOTTestModel( - module=module, - inputs=inputs, - outputs=outputs, - output_tolerance=output_tolerance, - extra_memory_in_bytes=0, - ), - interface_api="c", - use_unpacked_api=True, - workspace_memory_pools=workspace_pools, - workspace_byte_alignment=16, - pass_config=test_runner.pass_config, - ) - - -def verify_source(models: List[AOTCompiledTestModel], test_runner): - """ - This method verifies the generated source from an NPU module by building it and running on an FVP. - """ - interface_api = "c" - run_and_check( - models, - test_runner, - interface_api, - workspace_byte_alignment=16, - data_linkage=AOTDataLinkage(section="ethosu_scratch", alignment=16), - ) - - -class InputGenerator: - def __init__(self, random_state): - self._random_state = random_state - - def generate(self, size, dtype): - if dtype == np.float32: - print("random float32") - return self._random_state.uniform(-1, 1, size).astype(dtype) - else: - print("random (u)int min=%d max=%d", np.iinfo(dtype).min, np.iinfo(dtype).max) - low = np.iinfo(dtype).min - high = np.iinfo(dtype).max + 1 - return self._random_state.randint(low, high, size, dtype) - - -def generate_ref_data_tflite(model): - """ - This method generates reference data by running the specified model on tflite with random input data. - The random input data and generated output data are returned. 
- """ - expected_output_data = {} - - interpreter = tf.lite.Interpreter( - model_content=model, - experimental_op_resolver_type=tf.lite.experimental.OpResolverType.BUILTIN_REF, - ) - - interpreter.allocate_tensors() - - input_details = interpreter.get_input_details() - output_details = interpreter.get_output_details() - - # Initialize random generators with a fixed seed to get deterministic results - seed = 0 - random_state = np.random.RandomState(seed) - - inputgen = InputGenerator(random_state) - - # Generate input data - input_data = { - input_detail["name"]: inputgen.generate( - input_detail["shape"], - input_detail["dtype"], - ) - for input_detail in input_details - } - input_index = {input_detail["name"]: input_detail["index"] for input_detail in input_details} - - for input_name in input_data.keys(): - data = input_data[input_name] - index = input_index[input_name] - interpreter.set_tensor(index, data) - interpreter.invoke() - - expected_output_data = { - output_detail["name"]: interpreter.get_tensor(output_detail["index"]) - for output_detail in output_details - } - - return input_data, expected_output_data - - -def get_tflite_model(model_url): - """Get a TFLite model from URL.""" - tflite_model_file = tf_testing.get_workload_official(model_url[0], model_url[1]) - with open(tflite_model_file, "rb") as f: - tflite_model_buf = f.read() - return tflite_model_buf - - -def get_tflite_graph(tf_func, shapes, ranges=None): - tensor_specs = [tf.TensorSpec(shape, dtype=tf.float32) for shape in shapes] - if not ranges: - ranges = [(0, 1) for _ in shapes] - concrete_func = tf_func.get_concrete_function(*tensor_specs) - - # Convert the model - def representative_dataset(): - for _ in range(100): - inputs = [] - for i, shape in enumerate(shapes): - data = np.random.uniform( - low=ranges[i][0], high=ranges[i][1], size=tuple(shape) - ).astype("float32") - inputs.append(data) - - yield inputs - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_graph = converter.convert() - - # Get TFLite model from buffer - try: - import tflite - - tflite_model = tflite.Model.GetRootAsModel(tflite_graph, 0) - except AttributeError: - import tflite.Model - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - relay_module, params = relay.frontend.from_tflite(tflite_model) - mod = partition_for_ethosu(relay_module, params) - return mod, tflite_graph - - -def compare_ethosu_with_reference( - mod, - input_data, - output_data, - accel_type: str, - output_tolerance=0, - print_cmm=False, - enable_cascader=None, -): - if enable_cascader is None: - enable_cascader = "u65" not in accel_type - pool_name = "my_memory_pool" - host_target = tvm.target.Target("c") - ethosu_target = tvm.target.Target("ethos-u") - workspace_pools = WorkspaceMemoryPools( - [ - WorkspacePoolInfo( - pool_name, - [host_target, ethosu_target], - PoolInfoProperties( - size_hint_bytes=2400000, - read_bandwidth_bytes_per_cycle=16, - write_bandwidth_bytes_per_cycle=16, - target_burst_bytes={ethosu_target: 1}, - ), - ) - ] - ) - test_runner = create_test_runner( - accel_type, - enable_usmp=True, - enable_cascader=enable_cascader, - enable_striping=False, - workspace_pools=workspace_pools, - ) - compiled_models = build_source( - 
mod, - input_data, - output_data, - test_runner, - workspace_pools=workspace_pools, - output_tolerance=output_tolerance, - ) - - # Assumes only two runtime.Modules are created -- i.e. single offload module - ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0] - - # Verify generated C source - if print_cmm: - get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts") - compilation_artifacts = get_artifacts(ethosu_module) - cmms = bytes.fromhex(compilation_artifacts[0].command_stream) - print_payload(cmms) - - verify_source(compiled_models, test_runner) - - -def compare_tvm_with_tflite( - tf_func, - shapes, - accel_type, - ranges=None, - output_tolerance=0, - print_cmm=False, - enable_cascader=None, -): - mod, tflite_graph = get_tflite_graph(tf_func, shapes, ranges) - - # Generate reference data - input_data, output_data = generate_ref_data_tflite(tflite_graph) - - compare_ethosu_with_reference( - mod, - input_data, - output_data, - accel_type, - output_tolerance=output_tolerance, - print_cmm=print_cmm, - enable_cascader=enable_cascader, - ) - - -class EthosUAnnotator(ExprMutator): - """Annotate entire graph for Ethos-U offload""" - - def __init__(self): - super(EthosUAnnotator, self).__init__() - self.compiler = "ethos-u" - self.last_call = True - - def visit_call(self, call): - curr_last = self.last_call - self.last_call = False - - params = [] - for arg in call.args: - param = super().visit(arg) - if isinstance(param, relay.expr.Var): - param = compiler_begin(param, self.compiler) - params.append(param) - - new_call = relay.Call(call.op, params, call.attrs) - if curr_last: - new_call = compiler_end(new_call, self.compiler) - return new_call - - def visit_constant(self, constant): - new_constant = compiler_begin(constant, self.compiler) - return new_constant - - -def create_ethosu_partition(mod): - mod["main"] = EthosUAnnotator().visit(mod["main"]) - mod = relay.transform.MergeCompilerRegions()(mod) - mod = relay.transform.InferType()(mod) - mod = relay.transform.PartitionGraph()(mod) - mod = relay.transform.InferType()(mod) - mod = preprocess.preprocess_ext_io()(mod) - return mod - - -def generate_weights_data(shape, dtype): - size = 1 - for dim in shape: - size *= dim - return (np.arange(size) % 255).reshape(shape).astype(dtype) - - -def get_convolutional_args(call, include_buffers=False, remove_constants=False): - """A method to extract the arguments from conv2d or depthwise_conv2d extern call.""" - args = call.args - conv_args = [] - remove_indices = [0] - - if remove_constants: - remove_indices += [41, 42, 44, 45] - - for i, arg in enumerate(args): - if i in remove_indices: - continue - elif isinstance(arg, tvm.tir.expr.IntImm) or isinstance(arg, tvm.tir.expr.FloatImm): - conv_args.append(arg.value) - elif isinstance(arg, tvm.tir.expr.BufferLoad) and not include_buffers: - conv_args.append(arg.indices[0]) - else: - conv_args.append(arg) - - return conv_args - - -def compute_ofm_shape( - ifm_shape, padding, kernel_shape, strides, dilation=[1, 1], channel_padding=[0, 0] -): - assert len(strides) == 2 - assert len(dilation) == 2 - assert len(kernel_shape) == 2 - if isinstance(padding, tuple): - h = ( - ifm_shape[1] - (kernel_shape[0] - 1) * dilation[0] + padding[0] + padding[2] - ) // strides[0] - w = ( - ifm_shape[2] - (kernel_shape[1] - 1) * dilation[1] + padding[1] + padding[3] - ) // strides[1] - elif padding.lower() == "valid": - h = math.ceil((ifm_shape[1] - (kernel_shape[0] - 1) * dilation[0]) / strides[0]) - w = 
math.ceil((ifm_shape[2] - (kernel_shape[1] - 1) * dilation[1]) / strides[1]) - elif padding.lower() == "same": - h = math.ceil(ifm_shape[1] / strides[0]) - w = math.ceil(ifm_shape[2] / strides[1]) - ofm_shape = [ifm_shape[0], h, w, ifm_shape[3] + channel_padding[0] + channel_padding[1]] - return ofm_shape - - -def compute_padding_shape(ifm_shape, ofm_shape, padding, kernel_shape, strides, dilation=[1, 1]): - assert len(strides) == 2 - assert len(dilation) == 2 - assert len(kernel_shape) == 2 - if padding.lower() == "valid": - return [0, 0, 0, 0] - if padding.lower() == "same": - effective_kernel_shape = [ - dilation[0] * (kernel_shape[0] - 1) + 1, - dilation[1] * (kernel_shape[1] - 1) + 1, - ] - pad_along_height = max( - (ofm_shape[1] - 1) * strides[0] + effective_kernel_shape[0] - ifm_shape[1], 0 - ) - pad_along_width = max( - (ofm_shape[2] - 1) * strides[1] + effective_kernel_shape[1] - ifm_shape[2], 0 - ) - pad_top = pad_along_height // 2 - pad_bottom = pad_along_height - pad_top - pad_left = pad_along_width // 2 - pad_right = pad_along_width - pad_left - return [pad_top, pad_left, pad_bottom, pad_right] - - -def make_ethosu_conv2d( - ifm, - ifm_channels, - ofm_channels, - kernel_shape, - padding, - strides, - dilation, - lut=relay.const([], dtype="int8"), - activation="NONE", - ifm_layout="NHWC", - ofm_layout="NHWC", - weight_dtype="int8", - scale_bias_dtype="uint8", - rounding_mode="TFL", - upscale="NONE", -): - # conv params - weight_shape = (ofm_channels, kernel_shape[0], kernel_shape[1], ifm_channels) - padding = get_pad_tuple(padding, kernel_shape) - - scale_bias_data = generate_weights_data((weight_shape[0], 10), scale_bias_dtype) - scale_bias = relay.const(scale_bias_data, dtype=scale_bias_dtype) - weight_data = generate_weights_data(weight_shape, weight_dtype) - weight = relay.const(weight_data, dtype=weight_dtype) - conv = ethosu_ops.ethosu_conv2d( - ifm, - weight, - scale_bias, - lut=lut, - ifm_scale=0.5, - ifm_zero_point=10, - weight_zero_point=12, - ofm_scale=0.25, - ofm_zero_point=14, - kernel_shape=kernel_shape, - ofm_channels=ofm_channels, - strides=strides, - padding=padding, - dilation=dilation, - activation=activation, - clip_min=10 if activation == "CLIP" else 0, - clip_max=100 if activation == "CLIP" else 0, - rounding_mode=rounding_mode, - upscale=upscale, - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - return conv - - -def make_ethosu_depthwise_conv2d( - ifm, - channels, - kernel_shape, - padding, - strides, - dilation, - activation="NONE", - ifm_layout="NHWC", - ofm_layout="NHWC", - weight_dtype="int8", - scale_bias_dtype="uint8", - rounding_mode="TFL", -): - # params - weight_shape = (channels, kernel_shape[0], kernel_shape[1], 1) - padding = get_pad_tuple(padding, kernel_shape) - - scale_bias_data = generate_weights_data((weight_shape[0], 10), scale_bias_dtype) - scale_bias = relay.const(scale_bias_data, dtype=scale_bias_dtype) - weight_data = generate_weights_data(weight_shape, weight_dtype) - weight = relay.const(weight_data, dtype=weight_dtype) - depthwise = ethosu_ops.ethosu_depthwise_conv2d( - ifm, - weight, - scale_bias, - lut=relay.const([], dtype="int8"), - ifm_scale=0.6, - ifm_zero_point=11, - weight_zero_point=13, - ofm_scale=0.26, - ofm_zero_point=15, - kernel_shape=kernel_shape, - ofm_channels=channels, - strides=strides, - padding=padding, - dilation=dilation, - activation=activation, - clip_min=15 if activation == "CLIP" else 0, - clip_max=105 if activation == "CLIP" else 0, - rounding_mode=rounding_mode, - upscale="NONE", - 
ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - return depthwise - - -def get_pooling_args(call, include_buffers=False): - args = call.args - pooling_args = [] - - for i, arg in enumerate(args): - if isinstance(arg, tvm.tir.expr.IntImm) or isinstance(arg, tvm.tir.expr.FloatImm): - pooling_args.append(arg.value) - elif isinstance(arg, tvm.tir.expr.BufferLoad) and not include_buffers: - pooling_args.append(arg.indices[0]) - else: - pooling_args.append(arg) - - return pooling_args - - -def make_ethosu_pooling( - ifm, - pooling_type, - pool_shape, - ofm_channels, - ofm_dtype, - strides, - padding, - activation="NONE", - ifm_layout="NHWC", - ofm_layout="NHWC", - rounding_mode="TFL", - upscale="NONE", -): - pooling = ethosu_ops.ethosu_pooling( - ifm, - lut=relay.const([], dtype="int8"), - pooling_type=pooling_type, - ifm_scale=1, - ifm_zero_point=0, - ofm_scale=1, - ofm_zero_point=0, - pool_shape=pool_shape, - ofm_channels=ofm_channels, - ofm_dtype=ofm_dtype, - strides=strides, - padding=padding, - activation=activation, - clip_min=10 if activation == "CLIP" else 0, - clip_max=100 if activation == "CLIP" else 0, - rounding_mode=rounding_mode, - upscale=upscale, - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - return pooling - - -def get_binary_elementwise_args(call, include_buffers=False): - args = call.args - binary_elementwise_args = [] - - for i, arg in enumerate(args): - if isinstance(arg, tvm.tir.expr.IntImm) or isinstance(arg, tvm.tir.expr.FloatImm): - binary_elementwise_args.append(arg.value) - elif isinstance(arg, tvm.tir.expr.BufferLoad) and not include_buffers: - binary_elementwise_args.append(arg.indices[0]) - else: - binary_elementwise_args.append(arg) - - return binary_elementwise_args - - -def make_ethosu_binary_elementwise( - ifm, - ifm2, - ifm_channels, - ifm2_channels, - operator_type, - ofm_dtype, - reversed_operands=False, - activation="NONE", - ifm_layout="NHWC", - ifm2_layout="NHWC", - ofm_layout="NHWC", - rounding_mode="TFL", - use_rescale: bool = False, - rescale_scale: int = 0, - rescale_shift: int = 0, - lut=relay.const([], dtype="int8"), - ifm_scale: float = 1.0, - ifm_zero_point: int = 0, - ifm2_scale: float = 1.0, - ifm2_zero_point: int = 0, - ofm_scale: float = 1.0, - ofm_zero_point: int = 0, -): - ethosu_binary_elementwise = ethosu_ops.ethosu_binary_elementwise( - ifm=ifm, - ifm2=ifm2, - lut=lut, - operator_type=operator_type, - ifm_scale=ifm_scale, - ifm_zero_point=ifm_zero_point, - ifm2_scale=ifm2_scale, - ifm2_zero_point=ifm2_zero_point, - ofm_scale=ofm_scale, - ofm_zero_point=ofm_zero_point, - ifm_channels=ifm_channels, - ifm2_channels=ifm2_channels, - reversed_operands=reversed_operands, - activation=activation, - ofm_dtype=ofm_dtype, - clip_min=10 if activation == "CLIP" else 0, - clip_max=100 if activation == "CLIP" else 0, - rounding_mode=rounding_mode, - ifm_layout=ifm_layout, - ifm2_layout=ifm2_layout, - ofm_layout=ofm_layout, - use_rescale=use_rescale, - rescale_scale=rescale_scale, - rescale_shift=rescale_shift, - ) - return ethosu_binary_elementwise - - -def make_ethosu_identity( - ifm, - lut=relay.const([], dtype="int8"), - ifm_scale=1, - ifm_zero_point=0, - ofm_scale=1, - ofm_zero_point=0, - activation="NONE", -): - identity = ethosu_ops.ethosu_identity( - ifm, - lut=lut, - ifm_scale=ifm_scale, - ifm_zero_point=ifm_zero_point, - ofm_scale=ofm_scale, - ofm_zero_point=ofm_zero_point, - activation=activation, - ) - return identity - - -def make_ethosu_unary_elementwise( - ifm, - ofm_channels, - operator_type, - activation="NONE", - 
ifm_layout="NHWC", - ofm_layout="NHWC", - rounding_mode="TFL", -): - ethosu_unary_elementwise = ethosu_ops.ethosu_unary_elementwise( - ifm=ifm, - lut=relay.const([], dtype="int8"), - operator_type=operator_type, - ifm_scale=1, - ifm_zero_point=0, - ofm_scale=1, - ofm_zero_point=0, - ofm_channels=ofm_channels, - activation=activation, - clip_min=10 if activation == "CLIP" else 0, - clip_max=100 if activation == "CLIP" else 0, - rounding_mode=rounding_mode, - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - return ethosu_unary_elementwise diff --git a/tests/python/contrib/test_ethosu/reference_system/arm-none-eabi-gcc.cmake b/tests/python/contrib/test_ethosu/reference_system/arm-none-eabi-gcc.cmake deleted file mode 100644 index 6aeb0b7cc7c1..000000000000 --- a/tests/python/contrib/test_ethosu/reference_system/arm-none-eabi-gcc.cmake +++ /dev/null @@ -1,79 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -if (__TOOLCHAIN_LOADED) - return() -endif() -set(__TOOLCHAIN_LOADED TRUE) - -set(CMAKE_SYSTEM_NAME Generic) -set(CMAKE_C_COMPILER "arm-none-eabi-gcc") -set(CMAKE_CXX_COMPILER "arm-none-eabi-g++") -set(CMAKE_SYSTEM_PROCESSOR "cortex-m55" CACHE STRING "Select Cortex-M architecture. (cortex-m0, cortex-m3, cortex-m33, cortex-m4, cortex-m55, cortex-m7, etc)") - -set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) - -SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) -SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) -SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) - -set(CMAKE_C_STANDARD 99) -set(CMAKE_CXX_STANDARD 14) - -# The system processor could for example be set to cortex-m33+nodsp+nofp. 
-set(__CPU_COMPILE_TARGET ${CMAKE_SYSTEM_PROCESSOR}) -string(REPLACE "+" ";" __CPU_FEATURES ${__CPU_COMPILE_TARGET}) -list(POP_FRONT __CPU_FEATURES CMAKE_SYSTEM_PROCESSOR) - -string(FIND ${__CPU_COMPILE_TARGET} "+" __OFFSET) -if(__OFFSET GREATER_EQUAL 0) - string(SUBSTRING ${__CPU_COMPILE_TARGET} ${__OFFSET} -1 CPU_FEATURES) -endif() - -# Add -mcpu to the compile options to override the -mcpu the CMake toolchain adds -add_compile_options(-mcpu=${__CPU_COMPILE_TARGET}) - -# Set floating point unit -if("${__CPU_COMPILE_TARGET}" MATCHES "\\+fp") - set(FLOAT hard) -elseif("${__CPU_COMPILE_TARGET}" MATCHES "\\+nofp") - set(FLOAT soft) -elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m33" OR - "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m55") - set(FLOAT hard) -else() - set(FLOAT soft) -endif() - -add_compile_options(-mfloat-abi=${FLOAT}) -add_link_options(-mfloat-abi=${FLOAT}) - -# Link target -add_link_options(-mcpu=${__CPU_COMPILE_TARGET}) -add_link_options(-Xlinker -Map=output.map) - -# -# Compile options -# -set(cxx_flags "-fno-unwind-tables;-fno-rtti;-fno-exceptions") - -add_compile_options("-Wall;-Wextra;-Wsign-compare;-Wunused;-Wswitch-default;\ --Wdouble-promotion;-Wredundant-decls;-Wshadow;-Wnull-dereference;\ --Wno-format-extra-args;-Wno-unused-function;-Wno-unused-label;\ --Wno-missing-field-initializers;-Wno-return-type;-Wno-format;-Wno-int-conversion" - "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>" - ) diff --git a/tests/python/contrib/test_ethosu/reference_system/ethosu_55.h b/tests/python/contrib/test_ethosu/reference_system/ethosu_55.h deleted file mode 100644 index 41ce284956e2..000000000000 --- a/tests/python/contrib/test_ethosu/reference_system/ethosu_55.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#ifndef TVM_RUNTIME_CONTRIB_ETHOS_U_ETHOSU_55_H_ -#define TVM_RUNTIME_CONTRIB_ETHOS_U_ETHOSU_55_H_ - -/* Define Arm(R) Ethos(TM)-U55 specific IRQs & base address */ -#define ETHOSU_NPU_FAIL (1 << 4) -#define ETHOSU_IRQ ((IRQn_Type)56) -#define ETHOSU_BASE_ADDRESS ((void*)0x48102000) - -#endif // TVM_RUNTIME_CONTRIB_ETHOS_U_ETHOSU_55_H_ diff --git a/tests/python/contrib/test_ethosu/reference_system/ethosu_mod.h b/tests/python/contrib/test_ethosu/reference_system/ethosu_mod.h deleted file mode 100644 index 9fdb7d91028c..000000000000 --- a/tests/python/contrib/test_ethosu/reference_system/ethosu_mod.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#ifndef TVM_RUNTIME_CONTRIB_ETHOS_U_ETHOSU_MOD_H_ -#define TVM_RUNTIME_CONTRIB_ETHOS_U_ETHOSU_MOD_H_ - -#include <ARMCM55.h> -// TODO(@grant-arm): Remove device specific information once RTOS support is available -#include <ethosu_driver.h> -#include <stdio.h> - -#include "ethosu_55.h" - -struct ethosu_driver ethosu0_driver; - -void ethosuIrqHandler0() { ethosu_irq_handler(&ethosu0_driver); } - -// Initialize Arm(R) Ethos(TM)-U NPU driver -int EthosuInit() { - if (ethosu_init(&ethosu0_driver, (void*)ETHOSU_BASE_ADDRESS, NULL, 0, 1, 1)) { - printf("Failed to initialize NPU.\n"); - return -1; - } - - // Assumes SCB->VTOR points to RW memory - NVIC_SetVector(ETHOSU_IRQ, (uint32_t)&ethosuIrqHandler0); - NVIC_EnableIRQ(ETHOSU_IRQ); - - return 0; -} - -#endif // TVM_RUNTIME_CONTRIB_ETHOS_U_ETHOSU_MOD_H_ diff --git a/tests/python/contrib/test_ethosu/reference_system/hard_fault.h b/tests/python/contrib/test_ethosu/reference_system/hard_fault.h deleted file mode 100644 index 9d349004848b..000000000000 --- a/tests/python/contrib/test_ethosu/reference_system/hard_fault.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#ifndef TVM_RUNTIME_CONTRIB_ETHOS_U_HARD_FAULT_H_ -#define TVM_RUNTIME_CONTRIB_ETHOS_U_HARD_FAULT_H_ - -struct ExcContext { - uint32_t r0; - uint32_t r1; - uint32_t r2; - uint32_t r3; - uint32_t r12; - uint32_t lr; - uint32_t pc; - uint32_t xPsr; -}; -void HardFault_Handler() { - int irq; - struct ExcContext* e; - uint32_t sp; - asm volatile( - "mrs %0, ipsr \n" // Read IPSR (Exception number) - "sub %0, #16 \n" // Get it into IRQn_Type range - "tst lr, #4 \n" // Select the stack which was in use - "ite eq \n" - "mrseq %1, msp \n" - "mrsne %1, psp \n" - "mov %2, sp \n" - : "=r"(irq), "=r"(e), "=r"(sp)); - printf("Hard fault. 
irq=%d, pc=0x%08lu, lr=0x%08lu, xpsr=0x%08lu, sp=0x%08lu\n", irq, e->pc, - e->lr, e->xPsr, sp); - printf("%11s cfsr=0x%08lu bfar=0x%08lu\n", "", SCB->CFSR, SCB->BFAR); - printf("EXITTHESIM\n"); - while (1 == 1) - ; -} - -#endif // TVM_RUNTIME_CONTRIB_ETHOS_U_HARD_FAULT_H_ diff --git a/tests/python/contrib/test_ethosu/test_attr_passing.py b/tests/python/contrib/test_ethosu/test_attr_passing.py deleted file mode 100644 index 744ffce8c352..000000000000 --- a/tests/python/contrib/test_ethosu/test_attr_passing.py +++ /dev/null @@ -1,107 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest - -pytest.importorskip("ethosu.vela") -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu import util - - -def test_compiler_attr(): - config = { - "accelerator_config": "ethos-u55-32", - "enable_cascader": True, - "enable_striping": True, - "disable_copying_constants": True, - "dev_force_block_config": "2x4x16", - "dev_max_open_plans": "256", - "dev_max_closed_plans": "128", - "dev_select_proposal_idx": "1", - "dev_disable_pareto_plans": True, - "dev_disable_pareto_proposals": True, - "dev_disable_block_culling": True, - "dev_cascader_logging": True, - } - with tvm.transform.PassContext(opt_level=3, config={"relay.ext.ethos-u.options": config}): - with tvm.target.Target("c"): - compiler_attrs = tvm.get_global_func("relay.ext.ethos-u.get_compiler_attrs")() - assert compiler_attrs.accelerator_config == config["accelerator_config"] - assert compiler_attrs.enable_cascader == config["enable_cascader"] - assert compiler_attrs.enable_striping == config["enable_striping"] - assert compiler_attrs.disable_copying_constants == config["disable_copying_constants"] - assert compiler_attrs.dev_force_block_config == config["dev_force_block_config"] - assert compiler_attrs.dev_max_open_plans == config["dev_max_open_plans"] - assert compiler_attrs.dev_max_closed_plans == config["dev_max_closed_plans"] - assert compiler_attrs.dev_select_proposal_idx == config["dev_select_proposal_idx"] - assert compiler_attrs.dev_disable_pareto_plans == config["dev_disable_pareto_plans"] - assert ( - compiler_attrs.dev_disable_pareto_proposals - == config["dev_disable_pareto_proposals"] - ) - assert compiler_attrs.dev_disable_block_culling == config["dev_disable_block_culling"] - assert compiler_attrs.dev_cascader_logging == config["dev_cascader_logging"] - - -def test_compiler_attr_default(): - default_config = { - "accelerator_config": "ethos-u55-256", - "enable_cascader": False, - "enable_striping": False, - "disable_copying_constants": False, - "dev_force_block_config": "", - "dev_max_open_plans": "8", - "dev_max_closed_plans": "32", - "dev_select_proposal_idx": "-1", - "dev_disable_pareto_plans": False, - "dev_disable_pareto_proposals": False, - 
"dev_disable_block_culling": False, - "dev_cascader_logging": False, - } - with tvm.transform.PassContext(opt_level=3): - with tvm.target.Target("c"): - compiler_attrs = tvm.get_global_func("relay.ext.ethos-u.get_compiler_attrs")() - assert compiler_attrs.accelerator_config == default_config["accelerator_config"] - assert compiler_attrs.enable_cascader == default_config["enable_cascader"] - assert compiler_attrs.enable_striping == default_config["enable_striping"] - assert ( - compiler_attrs.disable_copying_constants - == default_config["disable_copying_constants"] - ) - assert compiler_attrs.dev_force_block_config == default_config["dev_force_block_config"] - assert compiler_attrs.dev_max_open_plans == default_config["dev_max_open_plans"] - assert compiler_attrs.dev_max_closed_plans == default_config["dev_max_closed_plans"] - assert ( - compiler_attrs.dev_select_proposal_idx == default_config["dev_select_proposal_idx"] - ) - assert ( - compiler_attrs.dev_disable_pareto_plans - == default_config["dev_disable_pareto_plans"] - ) - assert ( - compiler_attrs.dev_disable_pareto_proposals - == default_config["dev_disable_pareto_proposals"] - ) - assert ( - compiler_attrs.dev_disable_block_culling - == default_config["dev_disable_block_culling"] - ) - assert compiler_attrs.dev_cascader_logging == default_config["dev_cascader_logging"] - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_codegen.py b/tests/python/contrib/test_ethosu/test_codegen.py deleted file mode 100644 index 451f47f87aa7..000000000000 --- a/tests/python/contrib/test_ethosu/test_codegen.py +++ /dev/null @@ -1,1724 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -import pytest - -pytest.importorskip("ethosu.vela") - -import numpy as np -import tflite.Model - -import tvm -import tensorflow as tf -from tvm import relay - -from tvm.relay.backend.contrib.ethosu import util - -from tvm.relay.op.contrib.ethosu import partition_for_ethosu -from tvm.testing.aot import generate_ref_data - -from . 
import infra - - -ACCEL_TYPES = ["ethos-u55-256", "ethos-u55-128", "ethos-u55-64", "ethos-u55-32", "ethos-u65-256"] - - -def is_u55_accel_type(accel_type): - return "u55" in accel_type - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES + ["ethos-u65-512"]) -@pytest.mark.parametrize("ifm_shape", [(1, 299, 299, 2), (1, 55, 55, 3)]) -@pytest.mark.parametrize("kernel_shape", [(3, 2), (1, 3)]) -@pytest.mark.parametrize("strides, dilation", [((1, 1), (2, 1)), ((3, 2), (1, 1))]) -@pytest.mark.parametrize("padding", ["SAME", "VALID"]) -@pytest.mark.parametrize("activation", ["NONE", "RELU"]) -def test_ethosu_conv2d_single( - ifm_shape, - kernel_shape, - strides, - dilation, - padding, - accel_type, - activation, -): - np.random.seed(0) - - @tf.function - def conv2d(x): - # Use tf.nn API to create the model - tf_strides = [1, strides[0], strides[1], 1] - op = tf.nn.conv2d( - x, - filters=tf.constant( - np.random.uniform(size=[kernel_shape[0], kernel_shape[1], ifm_shape[3], 3]), - dtype=tf.float32, - ), - strides=tf_strides, - padding=padding, - dilations=dilation, - ) - if activation == "RELU": - op = tf.nn.relu(op) - return op - - infra.compare_tvm_with_tflite(conv2d, [ifm_shape], accel_type) - - -def test_tflite_conv2d_with_separate_pad(): - np.random.seed(0) - - ifm_shape = (1, 55, 34, 3) - kernel_shape = (3, 2) - strides = (1, 1) - dilation = (2, 1) - padding = (0, 0, 1, 1) - - @tf.function - def conv2d(x): - tf_strides = [1, strides[0], strides[1], 1] - op = tf.pad( - x, - [[0, 0], [padding[0], padding[2]], [padding[1], padding[3]], [0, 0]], - "CONSTANT", - ) - weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], 3] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - return tf.nn.conv2d( - op, - weight, - strides=tf_strides, - padding="VALID", - dilations=dilation, - ) - - infra.compare_tvm_with_tflite(conv2d, [ifm_shape], "ethos-u55-256") - - -@pytest.mark.parametrize("ifm_shape", [(1, 214, 227, 2), (1, 27, 42, 3)]) -@pytest.mark.parametrize("kernel_shape", [(3, 2), (1, 3)]) -@pytest.mark.parametrize("strides, dilation", [((1, 1), (2, 1)), ((3, 2), (1, 1))]) -@pytest.mark.parametrize("padding", ["SAME", "VALID"]) -@pytest.mark.parametrize("accel_type", ACCEL_TYPES + ["ethos-u65-512"]) -@pytest.mark.parametrize("activation", ["NONE", "RELU"]) -def test_ethosu_conv2d_double( - ifm_shape, - kernel_shape, - strides, - dilation, - padding, - accel_type, - activation, -): - np.random.seed(0) - - @tf.function - def conv2d_double(x): - # Use tf.nn API to create the model with two convolutions - op = tf.nn.conv2d( - x, - filters=tf.constant( - np.random.uniform(size=[kernel_shape[0], kernel_shape[1], ifm_shape[3], 5]), - dtype=tf.float32, - ), - strides=strides, - padding=padding, - dilations=dilation, - ) - # Second convolution - op2 = tf.nn.conv2d( - op, - filters=tf.constant( - np.random.uniform(size=(kernel_shape[0], kernel_shape[1], 5, 3)), - dtype=tf.float32, - ), - strides=strides, - padding=padding, - dilations=dilation, - ) - if activation == "RELU": - op2 = tf.nn.relu(op2) - return op2 - - infra.compare_tvm_with_tflite(conv2d_double, [ifm_shape], accel_type) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "op_pairs", [("conv2d", "conv2d"), ("depthwise", "depthwise"), ("conv2d", "depthwise")] -) -def test_tflite_shared_pad( - accel_type, - op_pairs, -): - np.random.seed(0) - - ifm_shape = (1, 55, 32, 3) - kernel_shape = (3, 3) - strides = (3, 2) - dilation = (1, 1) - activation_function = "RELU" - op_padding = 
"SAME" - sep_padding = (0, 0, 1, 1) - - @tf.function - def tf_function(x): - def make_depthwise_or_conv2d(pair_idx, x): - # The input strides to the TensorFlow API needs to be of shape 1x4 - tf_strides = [1, strides[0], strides[1], 1] - if op_pairs[pair_idx] == "depthwise": - weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], 1] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - op = tf.nn.depthwise_conv2d( - x, weight, strides=tf_strides, padding=op_padding, dilations=dilation - ) - else: - weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], 3] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - op = tf.nn.conv2d( - x, - weight, - strides=tf_strides, - padding=op_padding, - dilations=dilation, - ) - if activation_function == "RELU": - op = tf.nn.relu(op) - return op - - x = tf.pad( - x, - [ - [0, 0], - [sep_padding[0], sep_padding[2]], - [sep_padding[1], sep_padding[3]], - [0, 0], - ], - "CONSTANT", - ) - - x1 = make_depthwise_or_conv2d(0, x) - x2 = make_depthwise_or_conv2d(1, x) - - x3 = tf.math.add(x1, x2) - return x3 - - infra.compare_tvm_with_tflite(tf_function, [ifm_shape], accel_type) - - -@pytest.mark.parametrize("weight_min, weight_max", [(0.0, 1e-11), (-1e10, 1e10)]) -def test_out_of_range_scaling(weight_min, weight_max): - np.random.seed(0) - ifm_shape = (1, 6, 6, 2) - strides = (1, 1) - kernel_shape = (1, 1) - dilation = (1, 1) - padding = "SAME" - activation = "RELU" - accel_type = "ethos-u55-128" - - @tf.function - def conv_invalid_scale(x): - # Use tf.nn API to create the model - tf_strides = [1, strides[0], strides[1], 1] - weights = np.random.uniform(size=[kernel_shape[0], kernel_shape[1], 2, 2]) - # Overwrite to force quantization that produces out of range shift values - weights[0][0][0][0] = weight_min - weights[0][0][1][0] = weight_max - op = tf.nn.conv2d( - x, - filters=tf.constant( - weights, - dtype=tf.float32, - ), - strides=tf_strides, - padding=padding, - dilations=dilation, - ) - if activation == "RELU": - op = tf.nn.relu(op) - return op - - infra.compare_tvm_with_tflite(conv_invalid_scale, [ifm_shape], accel_type) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize("ifm_shape", [(1, 55, 55, 3), (1, 23, 32, 7)]) -@pytest.mark.parametrize( - "kernel_shape, activation_function", - [((3, 3), "RELU"), ((1, 2), "NONE")], -) -@pytest.mark.parametrize("padding", ["SAME", "VALID"]) -@pytest.mark.parametrize("strides, dilation", [((1, 1), (2, 2)), ((3, 2), (1, 1))]) -def test_tflite_depthwise_conv2d( - accel_type, - ifm_shape, - kernel_shape, - padding, - strides, - dilation, - activation_function, -): - np.random.seed(0) - - @tf.function - def depthwise_conv2d(x): - weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], 1] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - # The input strides to the TensorFlow API needs to be of shape 1x4 - tf_strides = [1, strides[0], strides[1], 1] - op = tf.nn.depthwise_conv2d( - x, weight, strides=tf_strides, padding=padding, dilations=dilation - ) - if activation_function == "RELU": - op = tf.nn.relu(op) - return op - - infra.compare_tvm_with_tflite(depthwise_conv2d, [ifm_shape], accel_type) - - -def test_tflite_depthwise_conv2d_with_separate_pad(): - np.random.seed(0) - - ifm_shape = (1, 23, 32, 7) - kernel_shape = (1, 2) - strides = (3, 2) - dilation = (1, 1) - padding = (0, 0, 1, 1) - - @tf.function - def depthwise_conv2d(x): - tf_strides = [1, strides[0], strides[1], 1] - op = 
tf.pad( - x, - [[0, 0], [padding[0], padding[2]], [padding[1], padding[3]], [0, 0]], - "CONSTANT", - ) - weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], 1] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - return tf.nn.depthwise_conv2d( - op, - weight, - strides=tf_strides, - padding="VALID", - dilations=dilation, - ) - - infra.compare_tvm_with_tflite(depthwise_conv2d, [ifm_shape], "ethos-u55-256") - - -@pytest.mark.parametrize("ifm_shape", [(1, 55, 55, 3), (1, 23, 32, 7)]) -@pytest.mark.parametrize("padding", [(0, 1, 0, 0), (1, 1, 1, 1), (1, 1, 5, 5)]) -@pytest.mark.parametrize("const_value", [0, 5, 125, -5]) -def test_tflite_separate_pad( - ifm_shape, - padding, - const_value, -): - - np.random.seed(0) - - @tf.function - def pad2d(x): - return tf.pad( - x, - [[0, 0], [padding[0], padding[2]], [padding[1], padding[3]], [0, 0]], - "CONSTANT", - const_value, - ) - - infra.compare_tvm_with_tflite(pad2d, [ifm_shape], "ethos-u55-256") - - -@pytest.mark.parametrize("ifm_shape", [(1, 55, 55, 3), (1, 23, 32, 7)]) -@pytest.mark.parametrize("channel_padding", [(0, 1), (1, 1), (5, 2)]) -@pytest.mark.parametrize("const_value", [0, 5, 125, -5]) -def test_tflite_separate_channel_pad( - ifm_shape, - channel_padding, - const_value, -): - np.random.seed(0) - - @tf.function - def concat_func(x): - x = tf.pad( - x, - [[0, 0], [0, 0], [0, 0], [channel_padding[0], channel_padding[1]]], - "CONSTANT", - const_value, - ) - return x - - infra.compare_tvm_with_tflite(concat_func, [ifm_shape], "ethos-u55-256", enable_cascader=False) - - -@pytest.mark.parametrize( - "accel_type", - ACCEL_TYPES, -) -@pytest.mark.parametrize("pooling_type", ["MAX", "AVG"]) -@pytest.mark.parametrize("ifm_shape", [[1, 3, 4, 3], [1, 4, 5, 2]]) -@pytest.mark.parametrize( - "pool_shape, strides, activation_function, padding", - [([1, 2], [1, 2], "NONE", "SAME"), ([2, 3], [2, 3], "RELU", "VALID")], -) -def test_ethosu_pooling( - accel_type, - ifm_shape, - pooling_type, - strides, - pool_shape, - activation_function, - padding, -): - np.random.seed(0) - - @tf.function - def pooling(x): - if pooling_type == "MAX": - op = tf.nn.max_pool(x, pool_shape, strides, padding) - elif pooling_type == "AVG": - op = tf.nn.avg_pool(x, pool_shape, strides, padding) - if activation_function == "RELU": - op = tf.nn.relu(op) - return op - - infra.compare_tvm_with_tflite(pooling, [ifm_shape], accel_type) - - -@pytest.mark.parametrize( - "accel_type", - ACCEL_TYPES, -) -@pytest.mark.parametrize("pooling_type", ["MAX", "AVG"]) -@pytest.mark.parametrize( - "ifm_shape, pool_shape, strides, activation_function, padding", - [ - ([1, 4, 4, 3], [4, 4], [4, 4], "NONE", "SAME"), - ([1, 4, 4, 3], [4, 4], [4, 4], "RELU", "VALID"), - ([1, 25, 5, 64], [25, 5], [25, 5], "NONE", "VALID"), - ([1, 25, 5, 64], [25, 5], [25, 5], "RELU", "SAME"), - ], -) -def test_ethosu_pooling_same_ifm_and_kernel_shape( - accel_type, pooling_type, ifm_shape, pool_shape, strides, activation_function, padding -): - np.random.seed(0) - - @tf.function - def pooling(x): - if pooling_type == "MAX": - op = tf.nn.max_pool(x, pool_shape, strides, padding) - elif pooling_type == "AVG": - op = tf.nn.avg_pool(x, pool_shape, strides, padding) - if activation_function == "RELU": - op = tf.nn.relu(op) - return op - - infra.compare_tvm_with_tflite(pooling, [ifm_shape], accel_type) - - -@pytest.mark.parametrize( - "accel_type", - ["ethos-u55-256", "ethos-u65-256"], -) -@pytest.mark.parametrize("ifm_shape", [[1, 148, 29], [4, 148, 29], [1, 12], [8, 12]]) -def 
test_ethosu_softmax( - accel_type, - ifm_shape, -): - np.random.seed(0) - - @tf.function - def softmax(x): - return tf.nn.softmax(x) - - infra.compare_tvm_with_tflite(softmax, [ifm_shape], accel_type, ranges=[(-1, 1)]) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize("operator_type", ["ADD", "SUB", "MUL", "MIN", "MAX"]) -@pytest.mark.parametrize( - "ifm_shape, ifm2_shape", - [ - ([1, 2, 3, 4], [1, 2, 3, 4]), - ([1, 2, 3, 4], [1, 1, 1, 1]), - ([1, 1, 1, 1], [1, 2, 3, 4]), - ([1, 4, 4], [4, 1]), - ], -) -@pytest.mark.parametrize("activation_function", ["NONE", "RELU"]) -def test_ethosu_binary_elementwise( - accel_type, - operator_type, - ifm_shape, - ifm2_shape, - activation_function, -): - np.random.seed(0) - - @tf.function - def binary_elementwise(lhs, rhs): - if operator_type == "ADD": - op = tf.math.add(lhs, rhs) - elif operator_type == "SUB": - op = tf.math.subtract(lhs, rhs) - elif operator_type == "MUL": - op = tf.math.multiply(lhs, rhs) - elif operator_type == "MIN": - op = tf.math.minimum(lhs, rhs) - elif operator_type == "MAX": - op = tf.math.maximum(lhs, rhs) - if activation_function == "RELU": - op = tf.nn.relu(op) - return op - - infra.compare_tvm_with_tflite( - binary_elementwise, - shapes=[ifm_shape, ifm2_shape], - ranges=[(0, 1), (0, 2)], - accel_type=accel_type, - enable_cascader=is_u55_accel_type(accel_type), - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shape, ifm2_shape", - [ - ([4], [4]), - ([4], [1, 2, 3, 4]), - ([1, 4, 4], [4, 1]), - ], -) -def test_binary_add_with_non_4d_shapes( - request, - accel_type, - ifm_shape, - ifm2_shape, -): - np.random.seed(0) - - @tf.function - def binary_elementwise(lhs, rhs): - return tf.math.add(lhs, rhs) - - infra.compare_tvm_with_tflite( - binary_elementwise, - shapes=[ifm_shape, ifm2_shape], - ranges=[(0, 1), (0, 2)], - accel_type=accel_type, - enable_cascader=is_u55_accel_type(accel_type), - ) - - -@pytest.mark.parametrize( - "accel_type", - ACCEL_TYPES, -) -@pytest.mark.parametrize( - "ifm_shape, axis, keep_dims, use_same_quantization, dtype", - [ - # mean to average pool - [(1, 8, 16, 16), (2,), False, True, "int8"], - [(1, 8, 16, 16), (2,), False, True, "uint8"], - [(3, 3, 4), (0,), True, True, "int8"], - [(8, 5), (0,), False, True, "int8"], - # mean to depthwise - [(1, 8, 16, 16), (2,), True, False, "int8"], - [(1, 8, 16, 16), (2,), True, False, "uint8"], - [(1, 8, 16, 16), (2, 1), False, False, "int8"], - [(8, 4), (0,), False, False, "int8"], - [(1, 65, 2, 1), (1, 2), True, False, "int8"], # special case when h > 64 - [(1, 65, 2, 1), (1, 2), True, False, "uint8"], # special case when h > 64 - ], -) -def test_mean(accel_type, ifm_shape, axis, keep_dims, use_same_quantization, dtype): - np.random.seed(0) - - def create_mod_from_tflite(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - op = tf.math.reduce_mean(x, axis=axis, keepdims=keep_dims) - return op - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - 
converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_graph = converter.convert() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"ifm": ifm_shape}, - dtype_dict={"ifm": dtype}, - ) - input_data, output_data = infra.generate_ref_data_tflite(tflite_graph) - return mod, input_data, output_data - - def create_mod_from_relay(): - ifm = relay.var("input", shape=ifm_shape, dtype=dtype) - cast = relay.cast(ifm, dtype="int32") - mean = relay.mean(cast, axis=axis, keepdims=keep_dims) - requantize = relay.qnn.op.requantize( - mean, - input_scale=relay.const(1.0, dtype="float32"), - input_zero_point=relay.const(0, dtype="int32"), - output_scale=relay.const(1.0, dtype="float32"), - output_zero_point=relay.const(0, dtype="int32"), - out_dtype=dtype, - ) - - func = relay.Function(relay.analysis.free_vars(requantize), requantize) - mod = tvm.IRModule.from_expr(func) - - low, high = (0, 256) if dtype == "uint8" else (-127, 128) - input_data = {"input": np.random.randint(low=low, high=high, size=ifm_shape, dtype=dtype)} - output_data = generate_ref_data(mod, input_data) - return mod, input_data, output_data - - mod, input_data, output_data = ( - create_mod_from_relay() if use_same_quantization else create_mod_from_tflite() - ) - mod = partition_for_ethosu(mod) - - test_runner = infra.create_test_runner(accel_type) - compiled_models = infra.build_source(mod, input_data, output_data, test_runner) - - # Assumes only two runtime.Modules are created -- i.e. single offload module - ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0] - - # Verify generated C source - get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts") - compilation_artifacts = get_artifacts(ethosu_module) - cmms = bytes.fromhex(compilation_artifacts[0].command_stream) - infra.print_payload(cmms) - infra.verify_source(compiled_models, test_runner) - - -@pytest.mark.parametrize( - "accel_type", - ACCEL_TYPES, -) -@pytest.mark.parametrize( - "ifm_shape, axis, keepdims, relu", - [ - [(1, 4, 2, 8), 3, False, False], - [(1, 4, 4, 1), 3, False, True], - [(3, 5, 7), 2, False, True], - [(1, 4, 2, 8), 3, True, False], - [(3, 5, 7), 2, True, False], - ], -) -def test_ethosu_sum(accel_type, ifm_shape, axis, keepdims, relu): - np.random.seed(0) - - @tf.function - def sum_func(x): - op = tf.math.reduce_sum(x, axis=axis, keepdims=keepdims) - return tf.nn.relu(op) if relu else op - - infra.compare_tvm_with_tflite( - sum_func, - [ifm_shape], - accel_type, - enable_cascader=is_u55_accel_type(accel_type), - ) - - -# Case to check reduce_sum operation with different input types. 
-@pytest.mark.parametrize("dtype", ["int8", "int32"]) -def test_add_reduce_sum(dtype): - ifm_shape = (1, 2, 2, 4) - accel_type = "ethos-u55-256" - np.random.seed(0) - - def create_model(): - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - ifm2 = relay.var("ifm2", shape=ifm_shape, dtype=dtype) - ifm_scale = 0.0 if dtype == "int32" else 1.0 - op = infra.make_ethosu_binary_elementwise( - ifm, - ifm2, - ifm_shape[3], - ifm_shape[3], - "ADD", - dtype, - ifm_scale=ifm_scale, - ifm2_scale=ifm_scale, - ) - op = infra.make_ethosu_pooling( - ifm=op, - pooling_type="SUM", - pool_shape=(1, 1), - ofm_channels=1, - ofm_dtype="int32", - strides=(1, 1), - padding=(0, 0, 0, 0), - rounding_mode="NATURAL", - ) - return tvm.IRModule.from_expr(relay.Function([ifm, ifm2], op)) - - def generate_output_data(input_data): - lhs = input_data["ifm"] - rhs = input_data["ifm2"] - # reduce_sum output type is int32. - output_dtype = "int32" - add = lhs + rhs - return [np.sum(add, axis=3).astype(output_dtype)] - - cpu_mod = create_model() - - # Generate reference data - in_min, in_max = -10, 19 - lhs = np.random.randint(in_min, in_max, size=ifm_shape, dtype=dtype) - rhs = np.random.randint(in_min, in_max, size=ifm_shape, dtype=dtype) - input_data = { - "ifm": lhs, - "ifm2": rhs, - } - output_data = {"output": generate_output_data(input_data)[0]} - ethosu_mod = infra.create_ethosu_partition(cpu_mod) - - infra.compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize("dtype", ["int8", "uint8"]) -@pytest.mark.parametrize("constant", [np.ones((1, 1, 1, 1)), np.array(1)]) -def test_elementwise_add_from_constant_scalar(accel_type, dtype, constant): - np.random.seed(0) - ifm_shape = (1, 4, 4, 8) - - def create_relay_graph(): - inp = relay.var("input", shape=ifm_shape, dtype=dtype) - scalar = relay.const(constant, dtype=dtype) - add = relay.qnn.op.add( - inp, - scalar, - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), - ) - return tvm.IRModule.from_expr(relay.Function(relay.analysis.free_vars(add), add)) - - cpu_mod = create_relay_graph() - ethosu_mod = partition_for_ethosu(cpu_mod) - - # Generate reference data - input_data = { - "input": np.random.randint( - low=np.iinfo(dtype).min, high=np.iinfo(dtype).max, size=ifm_shape, dtype=dtype - ), - } - output_data = generate_ref_data(cpu_mod, input_data) - - # Scalar constants are not supported by the cascader - infra.compare_ethosu_with_reference( - ethosu_mod, input_data, output_data, accel_type, enable_cascader=False - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shape, ifm2_shape", - [ - ([1, 2, 3, 4], [1, 2, 3, 4]), - ([1, 2, 3, 4], [1, 1, 3, 1]), - ([1, 1, 3, 1], [1, 2, 3, 4]), - ], -) -def test_ethosu_left_shift_binary_elemwise( - accel_type, - ifm_shape, - ifm2_shape, -): - np.random.seed(0) - dtype = "int32" - - def create_model(): - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype) - c1 = relay.left_shift(ifm, ifm2) - return tvm.IRModule.from_expr(relay.Function([ifm, ifm2], c1)) - - cpu_mod = create_model() - - # Generate reference data - in_min, in_max = util.get_range_for_dtype_str(dtype) - input_data = { - "ifm": np.random.randint(in_min, high=in_max, size=ifm_shape, dtype=dtype), - "ifm2": 
np.random.randint(0, high=32, size=ifm2_shape, dtype=dtype), - } - output_data = generate_ref_data(cpu_mod, input_data) - ethosu_mod = partition_for_ethosu(cpu_mod) - - infra.compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shape, ifm2_shape, reversed_operands, ofm_dtype", - [ - ([1, 2, 3, 4], [1, 2, 3, 4], False, "int8"), - ([1, 2, 3, 1], [1, 1, 3, 1], False, "int32"), - ([1, 1, 3, 1], [1, 2, 3, 1], True, "int32"), - ], -) -def test_ethosu_right_shift_binary_elemwise( - ifm_shape, ifm2_shape, reversed_operands, accel_type, ofm_dtype -): - np.random.seed(0) - dtype = "int32" - - def create_model(): - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype) - shr_op = infra.make_ethosu_binary_elementwise( - ifm, ifm2, ifm_shape[3], ifm2_shape[3], "SHR", ofm_dtype, reversed_operands - ) - return tvm.IRModule.from_expr(relay.Function([ifm, ifm2], shr_op)) - - def generate_output_data(input_data): - lhs = input_data["ifm"] - rhs = input_data["ifm2"] - if reversed_operands: - lhs = np.broadcast_to(lhs, ifm2_shape) - lhs, rhs = rhs, lhs - else: - rhs = np.broadcast_to(rhs, ifm_shape) - - def rounding_right_shift(lhs, rhs): - r = 1 << (rhs - 1) - return (lhs + r) >> rhs - - return [ - np.array([rounding_right_shift(x[0], x[1]) for x in zip(lhs.flat, rhs.flat)]).astype( - ofm_dtype - ) - ] - - cpu_mod = create_model() - - # Generate reference data - in_min, in_max = util.get_range_for_dtype_str(dtype) - in_min, in_max = 18, 19 - lhs = np.random.randint(in_min, high=in_max, size=ifm_shape, dtype=dtype) - rhs = np.random.randint(1, high=2, size=ifm2_shape, dtype=dtype) - input_data = { - "ifm": lhs, - "ifm2": rhs, - } - output_data = {"output": generate_output_data(input_data)[0]} - ethosu_mod = infra.create_ethosu_partition(cpu_mod) - - infra.compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type) - - -@pytest.mark.parametrize("accel_type", ["ethos-u55-256", "ethos-u65-256"]) -@pytest.mark.parametrize( - "ifm_shape, ifm2_shape, scale, shift, dtype", - [ - ([1, 1, 1, 16], [1, 1, 1, 16], 5, 2, "int8"), - ([1, 2, 3, 1], [1, 1, 3, 1], 2, 1, "int8"), - ([1, 5, 1, 8], [1, 1, 1, 8], 1, 2, "int32"), - ], -) -def test_ethosu_rescale_mul_binary_elemwise(ifm_shape, ifm2_shape, scale, shift, accel_type, dtype): - np.random.seed(0) - - def create_model(): - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype) - rescale_mul_op = infra.make_ethosu_binary_elementwise( - ifm, - ifm2, - ifm_shape[3], - ifm2_shape[3], - "MUL", - dtype, - use_rescale=True, - rescale_scale=scale, - rescale_shift=shift, - ) - return tvm.IRModule.from_expr(relay.Function([ifm, ifm2], rescale_mul_op)) - - def generate_output_data(input_data): - lhs = input_data["ifm"] - rhs = input_data["ifm2"] - rhs = np.broadcast_to(rhs, ifm_shape) - - def rounding_right_shift(lhs, shift): - r = 1 << (shift - 1) - return (lhs + r) >> shift - - def apply_scale(lhs, scale): - if dtype == "int32": - # For 32-bit operations scale is not applied but shift is - return lhs - else: - return lhs * scale - - return [ - rounding_right_shift( - apply_scale(np.multiply(lhs.astype("int32"), rhs.astype("int32")), scale), shift - ).astype(dtype) - ] - - cpu_mod = create_model() - - # Generate reference data - lhs = np.random.randint(low=-10, high=15, size=ifm_shape, dtype=dtype) - rhs = np.random.randint(low=1, 
high=5, size=ifm2_shape, dtype=dtype) - input_data = { - "ifm": lhs, - "ifm2": rhs, - } - output_data = {"output": generate_output_data(input_data)[0]} - ethosu_mod = infra.create_ethosu_partition(cpu_mod) - - infra.compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize("ifm_shape", [(3, 2), (1, 15, 11, 7), (3, 1, 12), (400,)]) -@pytest.mark.parametrize("ifm_scale, ifm_zp, ofm_scale, ofm_zp", [(1, 0, 1, 0), (0.015, 3, 0.2, 5)]) -def test_ethosu_identity_codegen( - request, ifm_shape, ifm_scale, ifm_zp, ofm_scale, ofm_zp, accel_type -): - np.random.seed(0) - - def create_model(): - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - identity = infra.make_ethosu_identity( - ifm, - ifm_scale=ifm_scale, - ifm_zero_point=ifm_zp, - ofm_scale=ofm_scale, - ofm_zero_point=ofm_zp, - ) - return tvm.IRModule.from_expr(relay.Function([ifm], identity)) - - def generate_output_data(input_data): - requant_data = (ifm_scale * (input_data["ifm"] - ifm_zp)) / ofm_scale + ofm_zp - return [np.round(np.clip(requant_data, -128, 127)).astype("int8")] - - cpu_mod = create_model() - input_data = {"ifm": np.random.randint(-120, high=120, size=ifm_shape, dtype="int8")} - output_data = {"output": generate_output_data(input_data)[0]} - ethosu_mod = infra.create_ethosu_partition(cpu_mod) - - infra.compare_ethosu_with_reference( - ethosu_mod, - input_data, - output_data, - accel_type, - output_tolerance=1, - enable_cascader=is_u55_accel_type(accel_type), - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shape, new_shape", - [ - ((1, 4, 1, 2), (1, 1, 1, 8)), - ((12, 20), (1, 6, 4, 10)), - ((12, 20), (6, 4, 10)), - ((20,), (4, 5)), - ((12, 2, 10), (0, -3)), - ((11, 3, 25), (-1,)), - ((8, 7, 3), (-4, 1, 8, -2)), - ], -) -def test_relay_reshape_codegen(ifm_shape, new_shape, accel_type): - np.random.seed(0) - - def create_model(): - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - reshape = relay.op.reshape(ifm, newshape=new_shape) - return tvm.IRModule.from_expr(relay.Function([ifm], reshape)) - - cpu_mod = create_model() - input_data = {"ifm": np.random.randint(-128, high=127, size=ifm_shape, dtype="int8")} - output_data = generate_ref_data(cpu_mod, input_data) - ethosu_mod = infra.create_ethosu_partition(cpu_mod) - - infra.compare_ethosu_with_reference( - ethosu_mod, - input_data, - output_data, - accel_type, - enable_cascader=is_u55_accel_type(accel_type), - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shape, begin, size", - [ - ([1, 10, 50, 4], [0, 5, 11, 2], [1, 5, 11, 1]), - ([15, 17, 3], [3, 0, 1], [8, 17, 2]), - ([7, 6043], [0, 704], [1, 2860]), - ([5000], [123], [2151]), - ], -) -def test_tflite_slice(request, accel_type, ifm_shape, begin, size): - np.random.seed(0) - - @tf.function - def slice_func(x): - return tf.slice(x, begin, size) - - infra.compare_tvm_with_tflite( - slice_func, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shape, begin, end", - [([1, 1, 5, 8], [0, 0, 0, 0], [1, 1, 2, 3]), ([1, 3, 3], [0, 1, 2], [1, 2, 3])], -) -def test_tflite_strided_slice(accel_type, ifm_shape, begin, end): - np.random.seed(0) - - @tf.function - def strided_slice_func(x): - return tf.strided_slice(x, begin, end) - - infra.compare_tvm_with_tflite( - strided_slice_func, [ifm_shape], accel_type, 
enable_cascader=is_u55_accel_type(accel_type) - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize("operator_type", ["ABS"]) -@pytest.mark.parametrize( - "ifm_shape", - [[1, 5, 12, 4], [1, 1, 2], [4, 3, 2], [10, 20], [345]], -) -def test_ethosu_unary_elementwise( - request, - accel_type, - operator_type, - ifm_shape, -): - np.random.seed(0) - - @tf.function - def abs_func(x): - if operator_type == "ABS": - op = tf.math.abs(x) - return op - - infra.compare_tvm_with_tflite( - abs_func, - [ifm_shape], - accel_type, - enable_cascader=is_u55_accel_type(accel_type), - ) - - -def test_ethosu_section_name(): - np.random.seed(0) - - @tf.function - def depthwise_conv2d(x): - weight_shape = [3, 3, 3, 1] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - tf_strides = [1, 1, 1, 1] - op = tf.nn.depthwise_conv2d(x, weight, strides=tf_strides, padding="SAME", dilations=(2, 2)) - return op - - mod, tflite_graph = infra.get_tflite_graph(depthwise_conv2d, [(1, 55, 55, 3)]) - - # Generate reference data - input_data, output_data = infra.generate_ref_data_tflite(tflite_graph) - - test_runner = infra.create_test_runner() - compiled_models = infra.build_source(mod, input_data, output_data, test_runner) - - # Assumes only two runtime.Modules are created -- i.e. single offload module - ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0] - - # Verify generated C source - source = ethosu_module.get_source() - assert ( - '__attribute__((section(".rodata.tvm"), aligned(16))) static int8_t tvmgen_default_ethos_u_main_0_cms_data_data' - in source - ) - assert ( - '__attribute__((section(".rodata.tvm"), aligned(16))) static int8_t tvmgen_default_ethos_u_main_0_weights' - in source - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -def test_ethosu_clz(accel_type): - np.random.seed(0) - ifm_shape = (1, 42, 5, 4) - - def create_model(): - ifm = relay.var("ifm", shape=ifm_shape, dtype="int32") - clz = infra.make_ethosu_unary_elementwise(ifm, 4, "CLZ") - return tvm.IRModule.from_expr(relay.Function([ifm], clz)) - - def generate_output_data(input_data): - def clz_comp(n): - n_bin = np.binary_repr(n) - if n_bin[0] == "-": - return 0 - else: - return 32 - len(n_bin) - - return [ - np.array([clz_comp(i) for i in input_data["ifm"].ravel()]) - .reshape(ifm_shape) - .astype("int32") - ] - - cpu_mod = create_model() - input_data = {"ifm": np.random.randint(-500000, high=500000, size=ifm_shape, dtype="int32")} - output_data = {"output": generate_output_data(input_data)[0]} - ethosu_mod = infra.create_ethosu_partition(cpu_mod) - - infra.compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -def test_tflite_tanh(accel_type): - np.random.seed(0) - ifm_shape = [1, 115, 32, 7] - - @tf.function - def tanh_func(x): - op = tf.nn.tanh(x) - return op - - infra.compare_tvm_with_tflite( - tanh_func, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize("ifm_shape", [(1, 5, 5, 3), (1, 12, 9, 1)]) -def test_tflite_hard_swish(accel_type, ifm_shape): - np.random.seed(0) - - @tf.function - def hard_swish_func(x): - op = tf.keras.layers.Lambda( - lambda x: x * tf.keras.activations.relu(x + 3.0, max_value=6.0) / 6.0 - )(x) - return op - - infra.compare_tvm_with_tflite(hard_swish_func, [ifm_shape], accel_type, ranges=[(-1, 1)]) - - 
-@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "shapes, axis", - [ - ([(2, 3), (4, 3)], 0), - ([(3, 2, 1), (3, 1, 1)], 1), - ([(10,), (13,), (14,)], 0), - ([(1, 5, 2, 1), (1, 5, 7, 1), (1, 5, 3, 1)], 2), - ], -) -def test_tflite_concat(shapes, axis, accel_type): - np.random.seed(0) - - @tf.function - def concat_func(*inputs): - op = tf.concat(list(inputs), axis) - return op - - infra.compare_tvm_with_tflite(concat_func, shapes, accel_type, enable_cascader=False) - - -def test_tflite_unstack_concat(): - np.random.seed(0) - shapes = [(2, 4, 16)] - axis = 1 - accel_type = "ethos-u55-256" - - @tf.function - def concat_func(input): - inputs = tf.unstack(input) - inputs.reverse() - op = tf.concat(inputs, axis) - return op - - infra.compare_tvm_with_tflite(concat_func, shapes, accel_type, enable_cascader=False) - - -def test_tflite_concat_with_reused_args(): - np.random.seed(0) - shapes = [(1, 1, 24, 1), (1, 1, 24, 1), (1, 1, 10, 1), (1, 1, 68, 1)] - axis = 2 - accel_type = "ethos-u55-256" - - @tf.function - def concat_func(*inputs): - op = tf.add(inputs[0], inputs[1]) - op2 = tf.concat((inputs[0], inputs[2], op), axis) - op = tf.concat((inputs[0], inputs[3], op), axis) - op = tf.nn.max_pool2d(op, (1, 1), (1, 2), "SAME") - op = tf.add(op, op2) - return op - - infra.compare_tvm_with_tflite(concat_func, shapes, accel_type, enable_cascader=False) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -def test_tflite_sigmoid(accel_type): - np.random.seed(0) - ifm_shape = [1, 135, 41, 6] - - @tf.function - def sigmoid_function(x): - op = tf.nn.sigmoid(x) - return op - - infra.compare_tvm_with_tflite( - sigmoid_function, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) - ) - - -# This codegen test checks both, split and split_v -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shape, num_or_size_splits, axis", - [ - ((1, 4, 6, 8), (1, 3, 4), 3), - ((4, 6, 8), 2, 0), - ((50,), 25, 0), - ((5, 11), 1, 1), - ((13,), (13,), 0), - ((22, 7), (4, -1), 1), - ], -) -def test_tflite_split(accel_type, ifm_shape, num_or_size_splits, axis): - np.random.seed(0) - - @tf.function - def split_func(x): - op = tf.split(x, num_or_size_splits, axis=axis) - return op - - infra.compare_tvm_with_tflite(split_func, [ifm_shape], accel_type, enable_cascader=False) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shape,ifm_scale,ifm_zp,ofm_scale,ofm_zp", - [ - [(1, 8, 8, 3), 1.0, 0, 1.0, 0], - [(1, 20, 30, 3), 1.345, 34, 0.32, -23], - [(1, 1, 4, 8), 0.0078125, 0, 0.00997, -30], - ], -) -def test_ethosu_requantize(accel_type, ifm_shape, ifm_scale, ifm_zp, ofm_scale, ofm_zp): - np.random.seed(0) - dtype = "int8" - - def create_model(): - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - requantize = relay.qnn.op.requantize( - ifm, - relay.const(ifm_scale, dtype="float32"), - relay.const(ifm_zp, dtype="int32"), - relay.const(ofm_scale, dtype="float32"), - relay.const(ofm_zp, dtype="int32"), - ) - return tvm.IRModule.from_expr(relay.Function([ifm], requantize)) - - cpu_mod = create_model() - input_data = {"ifm": np.random.randint(-128, high=127, size=ifm_shape, dtype=dtype)} - output_data = generate_ref_data(cpu_mod, input_data) - ethosu_mod = partition_for_ethosu(cpu_mod) - - infra.compare_ethosu_with_reference( - ethosu_mod, - input_data, - output_data, - accel_type, - enable_cascader=is_u55_accel_type(accel_type), - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) 
-@pytest.mark.parametrize("ifm_shape,axis", [((2,), 0), ((1, 3, 3), 2)]) -def test_tflite_expand_dims(accel_type, ifm_shape, axis): - np.random.seed(0) - - @tf.function - def expand_dims_func(x): - return tf.expand_dims(x, axis=axis) - - infra.compare_tvm_with_tflite( - expand_dims_func, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shape,axis", [((1, 1, 2, 1), 0), ((1, 3, 3, 1), 3), ((1, 1, 2, 1), None)] -) -def test_tflite_squeeze(accel_type, ifm_shape, axis): - np.random.seed(0) - - @tf.function - def squeeze_func(x): - return tf.squeeze(x, axis=axis) - - infra.compare_tvm_with_tflite( - squeeze_func, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shape,size,half_pixel", - [ - [(1, 2, 2, 1), (4, 4), False], - [(1, 2, 2, 1), (4, 4), True], - [(1, 4, 7, 3), (8, 14), False], - [(1, 3, 5, 3), (3, 5), False], - [(1, 6, 6, 96), (12, 12), False], - [(1, 6, 6, 96), (12, 12), True], - ], -) -def test_tflite_resize2d_nearest_neighbor(accel_type, ifm_shape, size, half_pixel): - np.random.seed(0) - align_corners = False - - @tf.function - def resize_model(x): - return tf.compat.v1.image.resize_nearest_neighbor( - x, - size, - align_corners=align_corners, - half_pixel_centers=half_pixel, - ) - - infra.compare_tvm_with_tflite( - resize_model, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shape,size,align_corners", - [ - [(1, 2, 2, 1), (4, 4), False], - [(1, 4, 7, 3), (8, 14), False], - [(1, 2, 2, 1), (3, 3), True], - [(1, 4, 7, 3), (7, 13), True], - [(1, 3, 5, 3), (3, 5), False], - ], -) -def test_tflite_resize2d_bilinear(accel_type, ifm_shape, size, align_corners): - np.random.seed(0) - - @tf.function - def resize_model(x): - return tf.compat.v1.image.resize_bilinear( - x, size, align_corners=align_corners, half_pixel_centers=False - ) - - infra.compare_tvm_with_tflite( - resize_model, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shape,ofm_shape,kernel_shape,padding", - [ - [(1, 2, 2, 1), (1, 4, 4, 1), (3, 3), "SAME"], - [(1, 2, 2, 1), (1, 9, 9, 1), (7, 7), "VALID"], - [(1, 2, 4, 3), (1, 4, 8, 3), (5, 3), "SAME"], - [(1, 10, 5, 3), (1, 21, 13, 3), (3, 5), "VALID"], - ], -) -@pytest.mark.parametrize("has_bias", [False, True]) -def test_tflite_transpose_convolution( - accel_type, ifm_shape, ofm_shape, kernel_shape, padding, has_bias -): - np.random.seed(0) - dilations = (1, 1) - strides = (2, 2) - - @tf.function - def conv2d_transpose(x): - weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], ofm_shape[3]] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - bias_shape = ofm_shape[3] - bias = tf.constant(np.random.uniform(size=bias_shape), dtype=tf.float32) - tf_strides = [1, strides[0], strides[1], 1] - op = tf.nn.conv2d_transpose( - x, - weight, - output_shape=ofm_shape, - strides=tf_strides, - padding=padding, - dilations=dilations, - ) - if has_bias: - op = tf.nn.bias_add(op, bias) - return op - - infra.compare_tvm_with_tflite( - conv2d_transpose, - [ifm_shape], - accel_type=accel_type, - enable_cascader=is_u55_accel_type(accel_type), - ) - - -@pytest.mark.parametrize("accel_type", 
ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shapes,axis", - [ - ([(1, 2, 2), (1, 2, 2), (1, 2, 2)], 2), - ([(5, 4), (5, 4)], 1), - ([(1,), (1,)], 0), - ([(3, 1), (3, 1), (3, 1), (3, 1)], 0), - ], -) -def test_tflite_pack(accel_type, ifm_shapes, axis): - np.random.seed(0) - - @tf.function - def pack_func(*inputs): - return tf.stack(inputs, axis=axis) - - infra.compare_tvm_with_tflite(pack_func, ifm_shapes, accel_type, enable_cascader=False) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize( - "ifm_shape,axis", - [[(1, 2, 3, 4), 1], [(2, 3), 1], [(5, 6, 7), 2]], -) -def test_tflite_unpack(accel_type, ifm_shape, axis): - np.random.seed(0) - - @tf.function - def unpack_func(x): - return tf.unstack(x, axis=axis) - - infra.compare_tvm_with_tflite(unpack_func, [ifm_shape], accel_type, enable_cascader=False) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize("ifm_shape", [(1, 15, 15, 3), (1, 8, 9, 1)]) -@pytest.mark.parametrize("alpha", [0.2, 0.634]) -def test_tflite_leaky_relu(accel_type, ifm_shape, alpha): - np.random.seed(0) - - @tf.function - def leaky_relu_func(x): - return tf.nn.leaky_relu(x, alpha=alpha) - - infra.compare_tvm_with_tflite( - leaky_relu_func, - [ifm_shape], - accel_type, - enable_cascader=is_u55_accel_type(accel_type), - ranges=[(-1, 1)], - ) - - -# conv2d + relu_n1_to_1 is used because separate activation is not offloaded to NPU. -def test_tflite_relu_n1_to_1(): - np.random.seed(0) - accel_type = "ethos-u55-256" - ifm_shape = (1, 55, 34, 3) - kernel_shape = (3, 2) - strides = (1, 1) - - @tf.function - def conv2d_relu_n1_to_1(x): - tf_strides = [1, strides[0], strides[1], 1] - weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], 3] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - op = tf.nn.conv2d( - x, - weight, - strides=tf_strides, - padding="VALID", - ) - # The specific pattern will be replaced into RELU_N1_TO_1 by tflite. - return tf.math.maximum(-1.0, tf.math.minimum(op, 1.0)) - - infra.compare_tvm_with_tflite( - conv2d_relu_n1_to_1, - [ifm_shape], - accel_type, - enable_cascader=True, - ) - - -# conv2d + relu6 is used because separate activation is not offloaded to NPU. -def test_tflite_relu6(): - np.random.seed(0) - accel_type = "ethos-u55-256" - ifm_shape = (1, 55, 34, 3) - kernel_shape = (3, 2) - strides = (1, 1) - - @tf.function - def conv2d_relu6(x): - tf_strides = [1, strides[0], strides[1], 1] - weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], 3] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - op = tf.nn.conv2d( - x, - weight, - strides=tf_strides, - padding="VALID", - ) - return tf.nn.relu6(op) - - infra.compare_tvm_with_tflite( - conv2d_relu6, - [ifm_shape], - accel_type, - enable_cascader=True, - ) - - -# Specific case when operation cannot be offloaded to NPU by single binary elementwise operation because -# min and max operations cannot be fused with requantize if there are different scales as it's not supported on NPU. -@pytest.mark.parametrize("operation", [tf.math.minimum, tf.math.maximum]) -def test_tflite_min_max_relu_n1_to_1(operation): - np.random.seed(0) - accel_type = "ethos-u55-128" - ifm_shape = (1, 12, 16, 8) - - @tf.function - def min_max_relu_n1_to_1(lhs, rhs): - op = operation(lhs, rhs) - # The specific pattern will be replaced into RELU_N1_TO_1 by tflite. 
- return tf.math.maximum(-1.0, tf.math.minimum(op, 1.0)) - - infra.compare_tvm_with_tflite( - min_max_relu_n1_to_1, - [ifm_shape, ifm_shape], - accel_type, - enable_cascader=True, - ranges=[(-1, 1), (0, 2)], - ) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -@pytest.mark.parametrize("ifm_shape", [(1, 14), (1, 151)]) -@pytest.mark.parametrize("ofm_channels", [32, 64]) -@pytest.mark.parametrize("use_bias", [True, False]) -@pytest.mark.parametrize("activation_function", ["RELU", "NONE"]) -def test_tflite_fully_connected( - accel_type, - ifm_shape, - ofm_channels, - use_bias, - activation_function, -): - np.random.seed(0) - - @tf.function - def fully_connected(x): - bias_shape = ofm_channels - bias = tf.constant(np.random.uniform(size=bias_shape), dtype=tf.float32) - w = tf.constant( - np.random.uniform(size=[ifm_shape[1], ofm_channels]), - dtype=tf.float32, - ) - x = tf.matmul(x, w) - if use_bias: - x = tf.nn.bias_add(x, bias) - if activation_function: - x = tf.nn.relu(x) - return x - - infra.compare_tvm_with_tflite( - fully_connected, [ifm_shape], accel_type, enable_cascader=is_u55_accel_type(accel_type) - ) - - -@pytest.mark.parametrize("accel_type", ["ethos-u55-256", "ethos-u65-256"]) -@pytest.mark.parametrize("ifm_shape", [(1, 16), (4, 8)]) -@pytest.mark.parametrize("ofm_channels", [8, 32]) -@pytest.mark.parametrize("activation_function", ["NONE", "RELU"]) -def test_tflite_matmul( - accel_type, - ifm_shape, - ofm_channels, - activation_function, -): - np.random.seed(0) - - @tf.function - def matmul(x, y): - x = tf.matmul(x, y, transpose_b=True) - if activation_function == "RELU": - x = tf.nn.relu(x) - return x - - infra.compare_tvm_with_tflite( - matmul, [ifm_shape, [ofm_channels, ifm_shape[-1]]], accel_type, enable_cascader=False - ) - - -@pytest.mark.parametrize("accel_type", ["ethos-u55-256", "ethos-u65-256"]) -def test_tflite_subtract_sigmoid(accel_type): - np.random.seed(0) - ifm_shape = [1, 6, 8, 4] - - @tf.function - def subtract_sigmoid_function(lhs, rhs): - op = tf.math.subtract(lhs, rhs) - op = tf.nn.sigmoid(op) - return op - - infra.compare_tvm_with_tflite( - subtract_sigmoid_function, - [ifm_shape, ifm_shape], - accel_type, - enable_cascader=is_u55_accel_type(accel_type), - ) - - -@pytest.mark.parametrize("accel_type", ["ethos-u55-256", "ethos-u65-256"]) -@pytest.mark.parametrize( - "ifm_shape,ofm_channels,fract_size,tolerance", - [[(1, 16), 8, 15, 0.001], [(2, 8), 16, 14, 0.001], [(4, 8), 16, 12, 0.001]], -) -def test_ethosu_matmul_fixed_point(accel_type, ifm_shape, ofm_channels, fract_size, tolerance): - np.random.seed(0) - dtype = "int16" - weights_shape = (ofm_channels, ifm_shape[1]) - - def create_model(): - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - ifm2 = relay.var("ifm2", shape=weights_shape, dtype=dtype) - ifm_fixed_point = relay.cast(ifm, "int32") - ifm2_fixed_point = relay.cast(ifm2, "int32") - ifm_fixed_point = relay.fixed_point_multiply(ifm_fixed_point, 2**31 - 1, 0) - ifm2_fixed_point = relay.fixed_point_multiply(ifm2_fixed_point, 2**31 - 1, 0) - dense = relay.nn.dense(ifm_fixed_point, ifm2_fixed_point) - dense = relay.fixed_point_multiply(dense, 1, 16) - dense = relay.cast(dense, dtype) - return tvm.IRModule.from_expr(relay.Function([ifm, ifm2], dense)) - - def convert_to_fixed_point(arr, fract_size): - fract_fact = 0b1 << fract_size - return np.array(arr * fract_fact, dtype=np.int16) - - cpu_mod = create_model() - ethosu_mod = partition_for_ethosu(cpu_mod) - - input_data = { - "ifm": np.random.uniform(-0.5, 0.5, size=ifm_shape), - "ifm2": 
np.random.uniform(-0.5, 0.5, size=weights_shape), - } - input_data = { - "ifm": convert_to_fixed_point(input_data["ifm"], fract_size), - "ifm2": convert_to_fixed_point(input_data["ifm2"], fract_size), - } - output_data = generate_ref_data(cpu_mod, input_data) - output_data = {"output": output_data["output"].astype("int16")} - tolerance = convert_to_fixed_point(tolerance, fract_size) - - infra.compare_ethosu_with_reference( - ethosu_mod, - input_data, - output_data, - accel_type, - enable_cascader=False, - output_tolerance=tolerance, - ) - - -@pytest.mark.parametrize("accel_type", ["ethos-u55-256", "ethos-u65-256"]) -@pytest.mark.parametrize( - "ifm_shape,fract_size,tolerance", - [[(1, 2, 8, 4), 15, 0.001], [(1, 8), 12, 0.001], [(1, 1, 4, 8), 10, 0.002]], -) -def test_ethosu_tanh_fixed_point(accel_type, ifm_shape, fract_size, tolerance): - np.random.seed(0) - dtype = "int16" - - def create_model(): - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - ifm_fixed_point = relay.cast(ifm, "int32") - ifm_fixed_point = relay.fixed_point_multiply(ifm_fixed_point, 2**31 - 1, 0) - tanh = relay.tanh(ifm_fixed_point) - tanh = relay.fixed_point_multiply(tanh, 1, 31 - fract_size) - tanh = relay.cast(tanh, dtype) - return tvm.IRModule.from_expr(relay.Function([ifm], tanh)) - - def generate_ref(input_data): - return np.tanh(input_data) - - def convert_to_fixed_point(arr, fract_size): - fract_fact = 0b1 << fract_size - return np.array(arr * fract_fact, dtype=np.int16) - - cpu_mod = create_model() - ethosu_mod = partition_for_ethosu(cpu_mod) - - input_data = {"ifm": np.random.uniform(-1, 1, size=ifm_shape)} - output_data = generate_ref(input_data["ifm"]) - - input_data = {"ifm": convert_to_fixed_point(input_data["ifm"], fract_size)} - output_data = {"output": convert_to_fixed_point(output_data, fract_size)} - tolerance = convert_to_fixed_point(tolerance, fract_size) - - infra.compare_ethosu_with_reference( - ethosu_mod, - input_data, - output_data, - accel_type, - enable_cascader=is_u55_accel_type(accel_type), - output_tolerance=tolerance, - ) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_compiler.py b/tests/python/contrib/test_ethosu/test_compiler.py deleted file mode 100644 index 3bf7abb8f113..000000000000 --- a/tests/python/contrib/test_ethosu/test_compiler.py +++ /dev/null @@ -1,66 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest - -pytest.importorskip("ethosu.vela") -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu.tir.compiler import _lower_to_tir -from . 
import infra - - -def _create_single_conv2d(): - ifm = relay.var("x", shape=(1, 8, 8, 4), dtype="int8") - conv1 = infra.make_ethosu_conv2d(ifm, 4, 4, (3, 3), (1, 1), (1, 1), (1, 1)) - func = relay.Function(relay.analysis.free_vars(conv1), conv1) - return func - - -def _create_double_conv2d(): - ifm = relay.var("x", shape=(1, 8, 8, 4), dtype="int8") - conv1 = infra.make_ethosu_conv2d(ifm, 4, 4, (3, 3), (1, 1), (1, 1), (1, 1)) - conv2 = infra.make_ethosu_conv2d(conv1, 4, 7, (2, 2), (1, 1), (1, 1), (1, 1)) - func = relay.Function(relay.analysis.free_vars(conv2), conv2) - return func - - -def _create_non_linear_conv2d(): - shape = (1, 8, 8, 4) - ifm1 = relay.var("x", shape=shape, dtype="int8") - ifm2 = relay.var("y", shape=shape, dtype="int8") - conv1 = infra.make_ethosu_conv2d(ifm1, 4, 4, (3, 3), (1, 1), (1, 1), (1, 1)) - conv2 = infra.make_ethosu_conv2d(ifm2, 4, 4, (3, 3), (1, 1), (1, 1), (1, 1)) - add = infra.make_ethosu_binary_elementwise(conv1, conv2, shape[3], shape[3], "ADD", "int8") - func = relay.Function(relay.analysis.free_vars(add), add) - return func - - -@pytest.mark.parametrize( - "relay_function, arg_count", - [(_create_single_conv2d, 2), (_create_double_conv2d, 2), (_create_non_linear_conv2d, 3)], -) -def test_lower_to_tir_arg_count(relay_function, arg_count): - mod = tvm.IRModule() - mod["main"] = relay_function() - mod = relay.transform.InferType()(mod) - tir_mod = _lower_to_tir(mod["main"])[0] - primfunc = tir_mod["main"] - assert len(primfunc.params) == arg_count - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py b/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py deleted file mode 100644 index 6b9702f012ca..000000000000 --- a/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py +++ /dev/null @@ -1,697 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") - -import tvm -from tvm.script import tir as T -from tvm.relay.backend.contrib.ethosu.tir.passes import CopyComputeReordering - -# Uninitialized vars used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class AllOperatorsWithWeights: - @T.prim_func - def main() -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([8192], "int8") - buffer2 = T.Buffer([128], "uint8") - buffer3 = T.Buffer([32], "uint8") - buffer4 = T.Buffer([112], "uint8") - buffer5 = T.Buffer([32], "uint8") - buffer6 = T.Buffer([112], "uint8") - buffer7 = T.Buffer([32], "uint8") - buffer8 = T.Buffer([112], "uint8") - buffer9 = T.Buffer([32], "uint8") - buffer10 = T.Buffer([2048], "int8") - # body - p1 = T.decl_buffer([128], "uint8") - p2 = T.decl_buffer([112], "uint8") - p3 = T.decl_buffer([112], "uint8") - p4 = T.decl_buffer([32], "uint8") - p5 = T.decl_buffer([32], "uint8") - p6 = T.decl_buffer([32], "uint8") - p7 = T.decl_buffer([112], "uint8") - p8 = T.decl_buffer([32], "uint8") - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, 12, p4[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 112, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 32, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 112, 12, p5[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer6[0], 112, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer7[0], 32, p6[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 112, 12, p6[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer8[0], 112, p7[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer9[0], 32, p8[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p7[0], 112, 12, p8[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -def test_all_operators_with_weights_max_copy_movements_0(): - test_mod = CopyComputeReordering(0)(AllOperatorsWithWeights) - reference_mod = AllOperatorsWithWeights - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - - -def test_all_operators_with_weights_max_copy_movements_1(): - # Uninitialized vars used - # fmt: off - @tvm.script.ir_module(check_well_formed=False) - class 
ReferenceModule: - @T.prim_func - def main() -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([8192], "int8") - buffer2 = T.Buffer([128], "uint8") - buffer3 = T.Buffer([32], "uint8") - buffer4 = T.Buffer([112], "uint8") - buffer5 = T.Buffer([32], "uint8") - buffer6 = T.Buffer([112], "uint8") - buffer7 = T.Buffer([32], "uint8") - buffer8 = T.Buffer([112], "uint8") - buffer9 = T.Buffer([32], "uint8") - buffer10 = T.Buffer([2048], "int8") - # body - p1 = T.decl_buffer([128], "uint8") - p2 = T.decl_buffer([112], "uint8") - p3 = T.decl_buffer([112], "uint8") - p4 = T.decl_buffer([32], "uint8") - p5 = T.decl_buffer([32], "uint8") - p6 = T.decl_buffer([32], "uint8") - p7 = T.decl_buffer([112], "uint8") - p8 = T.decl_buffer([32], "uint8") - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 112, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 32, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, 12, p4[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer6[0], 112, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer7[0], 32, p6[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 112, 12, p5[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer8[0], 112, p7[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer9[0], 32, p8[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 112, 12, p6[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p7[0], 112, 12, p8[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - test_mod = CopyComputeReordering(1)(AllOperatorsWithWeights) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - - -def test_all_operators_with_weights_max_copy_movements_2(): - # Uninitialized vars used - # fmt: off - @tvm.script.ir_module(check_well_formed=False) - class ReferenceModule: - @T.prim_func - def main() -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([8192], "int8") - buffer2 = T.Buffer([128], "uint8") - buffer3 = T.Buffer([32], "uint8") - buffer4 = T.Buffer([112], "uint8") - buffer5 = T.Buffer([32], "uint8") - 
buffer6 = T.Buffer([112], "uint8") - buffer7 = T.Buffer([32], "uint8") - buffer8 = T.Buffer([112], "uint8") - buffer9 = T.Buffer([32], "uint8") - buffer10 = T.Buffer([2048], "int8") - # body - p1 = T.decl_buffer([128], "uint8") - p2 = T.decl_buffer([112], "uint8") - p3 = T.decl_buffer([112], "uint8") - p4 = T.decl_buffer([32], "uint8") - p5 = T.decl_buffer([32], "uint8") - p6 = T.decl_buffer([32], "uint8") - p7 = T.decl_buffer([112], "uint8") - p8 = T.decl_buffer([32], "uint8") - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 112, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 32, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer6[0], 112, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer7[0], 32, p6[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, 12, p4[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer8[0], 112, p7[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer9[0], 32, p8[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 112, 12, p5[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 112, 12, p6[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p7[0], 112, 12, p8[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - test_mod = CopyComputeReordering(2)(AllOperatorsWithWeights) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - - -# Uninitialized vars used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class AllOperatorsWithoutWeights: - @T.prim_func - def main() -> None: - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([36], "int8") - buffer2 = T.Buffer([9], "int8") - # body - p1 = T.decl_buffer([96], "int8") - T.evaluate(T.call_extern("ethosu_pooling", "int8", 3, 4, 3, 3, 0, 4, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 12, 3, 1, "int8", 3, 2, 3, 3, 0, 2, p1[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 32, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_pooling", "int8", 3, 2, 3, 3, 0, 2, p1[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 32, 16, 1, "int8", 3, 1, 3, 3, 0, 1, buffer2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 3, 1, 
1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -@pytest.mark.parametrize("max_copy_movements", [0, 1, 2]) -def test_all_operators_without_weights(max_copy_movements): - test_mod = CopyComputeReordering(max_copy_movements)(AllOperatorsWithoutWeights) - reference_mod = AllOperatorsWithoutWeights - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - - -# Uninitialized vars used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class OperatorsWithAndWithoutWeights: - @T.prim_func - def main() -> None: - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([97156], "int8") - buffer2 = T.Buffer([80], "uint8") - buffer3 = T.Buffer([64], "uint8") - buffer4 = T.Buffer([96], "uint8") - buffer5 = T.Buffer([32], "uint8") - # body - p1 = T.decl_buffer([390336], "int8") - p2 = T.decl_buffer([80], "uint8") - p3 = T.decl_buffer([64], "uint8") - p4 = T.decl_buffer([390336], "int8") - p5 = T.decl_buffer([96], "uint8") - p6 = T.decl_buffer([32], "uint8") - T.evaluate(T.call_extern("ethosu_pooling", "int8", 214, 227, 2, 214, 0, 227, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 454, 2, 1, "int8", 214, 114, 2, 214, 0, 114, p1[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1824, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 80, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 64, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 2, 214, 0, 114, p1[0], 0, 0, 0, T.float32(0.00392157), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 5, 214, 0, 114, p4[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, 3, 1, 1, 1, 1, 2, p2[0], 80, 0, p3[0], 64, 0, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 96, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 32, p6[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 5, 214, 0, 114, p4[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 3, 214, 0, 114, buffer3[0], 0, 0, 0, T.float32(0.104816), -128, "NHWC", 342, 3, 1, 3, 1, 1, 1, 1, 2, p5[0], 96, 0, p6[0], 32, 0, 1, 0, 1, "CLIP", -128, 127, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -def test_operators_with_and_without_weights_max_copy_movements_0(): - test_mod = CopyComputeReordering(0)(OperatorsWithAndWithoutWeights) - reference_mod = OperatorsWithAndWithoutWeights - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - - -def test_operators_with_and_without_weights_max_copy_movements_1(): - # Uninitialized vars used - # fmt: off - @tvm.script.ir_module(check_well_formed=False) - class ReferenceModule: - @T.prim_func - def main() -> None: - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([97156], "int8") - buffer2 = T.Buffer([80], "uint8") - buffer3 = T.Buffer([64], "uint8") - buffer4 = T.Buffer([96], "uint8") - buffer5 = T.Buffer([32], "uint8") - # body - p1 = T.decl_buffer([390336], "int8") - p2 = T.decl_buffer([80], "uint8") - p3 = T.decl_buffer([64], "uint8") - p4 = T.decl_buffer([390336], "int8") - p5 = T.decl_buffer([96], "uint8") - p6 = T.decl_buffer([32], "uint8") - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 80, p2[0], dtype="handle")) - 
T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 64, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_pooling", "int8", 214, 227, 2, 214, 0, 227, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 454, 2, 1, "int8", 214, 114, 2, 214, 0, 114, p1[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1824, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 96, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 32, p6[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 2, 214, 0, 114, p1[0], 0, 0, 0, T.float32(0.00392157), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 5, 214, 0, 114, p4[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, 3, 1, 1, 1, 1, 2, p2[0], 80, 0, p3[0], 64, 0, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 5, 214, 0, 114, p4[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 3, 214, 0, 114, buffer3[0], 0, 0, 0, T.float32(0.104816), -128, "NHWC", 342, 3, 1, 3, 1, 1, 1, 1, 2, p5[0], 96, 0, p6[0], 32, 0, 1, 0, 1, "CLIP", -128, 127, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - test_mod = CopyComputeReordering(1)(OperatorsWithAndWithoutWeights) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - - -def test_operators_with_and_without_weights_max_copy_movements_2(): - # Uninitialized vars used - # fmt: off - @tvm.script.ir_module(check_well_formed=False) - class ReferenceModule: - @T.prim_func - def main() -> None: - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([97156], "int8") - buffer2 = T.Buffer([80], "uint8") - buffer3 = T.Buffer([64], "uint8") - buffer4 = T.Buffer([96], "uint8") - buffer5 = T.Buffer([32], "uint8") - # body - p1 = T.decl_buffer([390336], "int8") - p2 = T.decl_buffer([80], "uint8") - p3 = T.decl_buffer([64], "uint8") - p4 = T.decl_buffer([390336], "int8") - p5 = T.decl_buffer([96], "uint8") - p6 = T.decl_buffer([32], "uint8") - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 80, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 64, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 96, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 32, p6[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_pooling", "int8", 214, 227, 2, 214, 0, 227, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 454, 2, 1, "int8", 214, 114, 2, 214, 0, 114, p1[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1824, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 2, 214, 0, 114, p1[0], 0, 0, 0, T.float32(0.00392157), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 5, 214, 0, 114, p4[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, 3, 1, 1, 1, 1, 2, p2[0], 80, 0, p3[0], 64, 0, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 5, 214, 0, 114, p4[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 3, 214, 0, 114, buffer3[0], 0, 0, 0, T.float32(0.104816), -128, "NHWC", 342, 3, 1, 3, 1, 1, 1, 1, 2, p5[0], 96, 0, p6[0], 32, 0, 1, 0, 1, "CLIP", -128, 127, "TFL", "NONE", 0, 0, 0, 
dtype="handle")) - # fmt: on - - test_mod = CopyComputeReordering(2)(OperatorsWithAndWithoutWeights) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - - -# Uninitialized vars used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class CopyToBufferWithLocalScope: - @T.prim_func - def main() -> None: - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([64], "uint8") - buffer2 = T.Buffer([48], "uint8") - buffer3 = T.Buffer([48], "uint8") - buffer4 = T.Buffer([256], "uint8") - buffer5 = T.Buffer([16], "uint8") - buffer6 = T.Buffer([48], "uint8") - buffer7 = T.Buffer([256], "uint8") - buffer8 = T.Buffer([64], "uint8") - # body - p1 = T.decl_buffer([48], "uint8") - p2 = T.decl_buffer([48], "uint8") - p3 = T.decl_buffer([256], "int8", scope="local") - p4 = T.decl_buffer([256], "int8") - p5 = T.decl_buffer([16], "uint8") - p6 = T.decl_buffer([48], "uint8") - p7 = T.decl_buffer([256], "int8", scope="local") - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 48, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 48, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 256, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 4, 4, 4, 0, 4, buffer1[0], 0, 0, 0, T.float32(0.00392081), -128, "NHWC", 16, 4, 1, "int8", 4, 4, 4, 4, 0, 4, p4[0], 0, 0, 0, T.float32(0.00839574), -128, "NHCWB16", 64, 16, 1, 1, 1, 1, 1, 1, 1, p1[0], 48, 0, p2[0], 48, 0, 0, 0, 0, "TANH", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 16, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer6[0], 48, p6[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer7[0], 256, p7[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 4, 4, 4, 4, 0, 4, p4[0], 0, 0, 0, T.float32(0.0078125), 0, "NHCWB16", 64, 16, 1, "int8", 4, 4, 4, 4, 0, 4, buffer8[0], 0, 0, 0, T.float32(0.00372155), -128, "NHWC", 16, 4, 1, 1, 1, 1, 1, 1, 1, p5[0], 16, 0, p6[0], 48, 0, 0, 0, 0, "TANH", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -def test_copy_to_buffer_with_local_scope_max_copy_movements_0(): - test_mod = CopyComputeReordering(0)(CopyToBufferWithLocalScope) - reference_mod = CopyToBufferWithLocalScope - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - - -@pytest.mark.parametrize("max_copy_movements", [1, 2]) -def test_copy_to_buffer_with_local_scope_max_copy_movements_n(max_copy_movements): - # Uninitialized vars used - # fmt: off - @tvm.script.ir_module(check_well_formed=False) - class ReferenceModule: - @T.prim_func - def main() -> None: - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([64], "uint8") - buffer2 = T.Buffer([48], "uint8") - buffer3 = T.Buffer([48], "uint8") - buffer4 = T.Buffer([256], "uint8") - buffer5 = T.Buffer([16], "uint8") - buffer6 = T.Buffer([48], "uint8") - buffer7 = T.Buffer([256], "uint8") - buffer8 = T.Buffer([64], "uint8") - # body - p1 = T.decl_buffer([48], "uint8") - p2 = T.decl_buffer([48], "uint8") - p3 = T.decl_buffer([256], "int8", scope="local") - p4 = T.decl_buffer([256], "int8") - p5 = T.decl_buffer([16], "uint8") - p6 = T.decl_buffer([48], "uint8") - p7 = T.decl_buffer([256], "int8", scope="local") - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 48, p1[0], dtype="handle")) - 
T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 48, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 256, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 16, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer6[0], 48, p6[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 4, 4, 4, 0, 4, buffer1[0], 0, 0, 0, T.float32(0.00392081), -128, "NHWC", 16, 4, 1, "int8", 4, 4, 4, 4, 0, 4, p4[0], 0, 0, 0, T.float32(0.00839574), -128, "NHCWB16", 64, 16, 1, 1, 1, 1, 1, 1, 1, p1[0], 48, 0, p2[0], 48, 0, 0, 0, 0, "TANH", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer7[0], 256, p7[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 4, 4, 4, 4, 0, 4, p4[0], 0, 0, 0, T.float32(0.0078125), 0, "NHCWB16", 64, 16, 1, "int8", 4, 4, 4, 4, 0, 4, buffer8[0], 0, 0, 0, T.float32(0.00372155), -128, "NHWC", 16, 4, 1, 1, 1, 1, 1, 1, 1, p5[0], 16, 0, p6[0], 48, 0, 0, 0, 0, "TANH", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - test_mod = CopyComputeReordering(max_copy_movements)(CopyToBufferWithLocalScope) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - - -def test_multiple_prim_funcs(): - # fmt: off - @tvm.script.ir_module - class InputModule: - @T.prim_func - def main(): - T.evaluate(0) - - @T.prim_func - def abc(): - T.evaluate(0) - # fmt: on - - err_rgx = ( - r"Expected a single primitive function called 'main'. " - r"Please run the CopyComputeReordering pass in conjunction with the LowerToTIR\(\) pass." - ) - with pytest.raises(tvm.TVMError, match=err_rgx): - CopyComputeReordering(1)(InputModule) - - -def test_no_main_prim_func(): - # fmt: off - @tvm.script.ir_module - class InputModule: - @T.prim_func - def abs(): - T.evaluate(0) - # fmt: on - - err_rgx = ( - r"Expected a single primitive function called 'main'. " - r"Please run the CopyComputeReordering pass in conjunction with the LowerToTIR\(\) pass." 
- ) - with pytest.raises(tvm.TVMError, match=err_rgx): - CopyComputeReordering(1)(InputModule) - - -def test_default_max_copy_movements(): - # Uninitialized vars used - # fmt: off - @tvm.script.ir_module(check_well_formed=False) - class ReferenceModule: - @T.prim_func - def main() -> None: - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([97156], "int8") - buffer2 = T.Buffer([80], "uint8") - buffer3 = T.Buffer([64], "uint8") - buffer4 = T.Buffer([96], "uint8") - buffer5 = T.Buffer([32], "uint8") - # body - p1 = T.decl_buffer([390336], "int8") - p2 = T.decl_buffer([80], "uint8") - p3 = T.decl_buffer([64], "uint8") - p4 = T.decl_buffer([390336], "int8") - p5 = T.decl_buffer([96], "uint8") - p6 = T.decl_buffer([32], "uint8") - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 80, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 64, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_pooling", "int8", 214, 227, 2, 214, 0, 227, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 454, 2, 1, "int8", 214, 114, 2, 214, 0, 114, p1[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1824, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 96, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 32, p6[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 2, 214, 0, 114, p1[0], 0, 0, 0, T.float32(0.00392157), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 5, 214, 0, 114, p4[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, 3, 1, 1, 1, 1, 2, p2[0], 80, 0, p3[0], 64, 0, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 5, 214, 0, 114, p4[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 3, 214, 0, 114, buffer3[0], 0, 0, 0, T.float32(0.104816), -128, "NHWC", 342, 3, 1, 3, 1, 1, 1, 1, 2, p5[0], 96, 0, p6[0], 32, 0, 1, 0, 1, "CLIP", -128, 127, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - test_mod = CopyComputeReordering()(OperatorsWithAndWithoutWeights) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - - -def test_pass_context_option_max_copy_movements(): - # Uninitialized vars used - # fmt: off - @tvm.script.ir_module(check_well_formed=False) - class ReferenceModule: - @T.prim_func - def main() -> None: - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([97156], "int8") - buffer2 = T.Buffer([80], "uint8") - buffer3 = T.Buffer([64], "uint8") - buffer4 = T.Buffer([96], "uint8") - buffer5 = T.Buffer([32], "uint8") - # body - p1 = T.decl_buffer([390336], "int8") - p2 = T.decl_buffer([80], "uint8") - p3 = T.decl_buffer([64], "uint8") - p4 = T.decl_buffer([390336], "int8") - p5 = T.decl_buffer([96], "uint8") - p6 = T.decl_buffer([32], "uint8") - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 80, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 64, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 96, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 32, p6[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_pooling", "int8", 214, 227, 2, 214, 0, 227, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 454, 2, 1, "int8", 214, 114, 2, 214, 
0, 114, p1[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1824, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 2, 214, 0, 114, p1[0], 0, 0, 0, T.float32(0.00392157), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 5, 214, 0, 114, p4[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, 3, 1, 1, 1, 1, 2, p2[0], 80, 0, p3[0], 64, 0, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 5, 214, 0, 114, p4[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 3, 214, 0, 114, buffer3[0], 0, 0, 0, T.float32(0.104816), -128, "NHWC", 342, 3, 1, 3, 1, 1, 1, 1, 2, p5[0], 96, 0, p6[0], 32, 0, 1, 0, 1, "CLIP", -128, 127, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - with tvm.transform.PassContext( - config={"tir.contrib.ethos-u.copy_compute_reordering_max_copy_movements": 2} - ): - test_mod = CopyComputeReordering()(OperatorsWithAndWithoutWeights) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - - -def test_reordering_based_on_cycles(): - # Uninitialized vars used - # fmt: off - @tvm.script.ir_module(check_well_formed=False) - class ModuleBefore: - @T.prim_func - def main(placeholder: T.Buffer(97156, "int8"), placeholder_encoded: T.Buffer(208, "uint8"), placeholder_encoded_1: T.Buffer(112, "uint8"), placeholder_encoded_2: T.Buffer(96, "uint8"), placeholder_encoded_3: T.Buffer(112, "uint8"), ethosu_write: T.Buffer(43672, "int8")) -> None: - # function attr dict - T.func_attr({"tir.noalias": True, "global_symbol": "main", "from_legacy_te_schedule": True}) - ax0_ax1_fused_ax2_fused_ax3_fused = T.int32() - ax0_ax1_fused_ax2_fused_ax3_fused_1 = T.int32() - ax0_ax1_fused_ax2_fused_ax3_fused_2 = T.int32() - ax0_ax1_fused_ax2_fused_ax3_fused_3 = T.int32() - nn = T.int32() - nn_1 = T.int32() - nn_2 = T.int32() - nn_3 = T.int32() - nn_4 = T.int32() - nn_5 = T.int32() - # body - placeholder_d_global = T.decl_buffer([208], "uint8") - placeholder_d_global_1 = T.decl_buffer([112], "uint8") - placeholder_d_global_2 = T.decl_buffer([96], "uint8") - placeholder_d_global_3 = T.decl_buffer([112], "uint8") - ethosu_write_1 = T.decl_buffer([195168], "int8") - ethosu_write_2 = T.decl_buffer([184800], "int8") - ethosu_write_3 = T.decl_buffer([174688], "int8") - ethosu_write_4 = T.decl_buffer([174688], "int8") - ethosu_write_5 = T.decl_buffer([174688], "int8") - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused, None, "DataPar", ""), "pragma_compute_cycles_hint", 1792): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded[0], 208, placeholder_d_global[0], dtype="handle")) - with T.attr(T.iter_var(nn, None, "DataPar", ""), "pragma_compute_cycles_hint", 250): - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 227, 2, 214, 0, 227, placeholder[0], 0, 0, 0, T.float32(0.0039215679280459881), -128, "NHWC", 454, 2, 1, "int8", 107, 114, 4, 107, 0, 114, ethosu_write_1[0], 0, 0, 0, T.float32(0.009109782986342907), -128, "NHCWB16", 1824, 16, 1, 3, 3, 2, 2, 1, 1, placeholder_d_global[0], 160, T.int8(-1), T.int8(-1), 0, placeholder_d_global[160], 48, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 12, 10, 16, dtype="handle")) - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused_1, None, "DataPar", ""), "pragma_compute_cycles_hint", 1024): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 112, 
placeholder_d_global_1[0], dtype="handle")) - with T.attr(T.iter_var(nn_1, None, "DataPar", ""), "pragma_compute_cycles_hint", 467): - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 107, 114, 4, 107, 0, 114, ethosu_write_1[0], 0, 0, 0, T.float32(0.009109782986342907), -128, "NHCWB16", 1824, 16, 1, "int8", 105, 110, 4, 105, 0, 110, ethosu_write_2[0], 0, 0, 0, T.float32(0.0066184266470372677), -128, "NHCWB16", 1760, 16, 1, 3, 2, 1, 1, 2, 2, placeholder_d_global_1[0], 64, 0, placeholder_d_global_1[64], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 8, 16, 16, dtype="handle")) - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused_2, None, "DataPar", ""), "pragma_compute_cycles_hint", 1024): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_2[0], 96, placeholder_d_global_2[0], dtype="handle")) - with T.attr(T.iter_var(nn_2, None, "DataPar", ""), "pragma_compute_cycles_hint", 441): - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 105, 110, 4, 105, 0, 110, ethosu_write_2[0], 0, 0, 0, T.float32(0.0066184266470372677), -128, "NHCWB16", 1760, 16, 1, "int8", 103, 106, 4, 103, 0, 106, ethosu_write_3[0], 0, 0, 0, T.float32(0.0057637207210063934), -128, "NHCWB16", 1696, 16, 1, 3, 2, 1, 1, 2, 2, placeholder_d_global_2[0], 48, 0, placeholder_d_global_2[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 8, 16, 16, dtype="handle")) - with T.attr(T.iter_var(nn_3, None, "DataPar", ""), "pragma_compute_cycles_hint", 439): - T.evaluate(T.call_extern("ethosu_pooling", "int8", 103, 106, 4, 103, 0, 106, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1696, 16, 1, "int8", 103, 106, 4, 103, 0, 106, ethosu_write_4[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1696, 16, 1, "MAX", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 2, 64, 16, dtype="handle")) - with T.attr(T.iter_var(nn_4, None, "DataPar", ""), "pragma_compute_cycles_hint", 439): - T.evaluate(T.call_extern("ethosu_pooling", "int8", 103, 106, 4, 103, 0, 106, ethosu_write_4[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1696, 16, 1, "int8", 103, 106, 4, 103, 0, 106, ethosu_write_5[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1696, 16, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 2, 64, 16, dtype="handle")) - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused_3, None, "DataPar", ""), "pragma_compute_cycles_hint", 1024): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_3[0], 112, placeholder_d_global_3[0], dtype="handle")) - T.attr(T.iter_var(nn_5, None, "DataPar", ""), "pragma_compute_cycles_hint", 22340) - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 103, 106, 4, 103, 0, 106, ethosu_write_5[0], 0, 0, 0, T.float32(0.0057637207210063934), -128, "NHCWB16", 1696, 16, 1, "int8", 103, 106, 4, 103, 0, 106, ethosu_write[0], 0, 0, 0, T.float32(0.0057619437575340271), -128, "NHWC", 424, 4, 1, 3, 2, 1, 1, 2, 2, placeholder_d_global_3[0], 64, 0, placeholder_d_global_3[64], 48, 1, 2, 1, 2, "NONE", 0, 0, "TFL", "NONE", 14, 18, 8, dtype="handle")) - - - # Uninitialized vars used - @tvm.script.ir_module(check_well_formed=False) - class ModuleAfter: - @T.prim_func - def main(placeholder: T.Buffer(97156, "int8"), placeholder_encoded: T.Buffer(208, "uint8"), placeholder_encoded_1: T.Buffer(112, "uint8"), placeholder_encoded_2: T.Buffer(96, "uint8"), placeholder_encoded_3: T.Buffer(112, "uint8"), ethosu_write: T.Buffer(43672, "int8")) -> None: - # function attr dict - T.func_attr({"tir.noalias": True, "global_symbol": "main", "from_legacy_te_schedule": True}) - 
ax0_ax1_fused_ax2_fused_ax3_fused = T.int32() - ax0_ax1_fused_ax2_fused_ax3_fused_1 = T.int32() - ax0_ax1_fused_ax2_fused_ax3_fused_2 = T.int32() - ax0_ax1_fused_ax2_fused_ax3_fused_3 = T.int32() - nn = T.int32() - nn_1 = T.int32() - nn_2 = T.int32() - nn_3 = T.int32() - nn_4 = T.int32() - nn_5 = T.int32() - # body - placeholder_d_global = T.decl_buffer([208], "uint8") - placeholder_d_global_1 = T.decl_buffer([112], "uint8") - placeholder_d_global_2 = T.decl_buffer([96], "uint8") - placeholder_d_global_3 = T.decl_buffer([112], "uint8") - ethosu_write_1 = T.decl_buffer([195168], "int8") - ethosu_write_2 = T.decl_buffer([184800], "int8") - ethosu_write_3 = T.decl_buffer([174688], "int8") - ethosu_write_4 = T.decl_buffer([174688], "int8") - ethosu_write_5 = T.decl_buffer([174688], "int8") - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused, None, "DataPar", ""), "pragma_compute_cycles_hint", 1792): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded[0], 208, placeholder_d_global[0], dtype="handle")) - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused_1, None, "DataPar", ""), "pragma_compute_cycles_hint", 1024): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 112, placeholder_d_global_1[0], dtype="handle")) - with T.attr(T.iter_var(nn, None, "DataPar", ""), "pragma_compute_cycles_hint", 250): - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 227, 2, 214, 0, 227, placeholder[0], 0, 0, 0, T.float32(0.0039215679280459881), -128, "NHWC", 454, 2, 1, "int8", 107, 114, 4, 107, 0, 114, ethosu_write_1[0], 0, 0, 0, T.float32(0.009109782986342907), -128, "NHCWB16", 1824, 16, 1, 3, 3, 2, 2, 1, 1, placeholder_d_global[0], 160, T.int8(-1), T.int8(-1), 0, placeholder_d_global[160], 48, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 12, 10, 16, dtype="handle")) - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused_2, None, "DataPar", ""), "pragma_compute_cycles_hint", 1024): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_2[0], 96, placeholder_d_global_2[0], dtype="handle")) - with T.attr(T.iter_var(nn_1, None, "DataPar", ""), "pragma_compute_cycles_hint", 467): - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 107, 114, 4, 107, 0, 114, ethosu_write_1[0], 0, 0, 0, T.float32(0.009109782986342907), -128, "NHCWB16", 1824, 16, 1, "int8", 105, 110, 4, 105, 0, 110, ethosu_write_2[0], 0, 0, 0, T.float32(0.0066184266470372677), -128, "NHCWB16", 1760, 16, 1, 3, 2, 1, 1, 2, 2, placeholder_d_global_1[0], 64, 0, placeholder_d_global_1[64], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 8, 16, 16, dtype="handle")) - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused_3, None, "DataPar", ""), "pragma_compute_cycles_hint", 1024): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_3[0], 112, placeholder_d_global_3[0], dtype="handle")) - with T.attr(T.iter_var(nn_2, None, "DataPar", ""), "pragma_compute_cycles_hint", 441): - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 105, 110, 4, 105, 0, 110, ethosu_write_2[0], 0, 0, 0, T.float32(0.0066184266470372677), -128, "NHCWB16", 1760, 16, 1, "int8", 103, 106, 4, 103, 0, 106, ethosu_write_3[0], 0, 0, 0, T.float32(0.0057637207210063934), -128, "NHCWB16", 1696, 16, 1, 3, 2, 1, 1, 2, 2, placeholder_d_global_2[0], 48, 0, placeholder_d_global_2[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 8, 16, 16, dtype="handle")) - with T.attr(T.iter_var(nn_3, None, "DataPar", ""), "pragma_compute_cycles_hint", 439): - T.evaluate(T.call_extern("ethosu_pooling", "int8", 
103, 106, 4, 103, 0, 106, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1696, 16, 1, "int8", 103, 106, 4, 103, 0, 106, ethosu_write_4[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1696, 16, 1, "MAX", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 2, 64, 16, dtype="handle")) - with T.attr(T.iter_var(nn_4, None, "DataPar", ""), "pragma_compute_cycles_hint", 439): - T.evaluate(T.call_extern("ethosu_pooling", "int8", 103, 106, 4, 103, 0, 106, ethosu_write_4[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1696, 16, 1, "int8", 103, 106, 4, 103, 0, 106, ethosu_write_5[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1696, 16, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 2, 64, 16, dtype="handle")) - T.attr(T.iter_var(nn_5, None, "DataPar", ""), "pragma_compute_cycles_hint", 22340) - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 103, 106, 4, 103, 0, 106, ethosu_write_5[0], 0, 0, 0, T.float32(0.0057637207210063934), -128, "NHCWB16", 1696, 16, 1, "int8", 103, 106, 4, 103, 0, 106, ethosu_write[0], 0, 0, 0, T.float32(0.0057619437575340271), -128, "NHWC", 424, 4, 1, 3, 2, 1, 1, 2, 2, placeholder_d_global_3[0], 64, 0, placeholder_d_global_3[64], 48, 1, 2, 1, 2, "NONE", 0, 0, "TFL", "NONE", 14, 18, 8, dtype="handle")) - # fmt: on - - test_mod = CopyComputeReordering(reorder_by_cycles=True)(ModuleBefore) - reference_mod = ModuleAfter - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - - -def test_reordering_based_on_cycles_luts_present(): - # Uninitialized vars used - # fmt: off - @tvm.script.ir_module(check_well_formed=False) - class ModuleBefore: - @T.prim_func - def main(placeholder: T.Buffer(97156, "int8"), placeholder_encoded: T.Buffer(208, "uint8"), placeholder_encoded_1: T.Buffer(112, "uint8"), placeholder_1: T.Buffer(256, "int8"), placeholder_encoded_2: T.Buffer(96, "uint8"), placeholder_2: T.Buffer(256, "int8"), placeholder_3: T.Buffer(256, "int8"), ethosu_write: T.Buffer(46200, "int8")) -> None: - # function attr dict - T.func_attr({"tir.noalias": True, "global_symbol": "main", "from_legacy_te_schedule": True}) - ax0_ax1_fused_ax2_fused_ax3_fused = T.int32() - ax0_ax1_fused_ax2_fused_ax3_fused_1 = T.int32() - ax0_ax1_fused_ax2_fused_ax3_fused_2 = T.int32() - nn = T.int32() - nn_1 = T.int32() - nn_2 = T.int32() - nn_3 = T.int32() - nn_4 = T.int32() - nn_5 = T.int32() - # body - placeholder_d_d_global = T.decl_buffer([208], "uint8") - placeholder_d_d_global_1 = T.decl_buffer([112], "uint8") - placeholder_d_global = T.decl_buffer([96], "uint8") - ethosu_write_1 = T.decl_buffer([195168], "int8") - placeholder_local = T.decl_buffer([256], "int8", scope="local") - ethosu_write_2 = T.decl_buffer([184800], "int8") - ethosu_write_3 = T.decl_buffer([184800], "int8") - ethosu_write_4 = T.decl_buffer([184800], "int8") - placeholder_d_local = T.decl_buffer([256], "int8", scope="local") - ethosu_write_5 = T.decl_buffer([184800], "int8") - placeholder_d_d_local = T.decl_buffer([256], "int8", scope="local") - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused, None, "DataPar", ""), "pragma_compute_cycles_hint", 1792): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded[0], 208, placeholder_d_d_global[0], dtype="handle")) - with T.attr(T.iter_var(nn, None, "DataPar", ""), "pragma_compute_cycles_hint", 73668): - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 227, 2, 214, 0, 227, placeholder[0], 0, 0, 0, T.float32(0.0039215679280459881), -128, "NHWC", 454, 2, 1, "int8", 107, 114, 4, 107, 0, 114, ethosu_write_1[0], 0, 0, 0, 
T.float32(0.009109782986342907), -128, "NHCWB16", 1824, 16, 1, 3, 3, 2, 2, 1, 1, placeholder_d_d_global[0], 160, T.int8(-1), T.int8(-1), 0, placeholder_d_d_global[160], 48, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 12, 10, 16, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", placeholder_1[0], 256, placeholder_local[0], dtype="handle")) - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused_1, None, "DataPar", ""), "pragma_compute_cycles_hint", 384): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 112, placeholder_d_d_global_1[0], dtype="handle")) - with T.attr(T.iter_var(nn_1, None, "DataPar", ""), "pragma_compute_cycles_hint", 330): - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 107, 114, 4, 107, 0, 114, ethosu_write_1[0], 0, 0, 0, T.float32(0.009109782986342907), -128, "NHCWB16", 1824, 16, 1, "int8", 105, 110, 4, 105, 0, 110, ethosu_write_2[0], 0, 0, 0, T.float32(0.0066184266470372677), -128, "NHCWB16", 1760, 16, 1, 3, 2, 1, 1, 2, 2, placeholder_d_d_global_1[0], 64, 0, placeholder_d_d_global_1[64], 48, 0, 0, 0, 0, "SIGMOID", 0, 0, "TFL", "NONE", 8, 16, 16, dtype="handle")) - with T.attr(T.iter_var(nn_2, None, "DataPar", ""), "pragma_compute_cycles_hint", 411): - T.evaluate(T.call_extern("ethosu_pooling", "int8", 105, 110, 4, 105, 0, 110, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1760, 16, 1, "int8", 105, 110, 4, 105, 0, 110, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1760, 16, 1, "MAX", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 2, 64, 16, dtype="handle")) - with T.attr(T.iter_var(nn_3, None, "DataPar", ""), "pragma_compute_cycles_hint", 458): - T.evaluate(T.call_extern("ethosu_pooling", "int8", 105, 110, 4, 105, 0, 110, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1760, 16, 1, "int8", 105, 110, 4, 105, 0, 110, ethosu_write_4[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1760, 16, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 2, 64, 16, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", placeholder_2[0], 256, placeholder_d_local[0], dtype="handle")) - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused_2, None, "DataPar", ""), "pragma_compute_cycles_hint", 1500): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_2[0], 96, placeholder_d_global[0], dtype="handle")) - with T.attr(T.iter_var(nn_4, None, "DataPar", ""), "pragma_compute_cycles_hint", 10464): - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 105, 110, 4, 105, 0, 110, ethosu_write_4[0], 0, 0, 0, T.float32(0.00390625), -128, "NHCWB16", 1760, 16, 1, "int8", 105, 110, 4, 105, 0, 110, ethosu_write_5[0], 0, 0, 0, T.float32(0.00381289585493505), -128, "NHCWB16", 1760, 16, 1, 3, 2, 1, 1, 2, 2, placeholder_d_global[0], 48, 0, placeholder_d_global[48], 48, 1, 2, 1, 2, "TANH", 0, 0, "TFL", "NONE", 8, 16, 16, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", placeholder_3[0], 256, placeholder_d_d_local[0], dtype="handle")) - T.attr(T.iter_var(nn_5, None, "DataPar", ""), "pragma_compute_cycles_hint", 5253) - T.evaluate(T.call_extern("ethosu_pooling", "int8", 105, 110, 4, 105, 0, 110, ethosu_write_5[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1760, 16, 1, "int8", 105, 110, 4, 105, 0, 110, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 440, 4, 1, "MAX", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "TANH", 0, 0, "TFL", "NONE", 4, 64, 8, dtype="handle")) - - - # Uninitialized vars used - @tvm.script.ir_module(check_well_formed=False) - class ModuleAfter: - @T.prim_func - 
def main(placeholder: T.Buffer(97156, "int8"), placeholder_encoded: T.Buffer(208, "uint8"), placeholder_encoded_1: T.Buffer(112, "uint8"), placeholder_1: T.Buffer(256, "int8"), placeholder_encoded_2: T.Buffer(96, "uint8"), placeholder_2: T.Buffer(256, "int8"), placeholder_3: T.Buffer(256, "int8"), ethosu_write: T.Buffer(46200, "int8")) -> None: - # function attr dict - T.func_attr({"tir.noalias": True, "global_symbol": "main", "from_legacy_te_schedule": True}) - ax0_ax1_fused_ax2_fused_ax3_fused = T.int32() - ax0_ax1_fused_ax2_fused_ax3_fused_1 = T.int32() - ax0_ax1_fused_ax2_fused_ax3_fused_2 = T.int32() - nn = T.int32() - nn_1 = T.int32() - nn_2 = T.int32() - nn_3 = T.int32() - nn_4 = T.int32() - nn_5 = T.int32() - # body - placeholder_d_d_global = T.decl_buffer([208], "uint8") - placeholder_d_d_global_1 = T.decl_buffer([112], "uint8") - placeholder_d_global = T.decl_buffer([96], "uint8") - ethosu_write_1 = T.decl_buffer([195168], "int8") - placeholder_local = T.decl_buffer([256], "int8", scope="local") - ethosu_write_2 = T.decl_buffer([184800], "int8") - ethosu_write_3 = T.decl_buffer([184800], "int8") - ethosu_write_4 = T.decl_buffer([184800], "int8") - placeholder_d_local = T.decl_buffer([256], "int8", scope="local") - ethosu_write_5 = T.decl_buffer([184800], "int8") - placeholder_d_d_local = T.decl_buffer([256], "int8", scope="local") - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused, None, "DataPar", ""), "pragma_compute_cycles_hint", 1792): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded[0], 208, placeholder_d_d_global[0], dtype="handle")) - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused_1, None, "DataPar", ""), "pragma_compute_cycles_hint", 384): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 112, placeholder_d_d_global_1[0], dtype="handle")) - with T.attr(T.iter_var(nn, None, "DataPar", ""), "pragma_compute_cycles_hint", 73668): - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 227, 2, 214, 0, 227, placeholder[0], 0, 0, 0, T.float32(0.0039215679280459881), -128, "NHWC", 454, 2, 1, "int8", 107, 114, 4, 107, 0, 114, ethosu_write_1[0], 0, 0, 0, T.float32(0.009109782986342907), -128, "NHCWB16", 1824, 16, 1, 3, 3, 2, 2, 1, 1, placeholder_d_d_global[0], 160, T.int8(-1), T.int8(-1), 0, placeholder_d_d_global[160], 48, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 12, 10, 16, dtype="handle")) - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused_2, None, "DataPar", ""), "pragma_compute_cycles_hint", 1500): - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_2[0], 96, placeholder_d_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", placeholder_1[0], 256, placeholder_local[0], dtype="handle")) - with T.attr(T.iter_var(nn_1, None, "DataPar", ""), "pragma_compute_cycles_hint", 330): - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 107, 114, 4, 107, 0, 114, ethosu_write_1[0], 0, 0, 0, T.float32(0.009109782986342907), -128, "NHCWB16", 1824, 16, 1, "int8", 105, 110, 4, 105, 0, 110, ethosu_write_2[0], 0, 0, 0, T.float32(0.0066184266470372677), -128, "NHCWB16", 1760, 16, 1, 3, 2, 1, 1, 2, 2, placeholder_d_d_global_1[0], 64, 0, placeholder_d_d_global_1[64], 48, 0, 0, 0, 0, "SIGMOID", 0, 0, "TFL", "NONE", 8, 16, 16, dtype="handle")) - with T.attr(T.iter_var(nn_2, None, "DataPar", ""), "pragma_compute_cycles_hint", 411): - T.evaluate(T.call_extern("ethosu_pooling", "int8", 105, 110, 4, 105, 0, 110, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1760, 16, 1, "int8", 
105, 110, 4, 105, 0, 110, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1760, 16, 1, "MAX", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 2, 64, 16, dtype="handle")) - with T.attr(T.iter_var(nn_3, None, "DataPar", ""), "pragma_compute_cycles_hint", 458): - T.evaluate(T.call_extern("ethosu_pooling", "int8", 105, 110, 4, 105, 0, 110, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1760, 16, 1, "int8", 105, 110, 4, 105, 0, 110, ethosu_write_4[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1760, 16, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 2, 64, 16, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", placeholder_2[0], 256, placeholder_d_local[0], dtype="handle")) - with T.attr(T.iter_var(nn_4, None, "DataPar", ""), "pragma_compute_cycles_hint", 10464): - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 105, 110, 4, 105, 0, 110, ethosu_write_4[0], 0, 0, 0, T.float32(0.00390625), -128, "NHCWB16", 1760, 16, 1, "int8", 105, 110, 4, 105, 0, 110, ethosu_write_5[0], 0, 0, 0, T.float32(0.00381289585493505), -128, "NHCWB16", 1760, 16, 1, 3, 2, 1, 1, 2, 2, placeholder_d_global[0], 48, 0, placeholder_d_global[48], 48, 1, 2, 1, 2, "TANH", 0, 0, "TFL", "NONE", 8, 16, 16, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", placeholder_3[0], 256, placeholder_d_d_local[0], dtype="handle")) - T.attr(T.iter_var(nn_5, None, "DataPar", ""), "pragma_compute_cycles_hint", 5253) - T.evaluate(T.call_extern("ethosu_pooling", "int8", 105, 110, 4, 105, 0, 110, ethosu_write_5[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1760, 16, 1, "int8", 105, 110, 4, 105, 0, 110, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 440, 4, 1, "MAX", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "TANH", 0, 0, "TFL", "NONE", 4, 64, 8, dtype="handle")) - # fmt: on - - test_mod = CopyComputeReordering(reorder_by_cycles=True)(ModuleBefore) - reference_mod = ModuleAfter - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_create_tiles.py b/tests/python/contrib/test_ethosu/test_create_tiles.py deleted file mode 100644 index ac90e3c27839..000000000000 --- a/tests/python/contrib/test_ethosu/test_create_tiles.py +++ /dev/null @@ -1,162 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
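[Editor's note] The two reorder-by-cycles tests deleted above share one skeleton: build a ModuleBefore whose copy/compute calls carry pragma_compute_cycles_hint annotations, run the pass, and compare the result structurally against a hand-written ModuleAfter. A minimal sketch of that harness, assuming CopyComputeReordering is exported from tvm.relay.backend.contrib.ethosu.tir.passes alongside HoistAllocates (the import sits outside this hunk):

    import tvm
    from tvm.relay.backend.contrib.ethosu.tir.passes import CopyComputeReordering

    def check_reordering(module_before, module_after):
        # Reorder ethosu_copy calls relative to compute calls using the
        # pragma_compute_cycles_hint values attached to each operation.
        test_mod = CopyComputeReordering(reorder_by_cycles=True)(module_before)
        # Compare while mapping free variables, as the deleted tests do.
        tvm.ir.assert_structural_equal(test_mod, module_after, True)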
-import pytest - -pytest.importorskip("ethosu.vela") -import tvm.script -from tvm.relay.backend.contrib.ethosu.tir.dma import Tiles, create_tiles -from tvm.script import tir as T - - -def check_tiles_equal(tiles, expected): - assert tiles.height_0 == expected.height_0 - assert tiles.height_1 == expected.height_1 - assert tiles.width_0 == expected.width_0 - if isinstance(tiles.address_0, int): - assert tiles.address_0 == expected.address_0 - else: - assert tiles.address_0.buffer == expected.address_0.buffer - assert tiles.address_0.indices[0] == expected.address_0.indices[0] - if isinstance(tiles.address_1, int): - assert tiles.address_1 == expected.address_1 - else: - assert tiles.address_1.buffer == expected.address_1.buffer - assert tiles.address_1.indices[0] == expected.address_1.indices[0] - if isinstance(tiles.address_2, int): - assert tiles.address_2 == expected.address_2 - else: - assert tiles.address_2.buffer == expected.address_2.buffer - assert tiles.address_2.indices[0] == expected.address_2.indices[0] - - -def test_create_tiles_h(): - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def main(placeholder1: T.Buffer((100,), "int8"), placeholder2: T.Buffer((100,), "int8")) -> None: - T.attr("i0", "pragma_layout", "NHCWB16") - for i0 in T.serial(0, 1): - for i1 in T.serial(0, 6): - for i2 in T.serial(0, 1): - for i3 in T.serial(0, 1): - for i4 in T.serial(0, 16): - placeholder1[((i1*16) + i4)] = placeholder2[((T.floormod((i1 + 4), 6)*16) + i4)] - # fmt: on - - stmt = Module["main"].body - tiles = create_tiles(stmt) - buffer = stmt.body.body.body.body.body.body.value.buffer - expected = Tiles( - height_0=tvm.tir.expr.IntImm("int32", 2), - height_1=tvm.tir.expr.IntImm("int32", 0), - width_0=tvm.tir.expr.IntImm("int32", 1), - address_0=tvm.tir.BufferLoad(buffer, [tvm.tir.expr.IntImm("int32", 64)]), - address_1=tvm.tir.expr.IntImm("int32", 0), - address_2=tvm.tir.BufferLoad(buffer, [tvm.tir.expr.IntImm("int32", 0)]), - ) - check_tiles_equal(tiles, expected) - - -def test_create_tiles_w(): - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def main(placeholder1: T.Buffer((100,), "int8"), placeholder2: T.Buffer((100,), "int8")) -> None: - T.attr("i0", "pragma_layout", "NHCWB16") - for i0 in T.serial(0, 1): - for i1 in T.serial(0, 1): - for i2 in T.serial(0, 1): - for i3 in T.serial(0, 6): - for i4 in T.serial(0, 16): - placeholder1[((i3*16) + i4)] = placeholder2[((T.floormod((i3 + 4), 6)*16) + i4)] - # fmt: on - - stmt = Module["main"].body - tiles = create_tiles(stmt) - buffer = stmt.body.body.body.body.body.body.value.buffer - expected = Tiles( - height_0=tvm.tir.expr.IntImm("int32", 1), - height_1=tvm.tir.expr.IntImm("int32", 1), - width_0=tvm.tir.expr.IntImm("int32", 2), - address_0=tvm.tir.BufferLoad(buffer, [tvm.tir.expr.IntImm("int32", 64)]), - address_1=tvm.tir.BufferLoad(buffer, [tvm.tir.expr.IntImm("int32", 0)]), - address_2=tvm.tir.expr.IntImm("int32", 0), - ) - check_tiles_equal(tiles, expected) - - -def test_create_tiles_wrong_var_stride(): - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def main(placeholder1: T.Buffer((100,), "int8"), placeholder2: T.Buffer((100,), "int8")) -> None: - T.attr("i0", "pragma_layout", "NHCWB16") - for i0 in T.serial(0, 1): - for i1 in T.serial(0, 6): - for i2 in T.serial(0, 1): - for i3 in T.serial(0, 1): - for i4 in T.serial(0, 16): - placeholder1[((i1*16) + i4)] = placeholder2[((T.floormod((i1 + 4), 6)*8) + i4)] - # fmt: on - - stmt = Module["main"].body - tiles = create_tiles(stmt) 
- buffer = stmt.body.body.body.body.body.body.value.buffer - expected = Tiles( - height_0=tvm.tir.expr.IntImm("int32", 6), - height_1=tvm.tir.expr.IntImm("int32", 0), - width_0=tvm.tir.expr.IntImm("int32", 1), - address_0=tvm.tir.BufferLoad(buffer, [tvm.tir.expr.IntImm("int32", 32)]), - address_1=tvm.tir.expr.IntImm("int32", 0), - address_2=tvm.tir.expr.IntImm("int32", 0), - ) - check_tiles_equal(tiles, expected) - - -def test_create_tiles_multiple_var_occurrences(): - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def main(placeholder1: T.Buffer((100,), "int8"), placeholder2: T.Buffer((100,), "int8")) -> None: - T.attr("i0", "pragma_layout", "NHWC") - for i0 in T.serial(0, 1): - for i1 in T.serial(0, 5): - for i2 in T.serial(0, 6): - for i3 in T.serial(0, 4): - placeholder1[(((i1*24) + (i2*4)) + i3)] = placeholder2[(((((T.floordiv((i1 - 1), 2)*48) + (T.floormod((i1 + 1), 2)*24)) + (i2*4)) + i3) + 96)] - # fmt: on - - stmt = Module["main"].body - tiles = create_tiles(stmt) - buffer = stmt.body.body.body.body.body.value.buffer - expected = Tiles( - height_0=tvm.tir.expr.IntImm("int32", 5), - height_1=tvm.tir.expr.IntImm("int32", 0), - width_0=tvm.tir.expr.IntImm("int32", 6), - address_0=tvm.tir.BufferLoad(buffer, [tvm.tir.expr.IntImm("int32", 72)]), - address_1=tvm.tir.expr.IntImm("int32", 0), - address_2=tvm.tir.expr.IntImm("int32", 0), - ) - check_tiles_equal(tiles, expected) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_encode_constants.py b/tests/python/contrib/test_ethosu/test_encode_constants.py deleted file mode 100644 index 8c35a43e47e9..000000000000 --- a/tests/python/contrib/test_ethosu/test_encode_constants.py +++ /dev/null @@ -1,535 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
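[Editor's note] The deleted test_create_tiles.py above exercises create_tiles, which inspects a copy loop nest carrying a layout pragma and derives a Tiles record (two tile heights, a tile width, and three base addresses) describing a rolling-buffer read. A sketch of the call pattern, reusing the loop nest from the deleted test_create_tiles_h; the dst/src buffer names are illustrative:

    import tvm.script
    from tvm.script import tir as T
    from tvm.relay.backend.contrib.ethosu.tir.dma import create_tiles

    @tvm.script.ir_module
    class RollingHeight:
        @T.prim_func
        def main(dst: T.Buffer((100,), "int8"), src: T.Buffer((100,), "int8")) -> None:
            # The height axis (i1) is read modulo 6: a rolling buffer in H.
            T.attr("i0", "pragma_layout", "NHCWB16")
            for i0 in T.serial(0, 1):
                for i1 in T.serial(0, 6):
                    for i2 in T.serial(0, 1):
                        for i3 in T.serial(0, 1):
                            for i4 in T.serial(0, 16):
                                dst[i1 * 16 + i4] = src[T.floormod(i1 + 4, 6) * 16 + i4]

    tiles = create_tiles(RollingHeight["main"].body)
    # tiles.height_0/height_1/width_0 and address_0/1/2 now describe where the
    # wrapped read begins, which is what check_tiles_equal asserts above.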
-import numpy as np -import pytest - -pytest.importorskip("ethosu.vela") -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu import tir_to_cs_translator -from tvm.relay.backend.contrib.ethosu.tir.compiler import _lower_to_tir -from tvm.relay.backend.contrib.ethosu.tir.scheduler import ( - OperatorCompute, - copy_constants, -) -from tvm.relay.testing import run_opt_pass -from tvm.script import tir as T - -from .infra import make_ethosu_binary_elementwise, make_ethosu_conv2d - - -# Uninitialized variables used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class WeightStreamOnlyU55: - @T.prim_func - def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.Buffer([8192], "int8", data=input_placeholder.data) - ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data) - buffer1 = T.Buffer([160], "uint8") - buffer3 = T.Buffer([144], "uint8") - buffer5 = T.Buffer([144], "uint8") - buffer7 = T.Buffer([144], "uint8") - buffer8 = T.Buffer([32], "uint8") - # body - p1_data = T.allocate([160], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.Buffer([160], "uint8", data=p1_data) - p2_data = T.allocate([144], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.Buffer([144], "uint8", data=p2_data) - buffer9 = T.Buffer([144], "uint8", data=p1.data) - T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 160, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 144, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, T.int8(-1), T.int8(-1), 12, p1[128], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 144, buffer9[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 112, T.int8(-1), T.int8(-1), 12, p2[112], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer7[0], 144, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, buffer9[0], 112, T.int8(-1), T.int8(-1), 12, buffer9[112], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 112, T.int8(-1), T.int8(-1), 12, p2[112], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - -# Uninitialized variables used 
-@tvm.script.ir_module(check_well_formed=False) -class WeightStreamOnlyU65: - @T.prim_func - def main(ifm: T.Buffer((1, 16, 16, 32), "int8"), ethosu_write: T.Buffer((1, 16, 16, 8), "int8")): - T.func_attr({"from_legacy_te_schedule": T.bool(True), "global_symbol": "main", "tir.noalias": T.bool(True)}) - p2_global_6 = T.allocate([192], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) - p2_global_4 = T.allocate([192], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) - p2_global_5 = T.allocate([208], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) - buffer_encoded = T.Buffer((192,), "uint8") - p2_global_3 = T.Buffer((192,), "uint8", data=p2_global_6) - T.call_extern("handle", "ethosu_copy", buffer_encoded[0], 192, p2_global_3[0]) - buffer_encoded_1 = T.Buffer((192,), "uint8") - p2_global_4_1 = T.Buffer((192,), "uint8", data=p2_global_4) - T.call_extern("handle", "ethosu_copy", buffer_encoded_1[0], 192, p2_global_4_1[0]) - buffer_encoded_2 = T.Buffer((208,), "uint8") - p2_global_5_1 = T.Buffer((208,), "uint8", data=p2_global_5) - T.call_extern("handle", "ethosu_copy", buffer_encoded_2[0], 208, p2_global_5_1[0]) - ifm_1 = T.Buffer((8192,), "int8", data=ifm.data) - ethosu_write_1 = T.Buffer((2048,), "int8", data=ethosu_write.data) - T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_3[0], 80, p2_global_3[80], 80, 12, p2_global_3[160], 16, p2_global_3[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) - buffer_encoded_3 = T.Buffer((192,), "uint8") - p2_global_6_1 = T.Buffer((192,), "uint8", data=p2_global_6) - T.call_extern("handle", "ethosu_copy", buffer_encoded_3[0], 192, p2_global_6_1[0]) - T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_4_1[0], 80, p2_global_4_1[80], 80, 12, p2_global_4_1[160], 16, p2_global_4_1[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) - T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_5_1[0], 96, p2_global_5_1[96], 80, 12, p2_global_5_1[176], 16, p2_global_5_1[192], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) - T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_6_1[0], 80, p2_global_6_1[80], 80, 12, p2_global_6_1[160], 16, p2_global_6_1[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) -# fmt: on - - -@pytest.mark.parametrize( - "accelerator, reference_mod, reference_const_sizes", - [ - ( - "ethos-u55-128", - WeightStreamOnlyU55, - [160, 144, 144, 144], - ), - ( - "ethos-u65-512", - WeightStreamOnlyU65, - [192, 192, 208, 192], - ), - ], -) -def test_weight_stream_only(accelerator, reference_mod, reference_const_sizes): - def _planner(cached_func, const_dict, sch): - weights = cached_func.inputs[1] - bias = cached_func.inputs[2] - out = 
cached_func.outputs[0] - conv_compute = OperatorCompute.from_output(out) - co = conv_compute.split(sch, 3, 2) - cache_weights = sch.cache_read(weights, "global", [conv_compute.op]) - cache_bias = sch.cache_read(bias, "global", [conv_compute.op]) - sch[cache_weights].compute_at(sch[out], co) - sch[cache_bias].compute_at(sch[out], co) - - def _get_func(): - ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype="int8") - conv = make_ethosu_conv2d( - ifm, - 32, - 8, - (1, 1), - (0, 0), - (1, 1), - (1, 1), - ) - func = relay.Function(relay.analysis.free_vars(conv), conv) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - config = { - "accelerator_config": accelerator, - } - with tvm.transform.PassContext(config={"relay.ext.ethos-u.options": config}): - func = _get_func() - mod, consts = _lower_to_tir(func, cascader=_planner) - script = mod.script() - test_mod = tvm.script.from_source(script, check_well_formed=False) - tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True) - - test_const_size = [value.size for value in list(consts.values())] - assert reference_const_sizes.sort() == test_const_size.sort() - - -# Uninitialized variables used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class RereadWeightsU55: - @T.prim_func - def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([384], "uint8") - placeholder = T.Buffer([8192], "int8", data=input_placeholder.data) - ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data) - # body - p1_data = T.allocate([384], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.Buffer([384], "uint8", data=p1_data) - p2_data = T.allocate([384], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.Buffer([384], "uint8", data=p2_data) - T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 384, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 384, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 304, T.int8(-1), T.int8(-1), 12, p1[304], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[256], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[64], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 304, T.int8(-1), T.int8(-1), 12, p2[304], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - -# Uninitialized variables used -@tvm.script.ir_module(check_well_formed=False) -class RereadWeightsU65: - @T.prim_func - def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition - placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) - ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) - placeholder_encoded_1 = T.Buffer([464], 
"uint8") - # body - p1_data = T.allocate([464], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.Buffer([464], "uint8", data=p1_data) - p2_data = T.allocate([464], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.Buffer([464], "uint8", data=p2_data) - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 464, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 464, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[256], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[64], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 192, p2[192], 176, 12, p2[368], 48, p2[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -@pytest.mark.parametrize( - "accelerator, reference_mod, reference_const_sizes", - [ - ( - "ethos-u55-128", - RereadWeightsU55, - [384], - ), - ( - "ethos-u65-512", - RereadWeightsU65, - [464], - ), - ], -) -def test_re_read_weights(accelerator, reference_mod, reference_const_sizes): - def _cascader(cached_func, const_dict, sch): - weights = cached_func.inputs[1] - bias = cached_func.inputs[2] - out = cached_func.outputs[0] - conv_compute = OperatorCompute.from_output(out) - co = conv_compute.split(sch, 2, 8) - cache_weights = sch.cache_read(weights, "global", [conv_compute.op]) - cache_bias = sch.cache_read(bias, "global", [conv_compute.op]) - sch[cache_weights].compute_at(sch[out], co) - sch[cache_bias].compute_at(sch[out], co) - - def _get_func(): - ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype="int8") - conv = make_ethosu_conv2d( - ifm, - 32, - 8, - (1, 1), - (0, 0), - (1, 1), - (1, 1), - ) - func = relay.Function(relay.analysis.free_vars(conv), conv) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - config = { - "accelerator_config": accelerator, - } - with tvm.transform.PassContext(config={"relay.ext.ethos-u.options": config}): - func = _get_func() - mod, consts = _lower_to_tir(func, cascader=_cascader) - script = mod.script() - test_mod = tvm.script.from_source(script, check_well_formed=False) - tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True) - - test_const_size = [value.size for value in list(consts.values())] - assert reference_const_sizes.sort() == test_const_size.sort() - - -# Uninitialized variables used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class DirectReadOnlyU55: - @T.prim_func - def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([592], "uint8") - buffer_1 = T.Buffer([160], "uint8") - buffer_2 = T.Buffer([160], "uint8") - buffer_3 = T.Buffer([80], "uint8") - placeholder = T.Buffer([8192], "int8", data=input_placeholder.data) - ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data) - # body - ethosu_write_1_data = T.allocate([4096], 
"int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_1 = T.Buffer([4096], "int8", data=ethosu_write_1_data) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer[0], 592, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 8, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, buffer_2[0], 160, T.int8(-1), T.int8(-1), 12, buffer_3[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - -# Uninitialized variables used -@tvm.script.ir_module(check_well_formed=False) -class DirectReadOnlyU65: - @T.prim_func - def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition - placeholder_encoded = T.Buffer([608], dtype="uint8") - placeholder_encoded_1 = T.Buffer([160], dtype="uint8") - placeholder_encoded_2 = T.Buffer([208], dtype="uint8") - placeholder_encoded_3 = T.Buffer([96], dtype="uint8") - placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) - ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) - # body - ethosu_write_2_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_2 = T.Buffer([4096], "int8", data=ethosu_write_2_data) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, placeholder_encoded[0], 304, placeholder_encoded[304], 304, 12, placeholder_encoded_1[0], 80, placeholder_encoded_1[80], 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 8, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_encoded_2[0], 112, placeholder_encoded_2[112], 96, 12, placeholder_encoded_3[0], 48, placeholder_encoded_3[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -@pytest.mark.parametrize( - "accelerator, reference_mod, reference_const_sizes", - [ - ( - "ethos-u55-128", - DirectReadOnlyU55, - [592, 160, 160, 80], - ), - ( - "ethos-u65-512", - DirectReadOnlyU65, - [608, 160, 208, 96], - ), - ], -) -def test_direct_read_only(accelerator, reference_mod, reference_const_sizes): - def _get_func(): - ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype="int8") - conv1 = make_ethosu_conv2d( - ifm, - 32, - 16, - (1, 1), - (0, 0), - (1, 1), - (1, 1), - ) - conv2 = make_ethosu_conv2d( - conv1, - 16, - 8, - (1, 1), - (0, 0), - (1, 1), - (1, 1), - ) - func = relay.Function(relay.analysis.free_vars(conv2), conv2) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - 
config = { - "accelerator_config": accelerator, - } - with tvm.transform.PassContext(config={"relay.ext.ethos-u.options": config}): - func = _get_func() - mod, consts = _lower_to_tir(func) - - script = mod.script() - test_mod = tvm.script.from_source(script, check_well_formed=False) - tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True) - - test_const_size = [value.size for value in list(consts.values())] - assert reference_const_sizes.sort() == test_const_size.sort() - - -# Uninitialized variables used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class MixedReadU55: - @T.prim_func - def main(input_ifm: T.Buffer((1,16,16,32), "int8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([112], "uint8") - buffer3 = T.Buffer([112], "uint8") - buffer5 = T.Buffer([112], "uint8") - buffer7 = T.Buffer([112], "uint8") - buffer9 = T.Buffer([592], "uint8") - buffer10 = T.Buffer([160], "uint8") - ifm = T.Buffer([8192], "int8", data=input_ifm.data) - ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data) - # body - p1_data = T.allocate([112], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.Buffer([112], "uint8", data=p1_data) - p3_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - p3 = T.Buffer([4096], "int8", data=p3_data) - p2_data = T.allocate([112], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.Buffer([112], "uint8", data=p2_data) - T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 112, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, p3[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer9[0], 592, T.int8(-1), T.int8(-1), 12, buffer10[0], 160, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 112, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, p3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 80, T.int8(-1), T.int8(-1), 12, p1[80], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 112, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, p3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 80, T.int8(-1), T.int8(-1), 12, p2[80], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer7[0], 112, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, p3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 80, T.int8(-1), T.int8(-1), 12, p1[80], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - 
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, p3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 80, T.int8(-1), T.int8(-1), 12, p2[80], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - -# Uninitialized variables used -@tvm.script.ir_module(check_well_formed=False) -class MixedReadU65: - @T.prim_func - def main(ifm: T.Buffer((1, 16, 16, 32), "int8"), ethosu_write: T.Buffer((1, 16, 16, 8), "int8")): - T.func_attr({"from_legacy_te_schedule": T.bool(True), "global_symbol": "main", "tir.noalias": T.bool(True)}) - p5_global = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) - p5_global_1 = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) - ethosu_write_1 = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin": T.bool(True)}) - p5_global_2 = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) - buffer_encoded = T.Buffer((128,), "uint8") - p5_global_3 = T.Buffer((128,), "uint8", data=p5_global) - T.call_extern("handle", "ethosu_copy", buffer_encoded[0], 128, p5_global_3[0]) - buffer_encoded_1 = T.Buffer((128,), "uint8") - p5_global_4 = T.Buffer((128,), "uint8", data=p5_global_1) - T.call_extern("handle", "ethosu_copy", buffer_encoded_1[0], 128, p5_global_4[0]) - ifm_1 = T.Buffer((8192,), "int8", data=ifm.data) - ethosu_write_2 = T.Buffer((4096,), "int8", data=ethosu_write_1) - p1_encoded = T.Buffer((608,), "uint8") - p2_encoded = T.Buffer((160,), "uint8") - T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, p1_encoded[0], 304, p1_encoded[304], 304, 12, p2_encoded[0], 80, p2_encoded[80], 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) - buffer_encoded_2 = T.Buffer((128,), "uint8") - p5_global_5 = T.Buffer((128,), "uint8", data=p5_global_2) - T.call_extern("handle", "ethosu_copy", buffer_encoded_2[0], 128, p5_global_5[0]) - ethosu_write_3 = T.Buffer((2048,), "int8", data=ethosu_write.data) - T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_3[0], 48, p5_global_3[48], 48, 12, p5_global_3[96], 16, p5_global_3[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) - buffer_encoded_3 = T.Buffer((128,), "uint8") - p5_global_6 = T.Buffer((128,), "uint8", data=p5_global) - T.call_extern("handle", "ethosu_copy", buffer_encoded_3[0], 128, p5_global_6[0]) - T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_4[0], 48, p5_global_4[48], 48, 12, p5_global_4[96], 16, p5_global_4[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) - T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[4], 0, 0, 0, 
T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_5[0], 48, p5_global_5[48], 48, 12, p5_global_5[96], 16, p5_global_5[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) - T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_6[0], 48, p5_global_6[48], 48, 12, p5_global_6[96], 16, p5_global_6[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) -# fmt: on - - -@pytest.mark.parametrize( - "accelerator, reference_mod, reference_const_sizes", - [ - ( - "ethos-u55-128", - MixedReadU55, - [592, 160, 112, 112, 112, 112], - ), - ( - "ethos-u65-512", - MixedReadU65, - [608, 160, 128, 128, 128, 128], - ), - ], -) -def test_mixed_read(accelerator, reference_mod, reference_const_sizes): - def _planner(cached_func, const_dict, sch): - weight = cached_func.inputs[4] - scale_bias = cached_func.inputs[5] - out = cached_func.outputs[0] - conv_compute = OperatorCompute.from_output(out) - co = conv_compute.split(sch, 3, 2) - cache_weight = sch.cache_read(weight, "global", [conv_compute.op]) - cache_scale_bias = sch.cache_read(scale_bias, "global", [conv_compute.op]) - sch[cache_weight].compute_at(sch[out], co) - sch[cache_scale_bias].compute_at(sch[out], co) - - def _get_func(): - ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype="int8") - conv1 = make_ethosu_conv2d( - ifm, - 32, - 16, - (1, 1), - (0, 0), - (1, 1), - (1, 1), - ) - conv2 = make_ethosu_conv2d( - conv1, - 16, - 8, - (1, 1), - (0, 0), - (1, 1), - (1, 1), - ) - func = relay.Function(relay.analysis.free_vars(conv2), conv2) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - config = { - "accelerator_config": accelerator, - } - with tvm.transform.PassContext(config={"relay.ext.ethos-u.options": config}): - func = _get_func() - mod, consts = _lower_to_tir(func, cascader=_planner) - - script = mod.script() - test_mod = tvm.script.from_source(script, check_well_formed=False) - tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True) - - test_const_size = [value.size for value in list(consts.values())] - assert reference_const_sizes.sort() == test_const_size.sort() - - -def test_constant_as_input(): - """Test to check that constants specified as inputs aren't - interpreted as an encoded constant.""" - - def get_graph(): - dtype = "uint8" - ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype=dtype) - conv1 = make_ethosu_conv2d( - ifm, - 32, - 16, - (1, 1), - (0, 0), - (1, 1), - (1, 1), - ) - scalar = relay.const(np.ones((1, 1, 1, 1), dtype=dtype), dtype=dtype) - add1 = make_ethosu_binary_elementwise( - conv1, scalar, ifm_channels=32, ifm2_channels=1, operator_type="ADD", ofm_dtype=dtype - ) - func = relay.Function(relay.analysis.free_vars(add1), add1) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - tir_mod, params = _lower_to_tir(get_graph(), copy_constants()) - - # Check tile address for the scalar constant input hasn't been - # overwritten. - extern_calls = tir_mod["main"].body.body.body.body - binary_elementwise = extern_calls[-1].value - args = binary_elementwise.args - - reason = "Tile address overwritten" - assert args[26] == 0, reason - assert args[27] == 0, reason - assert args[28] == 0, reason - - # More generally, check compiles successfully to make sure - # nothing else was overrwritten. 
- # With Target Hooks the TIR module needs a target attached - # and lowered via make unpacked API. - tir_mod["main"] = tir_mod["main"].with_attr( - "target", tvm.target.Target("ethos-u", host="ethos-u") - ) - tir_mod = tvm.tir.transform.MakeUnpackedAPI()(tir_mod) - tir_to_cs_translator.translate(tir_mod, params) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_extract_constants.py b/tests/python/contrib/test_ethosu/test_extract_constants.py deleted file mode 100644 index 204ff34bb806..000000000000 --- a/tests/python/contrib/test_ethosu/test_extract_constants.py +++ /dev/null @@ -1,99 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest - -pytest.importorskip("ethosu.vela") -import tvm -from tvm import relay -from tvm.relay.testing import run_opt_pass -from tvm.relay.backend.contrib.ethosu.tir.compiler import extract_constants - -import numpy as np - - -def test_extract_constants_single(): - def _get_func(): - var_input = relay.var("data", shape=(10, 10), dtype="uint8") - const_data = np.random.uniform(0, 255, (10, 10)).astype("uint8") - const_input = relay.const(const_data, dtype="uint8") - out = relay.add(var_input, const_input) - func = relay.Function(relay.analysis.free_vars(out), out) - func = run_opt_pass(func, relay.transform.InferType()) - return func, const_input - - def _expected(): - var_input1 = relay.var("data", shape=(10, 10), dtype="uint8") - var_input2 = relay.var("p1", shape=(10, 10), dtype="uint8") - out = relay.add(var_input1, var_input2) - func = relay.Function(relay.analysis.free_vars(out), out) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - func, const = _get_func() - new_func, const_dict = extract_constants(func) - tvm.ir.assert_structural_equal(new_func, _expected()) - assert 1 in const_dict - assert (const_dict[1] == const.data.asnumpy()).all() - - -def test_extract_constants_multi(): - def _get_func(): - var_input1 = relay.var("data1", shape=(10, 10), dtype="uint8") - var_input2 = relay.var("data2", shape=(10, 10), dtype="uint8") - const_data_1 = np.random.uniform(0, 255, (10, 10)).astype("uint8") - const_data_2 = np.random.uniform(0, 255, (10, 10)).astype("uint8") - const_data_3 = np.random.uniform(0, 255, (10, 10)).astype("uint8") - const_data_4 = np.random.uniform(0, 255, (10, 10)).astype("uint8") - const_input_1 = relay.const(const_data_1, dtype="uint8") - const_input_2 = relay.const(const_data_2, dtype="uint8") - const_input_3 = relay.const(const_data_3, dtype="uint8") - const_input_4 = relay.const(const_data_4, dtype="uint8") - out = relay.add(var_input1, var_input2) - out = relay.add(out, const_input_1) - out = relay.add(out, const_input_2) - out = relay.add(out, const_input_3) - out = relay.add(out, const_input_4) - func = 
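[Editor's note] Throughout the deleted test_encode_constants.py the constant-size check is written as "assert reference_const_sizes.sort() == test_const_size.sort()". Python's list.sort() sorts in place and returns None, so that assertion always compares None with None and can never fail. A sketch of the check as presumably intended (the helper name is illustrative):

    def assert_const_sizes_match(reference_const_sizes, consts):
        # consts is the constant dict returned by _lower_to_tir in these tests.
        test_const_sizes = [value.size for value in consts.values()]
        assert sorted(reference_const_sizes) == sorted(test_const_sizes)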
relay.Function(relay.analysis.free_vars(out), out) - func = run_opt_pass(func, relay.transform.InferType()) - return func, [const_input_1, const_input_2, const_input_3, const_input_4] - - def _expected(): - var_input1 = relay.var("data1", shape=(10, 10), dtype="uint8") - var_input2 = relay.var("data2", shape=(10, 10), dtype="uint8") - var_input3 = relay.var("p1", shape=(10, 10), dtype="uint8") - var_input4 = relay.var("p2", shape=(10, 10), dtype="uint8") - var_input5 = relay.var("p3", shape=(10, 10), dtype="uint8") - var_input6 = relay.var("p4", shape=(10, 10), dtype="uint8") - out = relay.add(var_input1, var_input2) - out = relay.add(out, var_input3) - out = relay.add(out, var_input4) - out = relay.add(out, var_input5) - out = relay.add(out, var_input6) - func = relay.Function(relay.analysis.free_vars(out), out) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - func, consts = _get_func() - new_func, const_dict = extract_constants(func) - tvm.ir.assert_structural_equal(new_func, _expected()) - for i, const in enumerate(consts): - assert i + 2 in const_dict - assert (const_dict[i + 2] == consts[i].data.asnumpy()).all() - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_hoist_allocates.py b/tests/python/contrib/test_ethosu/test_hoist_allocates.py deleted file mode 100644 index f38e981e93bd..000000000000 --- a/tests/python/contrib/test_ethosu/test_hoist_allocates.py +++ /dev/null @@ -1,317 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Testing the pass that moves allocate nodes to the body of the function. -""" -# pylint: disable=wrong-import-position - -import pytest - -pytest.importorskip("ethosu.vela") - -import tvm -from tvm.script import tir as T -from tvm.relay.backend.contrib.ethosu.tir.passes import HoistAllocates - - -class ExtractAllocateInfo: - """ - Extracts information from allocate nodes which we will use as sanity to check the allocate - after mutation. - """ - - def __init__(self): - self.allocates_info = [] - - def __call__(self, mod): - tvm.tir.stmt_functor.ir_transform(mod["main"].body, self._pre_visit, None, ["tir.Allocate"]) - return self.allocates_info - - def _pre_visit(self, stmt): - self.allocates_info.append( - {"extents": stmt.extents, "dtype": stmt.dtype, "condition": stmt.condition} - ) - - -def CheckAllocates(allocate_info): # pylint: disable=invalid-name - """ - Checks that all allocates have been visited before an external call has been visited and - checks that the information for each allocate is what is expected. Additionally, the pass - checks the body of the tir after the final allocate statement is flat (it contains no - sequence statement). 
- """ - - allocate_idx = 0 - expected_num_allocates = len(allocate_info) - num_seq_stmts = 0 - - def _pre_visit(stmt): - nonlocal allocate_idx, expected_num_allocates, num_seq_stmts - - if isinstance(stmt, tvm.tir.Allocate): - expected = allocate_info[allocate_idx] - assert ( - stmt.extents == expected["extents"] - ), f"Allocate extents {stmt.extents} did not match expected {expected['extents']}" - assert ( - stmt.dtype == expected["dtype"] - ), f"Allocate dtype {stmt.dtype} did not match expected {expected['dtype']}" - assert ( - stmt.condition == expected["condition"] - ), f"Allocate condition {stmt.condition} did not match expected {expected['condition']}" - - allocate_idx += 1 - elif isinstance(stmt, tvm.tir.SeqStmt): - num_seq_stmts += 1 - assert num_seq_stmts <= expected_num_allocates, ( - "Encountered a SeqStmt after all allocates have been visited, was the " - "body flattened correctly?" - ) - else: - assert ( - allocate_idx == expected_num_allocates - ), "A call node was visited before all allocates" - - def _ftransform(f, mod, ctx): - f.with_body( - tvm.tir.stmt_functor.ir_transform( - f.body, _pre_visit, None, ["tir.Allocate", "tir.Call", "tir.SeqStmt"] - ) - ) - - return tvm.tir.transform.prim_func_pass(_ftransform, opt_level=0) - - -def test_double_convolution(): - """ - Test to check the HoistAllocates pass works on a function with two convolutions. - """ - - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def main(input_placeholder: T.Buffer((1, 27, 42, 3), "int8"), input_placeholder_encoded: T.Buffer((3, 3, 2, 3), "uint8"), input_placeholder_encoded_1: T.Buffer((3, 10), "uint8"), input_placeholder_encoded_2: T.Buffer((3, 3, 2, 3), "uint8"), input_placeholder_encoded_3: T.Buffer((3, 10), "uint8"), input_ethosu_write: T.Buffer((1, 27, 42, 3), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.Buffer([3402], dtype="int8", data=input_placeholder.data) - placeholder_encoded = T.Buffer([128], dtype="int8", data=input_placeholder_encoded.data) - placeholder_encoded_1 = T.Buffer([32], dtype="uint8", data=input_placeholder_encoded_1.data) - placeholder_encoded_2 = T.Buffer([128], dtype="int8", data=input_placeholder_encoded_2.data) - placeholder_encoded_3 = T.Buffer([32], dtype="uint8", data=input_placeholder_encoded_3.data) - ethosu_write = T.Buffer([3402], dtype="int8", data=input_ethosu_write.data) - # body - placeholder_global_data = T.allocate([128], "uint8", "global") - placeholder_global = T.Buffer([128], "uint8", data=placeholder_global_data) - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded[0], 128, placeholder_global[0], dtype="handle")) - placeholder_d_global_data = T.allocate([32], "uint8", "global") - placeholder_d_global = T.Buffer([32], "uint8", data=placeholder_d_global_data) - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 32, placeholder_d_global[0], dtype="handle")) - ethosu_write_2_data = T.allocate([18144], "int8", "global") - ethosu_write_2 = T.Buffer([18144], "int8", data=ethosu_write_2_data) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 27, 42, 3, 27, 0, 42, placeholder[0], 0, 0, 0, T.float32(0.0039215646684169769), -128, "NHWC", 126, 3, 1, "int8", 27, 42, 3, 27, 0, 42, ethosu_write_2[0], 0, 0, 0, T.float32(0.031308155506849289), -128, "NHCWB16", 672, 16, 1, 2, 3, 1, 1, 1, 2, placeholder_global[0], 128, 0, placeholder_d_global[0], 32, 2, 0, 2, 1, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) - 
placeholder_d_global_1_data = T.allocate([128], "uint8", "global") - placeholder_d_global_1 = T.Buffer([128], "uint8", data=placeholder_d_global_1_data) - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_2[0], 128, placeholder_d_global_1[0], dtype="handle")) - placeholder_d_global_2_data = T.allocate([32], "uint8", "global") - placeholder_d_global_2 = T.Buffer([32], "uint8", data=placeholder_d_global_2_data) - T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_3[0], 32, placeholder_d_global_2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 27, 42, 3, 27, 0, 42, ethosu_write_2[0], 0, 0, 0, T.float32(0.031308155506849289), -128, "NHCWB16", 672, 16, 1, "int8", 27, 42, 3, 27, 0, 42, ethosu_write[0], 0, 0, 0, T.float32(0.23604340851306915), -128, "NHWC", 126, 3, 1, 2, 3, 1, 1, 1, 2, placeholder_d_global_1[0], 128, 0, placeholder_d_global_2[0], 32, 2, 0, 2, 1, "CLIP", -128, 127, "TFL", "NONE", dtype="handle")) - # fmt: on - - mod = Module - allocate_info = ExtractAllocateInfo()(mod) - mod = HoistAllocates()(mod) - CheckAllocates(allocate_info)(mod) - - -def test_identities(): - """ - Test to check the HoistAllocates pass works on a function with multiple identity - operations, with no copy operations. - """ - - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def main(input_placeholder: T.Buffer((1, 2, 3, 4), "int8"), T_concat: T.Buffer((24,), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.Buffer([24], dtype="int8", data=input_placeholder.data) - # body - ethosu_write_data = T.allocate([12], "int8", "global") - ethosu_write = T.Buffer([12], "int8", data=ethosu_write_data) - T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 3, placeholder[12], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 1, 0, 3, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) - ethosu_write_1_data = T.allocate([12], "int8", "global") - ethosu_write_1 = T.Buffer([12], "int8", data=ethosu_write_1_data) - T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 3, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 1, 0, 3, ethosu_write_1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) - T.evaluate(T.call_extern("ethosu_identity", "int8", 12, 1, 1, 12, 0, 1, ethosu_write_1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "int8", 12, 1, 1, 12, 0, 1, T_concat[12], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) - ethosu_write_2_data = T.allocate([12], "int8", "global") - ethosu_write_2 = T.Buffer([12], "int8", data=ethosu_write_2_data) - T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 3, placeholder[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 1, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) - ethosu_write_3_data = T.allocate([12], "int8", "global") - ethosu_write_3 = T.Buffer([12], "int8", data=ethosu_write_3_data) - T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 1, 0, 3, ethosu_write_3[0], 0, 0, 0, 
T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) - T.evaluate(T.call_extern("ethosu_identity", "int8", 12, 1, 1, 12, 0, 1, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "int8", 12, 1, 1, 12, 0, 1, T_concat[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) - # fmt: on - - mod = Module - allocate_info = ExtractAllocateInfo()(mod) - mod = HoistAllocates()(mod) - CheckAllocates(allocate_info)(mod) - - -def test_outer_seq_stmt(): - """ - Test to check the HoistAllocates pass works on a function where the outer-most statement is - a sequence statement, rather than the usual allocate. - """ - - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8"), buffer_encoded: T.Buffer((128,), "uint8"), buffer_encoded_1: T.Buffer((32,), "uint8"), buffer_encoded_2: T.Buffer((112,), "uint8"), buffer_encoded_3: T.Buffer((32,), "uint8"), buffer_encoded_4: T.Buffer((112,), "uint8"), buffer_encoded_5: T.Buffer((32,), "uint8"), buffer_encoded_6: T.Buffer((112,), "uint8"), buffer_encoded_7: T.Buffer((32,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) - ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) - # body - with T.allocate([128], "uint8", "global") as placeholder_global_data: - placeholder_global = T.Buffer([128], "uint8", data=placeholder_global_data) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded[0], 128, placeholder_global[0], dtype="handle")) - placeholder_d_global_data = T.allocate([32], "uint8", "global") - placeholder_d_global = T.Buffer([32], "uint8", data=placeholder_d_global_data) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_1[0], 32, placeholder_d_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 128, 12, placeholder_d_global[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - with T.allocate([112], "uint8", "global") as placeholder_global_1_data: - placeholder_global_1 = T.Buffer([112], "uint8", data=placeholder_global_1_data) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_2[0], 112, placeholder_global_1[0], dtype="handle")) - placeholder_d_global_1_data = T.allocate([32], "uint8", "global") - placeholder_d_global_1 = T.Buffer([32], "uint8", data=placeholder_d_global_1_data) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_3[0], 32, placeholder_d_global_1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_1[0], 112, 12, placeholder_d_global_1[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - with T.allocate([112], "uint8", "global") as placeholder_global_2_data: - placeholder_global_2 = T.Buffer([112], "uint8", data=placeholder_global_2_data) 
- T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_4[0], 112, placeholder_global_2[0], dtype="handle")) - placeholder_d_global_2_data = T.allocate([32], "uint8", "global") - placeholder_d_global_2 = T.Buffer([32], "uint8", data=placeholder_d_global_2_data) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_5[0], 32, placeholder_d_global_2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_2[0], 112, 12, placeholder_d_global_2[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - placeholder_global_3_data = T.allocate([112], "uint8", "global") - placeholder_global_3 = T.Buffer([112], "uint8", data=placeholder_global_3_data) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_6[0], 112, placeholder_global_3[0], dtype="handle")) - placeholder_d_global_3_data = T.allocate([32], "uint8", "global") - placeholder_d_global_3 = T.Buffer([32], "uint8", data=placeholder_d_global_3_data) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_7[0], 32, placeholder_d_global_3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_3[0], 112, 12, placeholder_d_global_3[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - mod = Module - allocate_info = ExtractAllocateInfo()(mod) - mod = HoistAllocates()(mod) - CheckAllocates(allocate_info)(mod) - - -def test_allocate_without_seq_stmt(): - """ - Tests the case when an allocate statement does not have a sequence statement as its body. 
- """ - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8"), buffer_encoded: T.Buffer((128,), "uint8"), buffer_encoded_1: T.Buffer((32,), "uint8"), buffer_encoded_2: T.Buffer((112,), "uint8"), buffer_encoded_3: T.Buffer((32,), "uint8"), buffer_encoded_4: T.Buffer((112,), "uint8"), buffer_encoded_5: T.Buffer((32,), "uint8"), buffer_encoded_6: T.Buffer((112,), "uint8"), buffer_encoded_7: T.Buffer((32,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) - ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) - # body - placeholder_global_data = T.allocate([128], "uint8", "global") - placeholder_global = T.Buffer([128], "uint8", data=placeholder_global_data) - placeholder_global_1_data = T.allocate([112], "uint8", "global") - placeholder_global_1 = T.Buffer([112], "uint8", data=placeholder_global_1_data) - placeholder_global_2_data = T.allocate([112], "uint8", "global") - placeholder_global_2 = T.Buffer([112], "uint8", data=placeholder_global_2_data) - placeholder_d_global_data = T.allocate([32], "uint8", "global") - placeholder_d_global = T.Buffer([32], "uint8", data=placeholder_d_global_data) - placeholder_d_global_1_data = T.allocate([32], "uint8", "global") - placeholder_d_global_1 = T.Buffer([32], "uint8", data=placeholder_d_global_1_data) - placeholder_d_global_2_data = T.allocate([32], "uint8", "global") - placeholder_d_global_2 = T.Buffer([32], "uint8", data=placeholder_d_global_2_data) - placeholder_global_3_data = T.allocate([112], "uint8", "global") - placeholder_global_3 = T.Buffer([112], "uint8", data=placeholder_global_3_data) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded[0], 128, placeholder_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_1[0], 32, placeholder_d_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 128, 12, placeholder_d_global[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_2[0], 112, placeholder_global_1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_3[0], 32, placeholder_d_global_1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_1[0], 112, 12, placeholder_d_global_1[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_4[0], 112, placeholder_global_2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_5[0], 32, placeholder_d_global_2[0], dtype="handle")) - placeholder_d_global_3_data = T.allocate([32], "uint8", "global") - placeholder_d_global_3 = T.Buffer([32], "uint8", data=placeholder_d_global_3_data) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 
0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_2[0], 112, 12, placeholder_d_global_2[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_6[0], 112, placeholder_global_3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_7[0], 32, placeholder_d_global_3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_3[0], 112, 12, placeholder_d_global_3[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - mod = Module - allocate_info = ExtractAllocateInfo()(mod) - mod = HoistAllocates()(mod) - CheckAllocates(allocate_info)(mod) - - -def test_multiple_prim_funcs(): - @tvm.script.ir_module - class Module: - @T.prim_func - def main(): - T.evaluate(0) - - @T.prim_func - def abc(): - T.evaluate(0) - - mod = Module - - err_rgx = ( - r"Expected a single primitive function called 'main'. " - r"Please run the HoistAllocates pass in conjunction with the LowerToTIR\(\) pass." - ) - with pytest.raises(tvm.TVMError, match=err_rgx): - mod = HoistAllocates()(mod) - - -def test_no_main_prim_func(): - @tvm.script.ir_module - class Module: - @T.prim_func - def abs(): - T.evaluate(0) - - mod = Module - - err_rgx = ( - r"Expected a single primitive function called 'main'. " - r"Please run the HoistAllocates pass in conjunction with the LowerToTIR\(\) pass." - ) - with pytest.raises(tvm.TVMError, match=err_rgx): - mod = HoistAllocates()(mod) diff --git a/tests/python/contrib/test_ethosu/test_identity_optimizer.py b/tests/python/contrib/test_ethosu/test_identity_optimizer.py deleted file mode 100644 index 83aca640f767..000000000000 --- a/tests/python/contrib/test_ethosu/test_identity_optimizer.py +++ /dev/null @@ -1,352 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Test the identity optimizer pass that removes redundant identity -operations from the microNPU codegen. -""" -import pytest - -pytest.importorskip("ethosu.vela") - -import tensorflow as tf - -import tvm -from tvm import relay -from tvm.relay.op.contrib.ethosu import partition_for_ethosu -from tvm.relay.backend.contrib.ethosu.codegen import relay_to_tir -from tvm.relay.backend.contrib.ethosu.codegen import IdentityOptimizer - -from . 
import infra - - -def _optimize(func, optimize=True): - """Create IRModule and run identity optimizer pass.""" - func = func.with_attr("Compiler", "ethos-u") - mod = tvm.IRModule.from_expr(func) - mod = relay.transform.InferType()(mod) - if optimize: - mod = IdentityOptimizer()(mod) - entry = mod["main"] - return entry if isinstance(func, relay.Function) else entry.body - - -def test_simple_reshape_identity_removal(): - """Check identity is removed when there is a reshape in - the graph and a compute operation follows.""" - - def get_graph(get_expected=False): - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - x = infra.make_ethosu_conv2d(x, 4, 4, (1, 1), (0, 0), (1, 1), (1, 1)) - x = relay.reshape(x, newshape=(1, 4, 4, 1)) - if not get_expected: - x = infra.make_ethosu_identity(x) - x = infra.make_ethosu_unary_elementwise(x, 1, "ABS") - return relay.Function(relay.analysis.free_vars(x), x) - - actual = _optimize(get_graph()) - expected = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(actual, expected) - - -def test_simple_strided_slice_identity_removal(): - """Check identity is removed when there is a strided slice - in the graph and a compute operation follows.""" - - def get_graph(get_expected=False): - dtype = "int8" - - x = relay.var("x", shape=(1, 2, 2, 4), dtype=dtype) - x = infra.make_ethosu_pooling(x, "MAX", (1, 1), 4, dtype, (1, 1), (0, 0)) - x = relay.strided_slice(x, begin=[0, 0, 0, 0], end=[1, 2, 2, 2]) - if not get_expected: - x = infra.make_ethosu_identity(x) - x = infra.make_ethosu_pooling(x, "MAX", (1, 1), 2, dtype, (1, 1), (0, 0)) - return relay.Function(relay.analysis.free_vars(x), x) - - actual = _optimize(get_graph()) - expected = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(actual, expected) - - -def test_no_identity(): - """Check the graph is not affected when there is no identity in the graph.""" - - def get_graph(): - dtype = "int8" - - x = relay.var("x", shape=(1, 2, 2, 4), dtype=dtype) - x = infra.make_ethosu_conv2d(x, 4, 4, (1, 1), (0, 0), (1, 1), (1, 1)) - x = infra.make_ethosu_pooling(x, "MAX", (1, 1), 4, dtype, (1, 1), (0, 0)) - x = infra.make_ethosu_depthwise_conv2d(x, 4, (1, 1), (0, 0), (1, 1), (1, 1)) - x = infra.make_ethosu_unary_elementwise(x, 4, "ABS") - return relay.Function(relay.analysis.free_vars(x), x) - - actual = _optimize(get_graph()) - expected = _optimize(get_graph(), optimize=False) - tvm.ir.assert_structural_equal(actual, expected) - - -def test_reshape_last(): - """Check that an identity as a leaf of the graph is not removed.""" - - def get_graph(): - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - x = infra.make_ethosu_conv2d(x, 4, 4, (1, 1), (0, 0), (1, 1), (1, 1)) - x = relay.reshape(x, newshape=(1, 4, 4, 1)) - x = infra.make_ethosu_identity(x) - return relay.Function(relay.analysis.free_vars(x), x) - - actual = _optimize(get_graph()) - expected = _optimize(get_graph(), optimize=False) - tvm.ir.assert_structural_equal(actual, expected) - - -def test_requantize_identity_no_removal(): - """Check that an identity that actually performs a requantize isn't removed.""" - - def get_graph(): - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - x = relay.reshape(x, newshape=(1, 1, 4, 4)) - x = infra.make_ethosu_identity( - x, ifm_scale=0.5, ifm_zero_point=1, ofm_scale=0.3, ofm_zero_point=2 - ) - x = infra.make_ethosu_unary_elementwise(x, 4, "ABS") - return relay.Function(relay.analysis.free_vars(x), x) - - actual = _optimize(get_graph()) - 
expected = _optimize(get_graph(), optimize=False) - tvm.ir.assert_structural_equal(actual, expected) - - -def test_activation_identity_no_removal(): - """Check thst an identity with an activation isn't removed.""" - - def get_graph(): - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - x = relay.reshape(x, newshape=(1, 1, 4, 4)) - x = infra.make_ethosu_identity(x, activation="LUT") - x = infra.make_ethosu_unary_elementwise(x, 4, "ABS") - return relay.Function(relay.analysis.free_vars(x), x) - - actual = _optimize(get_graph()) - expected = _optimize(get_graph(), optimize=False) - tvm.ir.assert_structural_equal(actual, expected) - - -def test_multiple_output_identity(): - """Check that an identity is removed when it has multiple outputs.""" - - def get_graph(get_expected=False): - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - if not get_expected: - x = infra.make_ethosu_identity(x) - y = infra.make_ethosu_unary_elementwise(x, 4, "ABS") - z = infra.make_ethosu_unary_elementwise(x, 4, "ABS") - out = relay.concatenate((y, z), axis=0) - return relay.Function(relay.analysis.free_vars(x), out) - - actual = _optimize(get_graph()) - expected = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(actual, expected) - - -def test_many_output_identity(): - """Check an identity with many outputs. It cannot be removed due - to having a strided slice as output.""" - - def get_graph(get_expected=False): - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - x = relay.reshape(x, newshape=(1, 1, 4, 4)) - if not get_expected: - x = infra.make_ethosu_identity(x) - outputs = [] - for _ in range(4): - outputs.append(infra.make_ethosu_unary_elementwise(x, 4, "ABS")) - ss = relay.strided_slice(x, begin=(0, 0, 0, 0), end=(1, 1, 4, 4)) - identity_2 = infra.make_ethosu_identity(ss) - outputs.append(identity_2) - out = relay.concatenate(outputs, axis=0) - return relay.Function(relay.analysis.free_vars(out), out) - - actual = _optimize(get_graph()) - expected = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(actual, expected) - - -def test_identity_before_concatenate_no_removal(): - """Check that an identity isn't removed when the operator - following it is a concatenate operation.""" - - def get_graph(): - x = relay.var("x", shape=(1, 1, 4, 4), dtype="int8") - y = relay.var("y", shape=(1, 2, 2, 4), dtype="int8") - z = relay.var("z", shape=(1, 2, 2, 4), dtype="int8") - x = relay.reshape(x, newshape=(1, 2, 2, 4)) - y = relay.strided_slice(y, begin=(0, 0, 0, 0), end=(1, 2, 2, 4)) - x = infra.make_ethosu_identity(x) - y = infra.make_ethosu_identity(y) - out = relay.concatenate([x, y, z], axis=0) - return relay.Function(relay.analysis.free_vars(out), out) - - actual = _optimize(get_graph()) - expected = _optimize(get_graph(), optimize=False) - tvm.ir.assert_structural_equal(actual, expected) - - -def test_identity_removal_with_multiple_transform_ops(): - """Check that only an identity directly parent to a compute - operation is removed.""" - - def get_graph(get_expected=False): - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - x = relay.strided_slice(x, begin=[0, 0, 0, 0], end=[1, 2, 2, 2]) - if not get_expected: - x = infra.make_ethosu_identity(x) - x = relay.reshape(x, newshape=(1, 1, 1, 8)) - if not get_expected: - x = infra.make_ethosu_identity(x) - x = infra.make_ethosu_unary_elementwise(x, 8, "ABS") - return relay.Function(relay.analysis.free_vars(x), x) - - actual = _optimize(get_graph()) - expected = 
_optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(actual, expected) - - -def test_identity_removal_on_binary_elementwise(): - """Check identities before binary elementwise are removed correctly.""" - - def get_graph(get_expected=False): - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - y = relay.var("y", shape=(1, 2, 2, 4), dtype="int8") - if not get_expected: - x = infra.make_ethosu_identity(x) - y = infra.make_ethosu_identity(y) - z = infra.make_ethosu_binary_elementwise(x, y, 4, 4, "ADD", "int8") - return relay.Function(relay.analysis.free_vars(z), z) - - actual = _optimize(get_graph()) - expected = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(actual, expected) - - -def test_identity_single_removal_on_binary_elementwise(): - """Check that identity on the second input of the binary elementwise - operation is removed while the other input has no identity.""" - - def get_graph(get_expected=False): - x = relay.var("x", shape=(1, 4, 1, 4), dtype="int8") - y = relay.var("y", shape=(1, 2, 2, 4), dtype="int8") - y = relay.reshape(y, newshape=(1, 4, 1, 4)) - if not get_expected: - y = infra.make_ethosu_identity(y) - z = infra.make_ethosu_binary_elementwise(x, y, 4, 4, "ADD", "int8") - return relay.Function(relay.analysis.free_vars(z), z) - - actual = _optimize(get_graph()) - expected = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(actual, expected) - - -def test_multiple_transform_ops_with_reduction_in_dimensionality(): - """Removal of an identity operation between two transform operations is usually okay. - However, if the dimensionality of the input is reduced by the second transformation - operation, it can lead to an output mismatch. Checking that the pass doesn't remove - an identity given this case.""" - - def get_graph(): - x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") - x = relay.strided_slice(x, begin=(0, 0, 0, 0), end=(1, 2, 2, 2)) - x = infra.make_ethosu_identity(x) - x = relay.reshape(x, newshape=(1, 2, 4)) - x = infra.make_ethosu_identity(x) - return relay.Function(relay.analysis.free_vars(x), x) - - actual = _optimize(get_graph()) - expected = _optimize(get_graph(), optimize=False) - tvm.ir.assert_structural_equal(actual, expected) - - -def test_identity_optimizer_runs_in_compilation_pipeline(): - """Checks that the identity optimization pass is run as part of the NPU compilation pipeline.""" - - def get_graph(): - x = relay.var("x", shape=(1, 4, 4, 4), dtype="int8") - x = relay.reshape(x, newshape=(1, 1, 16, 4)) - x = relay.nn.max_pool2d(x, layout="NHWC") - func = relay.Function(relay.analysis.free_vars(x), x) - return tvm.IRModule.from_expr(func) - - mod = get_graph() - mod = partition_for_ethosu(mod) - mod = relay_to_tir(mod) - - external_gv_name = mod["main"].body.op.name_hint - prim_func = mod[external_gv_name] - - # Check for hints in the TIR prim func that the identity optimization pass - # has ran. There should not be an identity in the prim func. 
- assert prim_func.body.value.args[0] == "ethosu_pooling" - - -def test_same_output(): - """Check that the output remains the same when the identity - optimizer pass removes some identities inserted during legalization.""" - ifm_shapes = [(1, 1, 25, 8), (1, 5, 5, 8)] - - @tf.function - def model(x, y): - x = tf.reshape(x, (1, 5, 5, 8)) - z = tf.add(x, y) - z = tf.reshape(z, (1, 1, 25, 8)) - return z - - infra.compare_tvm_with_tflite(model, ifm_shapes, "ethos-u55-256", enable_cascader=False) - - -def test_multi_output_identity_has_same_output(): - """Check that the output remains the same with an identity with - multiple outputs.""" - ifm_shape = (1, 1, 64, 16) - - @tf.function - def model(x): - x = tf.reshape(x, (1, 8, 8, 16)) - outputs = [] - for _ in range(4): - outputs.append(tf.nn.max_pool2d(x, 1, 1, "VALID")) - outputs.append(tf.reshape(x, (1, 8, 8, 16))) - y = tf.concat(outputs, axis=0) - return y - - infra.compare_tvm_with_tflite(model, [ifm_shape], "ethos-u55-256", enable_cascader=False) - - -def test_multiple_transform_ops_same_output(): - """Check case of identity removal between transform ops and - then without, making sure they have the same output.""" - ifm_shape = (1, 2, 2, 4) - - @tf.function - def model(x): - x = tf.reshape(x, (1, 1, 4, 4)) - x = tf.slice(x, (0, 0, 0, 0), (1, 1, 4, 3)) - x = tf.reshape(x, (12,)) - return x - - infra.compare_tvm_with_tflite(model, [ifm_shape], "ethos-u55-256", enable_cascader=False) diff --git a/tests/python/contrib/test_ethosu/test_layout_optimizer.py b/tests/python/contrib/test_ethosu/test_layout_optimizer.py deleted file mode 100644 index 445eedbf64a8..000000000000 --- a/tests/python/contrib/test_ethosu/test_layout_optimizer.py +++ /dev/null @@ -1,796 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test the layout optimization pass. This pass is used to -convert subgraphs to the preferred layout of NHCWB16. -""" - -import pytest - -pytest.importorskip("ethosu.vela") - -import sys - -import numpy as np -import tensorflow as tf -import tflite.Model - -import tvm -from tvm import relay -from tvm.relay.op.contrib.ethosu import partition_for_ethosu -from tvm.relay.backend.contrib.ethosu.codegen import LayoutOptimizer -from tvm.relay.backend.contrib.ethosu.codegen import relay_to_tir - -from . 
import infra - - -def _optimize(func, optimize=True): - """Create IRModule and run layout optimizer pass.""" - func = func.with_attr("Compiler", "ethos-u") - mod = tvm.IRModule.from_expr(func) - mod = relay.transform.InferType()(mod) - if optimize: - mod = LayoutOptimizer()(mod) - entry = mod["main"] - return entry if isinstance(func, relay.Function) else entry.body - - -def _compile_and_compare_model(tflite_graph, ifm_shape, dtype): - """Compare running result of compilation against TFLite.""" - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={ - "ifm": ifm_shape, - }, - dtype_dict={ - "ifm": dtype, - }, - ) - mod = partition_for_ethosu(mod, params) - - # Generate reference data - input_data, output_data = infra.generate_ref_data_tflite(tflite_graph) - - test_runner = infra.create_test_runner("ethos-u55-256") - compiled_models = infra.build_source( - mod, - input_data, - output_data, - test_runner, - output_tolerance=0, - ) - - # Assumes only two runtime.Modules are created -- i.e. single offload module - ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0] - - # Verify generated C source - get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts") - compilation_artifacts = get_artifacts(ethosu_module) - cmms = bytes.fromhex(compilation_artifacts[0].command_stream) - infra.print_payload(cmms) - infra.verify_source(compiled_models, test_runner) - - -def test_single_convolution(): - """Test a single convolution to make sure the layouts remain - unaltered. - """ - - def get_graph(): - x = relay.var("x", shape=(1, 8, 8, 4), dtype="int8") - x = infra.make_ethosu_conv2d( - ifm=x, - ifm_channels=8, - ofm_channels=8, - kernel_shape=(1, 1), - padding=(0, 0), - strides=(1, 1), - dilation=(1, 1), - ifm_layout="NHWC", - ofm_layout="NHWC", - ) - return relay.Function(relay.analysis.free_vars(x), x) - - a = _optimize(get_graph()) - b = _optimize(get_graph(), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -@pytest.mark.parametrize("dtype", ["int8", "int32"]) -def test_add_reduce_sum(dtype): - """Test add with reduce sum to make sure the layouts remain - unaltered for int32 and altered for other types. - """ - - def get_graph(get_expected=False): - in_1 = relay.var("x", shape=(1, 2, 2, 2), dtype=dtype) - in_2 = relay.var("y", shape=(1, 2, 2, 2), dtype=dtype) - layout = "NHCWB16" if get_expected and dtype != "int32" else "NHWC" - add = infra.make_ethosu_binary_elementwise( - in_1, - in_2, - ifm_channels=2, - ifm2_channels=2, - operator_type="ADD", - ofm_dtype=dtype, - ifm_layout="NHWC", - ifm2_layout="NHWC", - ofm_layout=layout, - ) - x = infra.make_ethosu_pooling( - ifm=add, - pooling_type="SUM", - pool_shape=(1, 1), - ofm_channels=1, - ofm_dtype="int32", - strides=(1, 1), - padding=(0, 0), - ifm_layout=layout, - ofm_layout="NHWC", - ) - return relay.Function(relay.analysis.free_vars(x), x) - - a = _optimize(get_graph()) - b = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -def test_multiple_convolution(): - """Test layout optimization pass on linear chain of convolutions. 
I.e, - - conv_1 - | - conv_2 - | - conv_3 - """ - - def get_graph(get_expected=False): - x = relay.var("x", shape=(1, 8, 8, 4), dtype="int8") - for i in range(3): - ifm_layout = "NHCWB16" if get_expected and i != 0 else "NHWC" - ofm_layout = "NHCWB16" if get_expected and i != 2 else "NHWC" - x = infra.make_ethosu_conv2d( - ifm=x, - ifm_channels=8, - ofm_channels=8, - kernel_shape=(1, 1), - padding=(0, 0), - strides=(1, 1), - dilation=(1, 1), - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - return relay.Function(relay.analysis.free_vars(x), x) - - a = _optimize(get_graph()) - b = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -def test_multiple_depthwise_convolution(): - """Test layout optimization pass on multiple depthwise convolutions. - - depthwise_conv_1 - | - depthwise_conv_2 - | - depthwise_conv_3 - """ - - def get_graph(get_expected=False): - x = relay.var("x", shape=(1, 8, 8, 4), dtype="int8") - for i in range(3): - ifm_layout = "NHCWB16" if get_expected and i != 0 else "NHWC" - ofm_layout = "NHCWB16" if get_expected and i != 2 else "NHWC" - x = infra.make_ethosu_depthwise_conv2d( - ifm=x, - channels=4, - kernel_shape=(1, 1), - padding=(0, 0), - strides=(1, 1), - dilation=(1, 1), - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - return relay.Function(relay.analysis.free_vars(x), x) - - a = _optimize(get_graph()) - b = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -def test_ignore_transform_operations(): - """Test layout optimization pass ignores transform operations - such as reshape and strided slice. - - conv_1 - | - reshape - | - strided_slice - | - conv_2 - """ - - def get_graph(): - in_1 = relay.var("x", shape=(1, 16, 16, 8), dtype="int8") - conv_1 = infra.make_ethosu_conv2d( - ifm=in_1, - ifm_channels=8, - ofm_channels=8, - kernel_shape=(1, 1), - padding=(0, 0), - strides=(1, 1), - dilation=(1, 1), - ifm_layout="NHWC", - ofm_layout="NHWC", - ) - reshape = relay.reshape(conv_1, (1, 16, 16, 8)) - strided_slice = relay.strided_slice(reshape, (0, 0, 0, 0), (1, 16, 16, 8)) - conv_2 = infra.make_ethosu_conv2d( - ifm=strided_slice, - ifm_channels=8, - ofm_channels=8, - kernel_shape=(1, 1), - padding=(0, 0), - strides=(1, 1), - dilation=(1, 1), - ifm_layout="NHWC", - ofm_layout="NHWC", - ) - return relay.Function(relay.analysis.free_vars(conv_2), conv_2) - - a = _optimize(get_graph()) - b = _optimize(get_graph(), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -def test_ignore_concatenate(): - """Test layout optimization pass ignores the concatenate operation, - when layout transformation cannot occur. 
- - in_1 in_2 - \ / - \ conv_1 - \ / - concat - | - conv_2 - """ - - def get_graph(): - in_1 = relay.var("x", shape=(1, 16, 16, 8), dtype="int8") - in_2 = relay.var("y", shape=(1, 16, 16, 8), dtype="int8") - conv_1 = infra.make_ethosu_conv2d( - ifm=in_2, - ifm_channels=8, - ofm_channels=8, - kernel_shape=(1, 1), - padding=(0, 0), - strides=(1, 1), - dilation=(1, 1), - ifm_layout="NHWC", - ofm_layout="NHWC", - ) - concat = relay.concatenate([in_1, conv_1], axis=1) - conv_2 = infra.make_ethosu_conv2d( - ifm=concat, - ifm_channels=8, - ofm_channels=4, - kernel_shape=(1, 1), - padding=(0, 0), - strides=(1, 1), - dilation=(1, 1), - ifm_layout="NHWC", - ofm_layout="NHWC", - ) - return relay.Function(relay.analysis.free_vars(conv_2), conv_2) - - a = _optimize(get_graph()) - b = _optimize(get_graph(), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -def test_ignore_concatnate_with_layout_transform(): - """Test the layout optimization pass ignores the concatenate - operation and performs a layout transformation. - - in_1 in_2 - \ / - pool_1 pool_2 - \ / - concat - | - pool_3 - """ - - def get_graph(): - dtype = "int8" - - in_1 = relay.var("x", shape=(1, 16, 16, 8), dtype=dtype) - in_2 = relay.var("y", shape=(1, 16, 16, 8), dtype=dtype) - pool_1 = infra.make_ethosu_pooling( - in_1, - "MAX", - (1, 1), - ofm_channels=8, - ofm_dtype=dtype, - strides=(1, 1), - padding=(0, 0), - ifm_layout="NHWC", - ofm_layout="NHWC", - ) - pool_2 = infra.make_ethosu_pooling( - in_2, - "MAX", - (1, 1), - ofm_channels=8, - ofm_dtype=dtype, - strides=(1, 1), - padding=(0, 0), - ifm_layout="NHWC", - ofm_layout="NHWC", - ) - concat = relay.concatenate([pool_1, pool_2], axis=1) - pool_3 = infra.make_ethosu_pooling( - concat, - "MAX", - (1, 1), - ofm_channels=8, - ofm_dtype=dtype, - strides=(1, 1), - padding=(0, 0), - ifm_layout="NHWC", - ofm_layout="NHWC", - ) - return relay.Function(relay.analysis.free_vars(pool_3), pool_3) - - a = _optimize(get_graph()) - b = _optimize(get_graph(), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -def test_multiple_inputs(): - """Test the layout optimization pass works as expected when there - are multiple inputs in the graph. - - pool_1 pool_2 pool_3 - \ | / - \ | / - concat - | - conv - """ - - def get_graph(): - poolings = [] - for _ in range(3): - dtype = "int8" - - inp = relay.var("x", shape=(1, 3, 3, 4), dtype=dtype) - pool = infra.make_ethosu_pooling( - inp, - "MAX", - (1, 1), - ofm_channels=4, - ofm_dtype=dtype, - strides=(1, 1), - padding=(0, 0), - ifm_layout="NHWC", - ofm_layout="NHWC", - ) - poolings.append(pool) - concat = relay.concatenate(poolings, axis=0) - conv = infra.make_ethosu_conv2d( - ifm=concat, - ifm_channels=8, - ofm_channels=4, - kernel_shape=(1, 1), - padding=(0, 0), - strides=(1, 1), - dilation=(1, 1), - ifm_layout="NHWC", - ofm_layout="NHWC", - ) - return relay.Function(relay.analysis.free_vars(conv), conv) - - a = _optimize(get_graph()) - b = _optimize(get_graph(), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -def test_multiple_outputs(): - """Test the layout optimization pass works as expected when there - are multiple outputs in the graph. 
- - pool_1 - / | \ - pool_2 pool_3 pool_4 - \ | / - concat - """ - - def get_graph(get_expected=False): - dtype = "int8" - - in_1 = relay.var("x", shape=(1, 4, 4, 8), dtype=dtype) - pool_1 = infra.make_ethosu_pooling( - in_1, - "MAX", - (1, 1), - ofm_channels=4, - ofm_dtype=dtype, - strides=(1, 1), - padding=(0, 0), - ifm_layout="NHWC", - ofm_layout="NHCWB16" if get_expected else "NHWC", - ) - poolings = [] - for _ in range(3): - poolings.append( - infra.make_ethosu_pooling( - pool_1, - "MAX", - (1, 1), - ofm_channels=4, - ofm_dtype=dtype, - strides=(1, 1), - padding=(0, 0), - ifm_layout="NHCWB16" if get_expected else "NHWC", - ofm_layout="NHWC", - ) - ) - concat = relay.concatenate(poolings, axis=0) - return relay.Function(relay.analysis.free_vars(concat), concat) - - a = _optimize(get_graph()) - b = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -def test_multiple_binary_elementwise(): - """Test the layout optimization pass works as expected for - binary elementwise operations. - - add_1 add_2 - \ / - \ / - add_3 - """ - - def get_graph(get_expected=False): - in_1 = relay.var("x", shape=(1, 2, 2, 2), dtype="int8") - in_2 = relay.var("y", shape=(1, 2, 2, 2), dtype="int8") - in_3 = relay.var("z", shape=(1, 2, 2, 2), dtype="int8") - add_1 = infra.make_ethosu_binary_elementwise( - in_1, - in_2, - ifm_channels=2, - ifm2_channels=2, - operator_type="ADD", - ofm_dtype="int8", - ifm_layout="NHWC", - ifm2_layout="NHWC", - ofm_layout="NHCWB16" if get_expected else "NHWC", - ) - add_2 = infra.make_ethosu_binary_elementwise( - in_2, - in_3, - ifm_channels=2, - ifm2_channels=2, - operator_type="ADD", - ofm_dtype="int8", - ifm_layout="NHWC", - ifm2_layout="NHWC", - ofm_layout="NHCWB16" if get_expected else "NHWC", - ) - add_3 = infra.make_ethosu_binary_elementwise( - add_1, - add_2, - ifm_channels=2, - ifm2_channels=2, - operator_type="ADD", - ofm_dtype="int8", - ifm_layout="NHCWB16" if get_expected else "NHWC", - ifm2_layout="NHCWB16" if get_expected else "NHWC", - ofm_layout="NHWC", - ) - return relay.Function(relay.analysis.free_vars(add_3), add_3) - - a = _optimize(get_graph()) - b = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -def test_multiple_pooling(): - """Test the layout optimization pass works as expected for - multiple pooling operations. - - pool_1 - | - pool_2 - | - pool_3 - """ - - def get_graph(get_expected=False): - dtype = "int8" - - x = relay.var("x", shape=(1, 8, 8, 4), dtype=dtype) - for i in range(3): - ifm_layout = "NHCWB16" if get_expected and i != 0 else "NHWC" - ofm_layout = "NHCWB16" if get_expected and i != 2 else "NHWC" - x = infra.make_ethosu_pooling( - x, - "MAX", - (1, 1), - ofm_channels=4, - ofm_dtype=dtype, - strides=(1, 1), - padding=(0, 0), - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - return relay.Function(relay.analysis.free_vars(x), x) - - a = _optimize(get_graph()) - b = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -def test_multiple_unary_elementwise(): - """Test the layout optimization pass works as expected for multiple - unary elementwise operations. 
- - abs_1 - | - abs_2 - | - abs_3 - """ - - def get_graph(get_expected=False): - x = relay.var("x", shape=(1, 8, 8, 4), dtype="int8") - for i in range(3): - ifm_layout = "NHCWB16" if get_expected and i != 0 else "NHWC" - ofm_layout = "NHCWB16" if get_expected and i != 2 else "NHWC" - x = infra.make_ethosu_unary_elementwise( - x, - ofm_channels=4, - operator_type="ABS", - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - return relay.Function(relay.analysis.free_vars(x), x) - - a = _optimize(get_graph()) - b = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -def test_op_without_ethosu_consumer(): - """Test the layout optimization pass works as expected when - there is a case that the output layout should not be altered - since not all consumers are NPU operations (in this case conv). - - depthwise - | - conv - / \ - | pool - \ / - (concat) - """ - - def get_graph(get_expected=False): - exp_layout = "NHCWB16" if get_expected else "NHWC" - dtype = "int8" - - x = relay.var("x", shape=(1, 2, 2, 2), dtype=dtype) - depthwise = infra.make_ethosu_depthwise_conv2d( - x, 2, (1, 1), (0, 0), (1, 1), (0, 0), ofm_layout=exp_layout - ) - conv = infra.make_ethosu_conv2d( - depthwise, - 2, - 2, - (1, 1), - (0, 0), - (1, 1), - (0, 0), - ifm_layout=exp_layout, - ) - pool = infra.make_ethosu_pooling(conv, "MAX", (1, 1), 2, dtype, (1, 1), (0, 0)) - concat = relay.concatenate([conv, pool], axis=0) - return relay.Function(relay.analysis.free_vars(concat), concat) - - a = _optimize(get_graph()) - b = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -def test_diamond_graph(): - """ - Test the layout optimizer pass works as expected on a diamond graph - with a case where the operation dominating the output operation - cannot be altered, but operations within the diamond can. 
- - pool_1 - | - pool_2 - / \ - | pool_3 - | | - | pool_4 - | | - | pool_5 - \ / - (concat) - """ - - def get_graph(get_expected=False): - exp_layout = "NHCWB16" if get_expected else "NHWC" - dtype = "int8" - - x = relay.var("x", shape=(1, 2, 2, 2), dtype=dtype) - pool_1 = infra.make_ethosu_pooling( - x, "MAX", (1, 1), 2, dtype, (1, 1), (0, 0), ofm_layout=exp_layout - ) - pool_2 = infra.make_ethosu_pooling( - pool_1, "MAX", (1, 1), 2, dtype, (1, 1), (0, 0), ifm_layout=exp_layout - ) - pool_3 = infra.make_ethosu_pooling( - pool_2, "MAX", (1, 1), 2, dtype, (1, 1), (0, 0), ofm_layout=exp_layout - ) - pool_4 = infra.make_ethosu_pooling( - pool_3, - "MAX", - (1, 1), - 2, - dtype, - (1, 1), - (0, 0), - ifm_layout=exp_layout, - ofm_layout=exp_layout, - ) - pool_5 = infra.make_ethosu_pooling( - pool_4, "MAX", (1, 1), 2, dtype, (1, 1), (0, 0), ifm_layout=exp_layout - ) - concat = relay.concatenate([pool_2, pool_5], axis=0) - return relay.Function(relay.analysis.free_vars(concat), concat) - - a = _optimize(get_graph()) - b = _optimize(get_graph(get_expected=True), optimize=False) - tvm.ir.assert_structural_equal(a, b) - - -def test_same_output_multiple_convolutions(): - """Test running the layout optimization pass with multiple convolutions - gives same output as TFLite.""" - - np.random.seed(0) - dtype = "int8" - ifm_shape = (1, 8, 8, 32) - kernel_shape = (1, 1, 32, 32) - - def create_model(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - for _ in range(3): - x = tf.nn.conv2d( - x, - filters=tf.constant(np.random.uniform(size=kernel_shape), dtype=tf.float32), - strides=(1, 1), - padding="SAME", - data_format="NHWC", - dilations=1, - ) - return x - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - return converter.convert() - - _compile_and_compare_model(create_model(), ifm_shape, dtype) - - -def test_same_output_multiple_pooling(): - """Test running the layout optimization pass with multiple pooling - operations gives same output as TFLite.""" - - np.random.seed(0) - dtype = "int8" - ifm_shape = (1, 4, 2, 7) - - def create_model(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - for _ in range(2): - x = tf.nn.max_pool2d(x, (1, 1), (1, 1), "SAME", "NHWC") - return x - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - return converter.convert() - - 
_compile_and_compare_model(create_model(), ifm_shape, dtype) - - -def test_layout_optimizer_runs_in_compilation_pipeline(): - """Checks that the layout optimization pass runs as part of the NPU compilation - pipeline.""" - - def get_graph(): - x = relay.var("x", shape=(1, 4, 4, 4), dtype="int8") - for _ in range(2): - x = relay.nn.max_pool2d(x, layout="NHWC") - - func = relay.Function(relay.analysis.free_vars(x), x) - return tvm.IRModule.from_expr(func) - - mod = get_graph() - mod = partition_for_ethosu(mod) - mod = relay_to_tir(mod) - - external_gv_name = mod["main"].body.op.name_hint - prim_func = mod[external_gv_name] - - # Check for hints in the TIR prim func that the layout optimization pass has ran - ops = prim_func.body.body.seq - max_pool1, max_pool2 = ops - - assert str(max_pool1.value.args[31]) == '"NHCWB16"' - assert str(max_pool2.value.args[14]) == '"NHCWB16"' - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_legalize.py b/tests/python/contrib/test_ethosu/test_legalize.py deleted file mode 100644 index c5bcf7bc2380..000000000000 --- a/tests/python/contrib/test_ethosu/test_legalize.py +++ /dev/null @@ -1,3972 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument - -import pytest - -pytest.importorskip("ethosu.vela") - -import math - -import numpy as np -import tensorflow as tf -import tflite.Model - -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu import legalize, preprocess -from tvm.relay import dataflow_pattern -from tvm.relay.op.contrib import ethosu -from tvm.relay.backend.contrib.ethosu import util, codegen -from tvm.relay.build_module import bind_params_by_name -from tvm.relay.frontend.tflite import get_pad_value -from tvm.relay.expr_functor import ExprVisitor - -from . import infra - - -def partition_ethosu_by_table(mod, pattern_table): - """In case only the legalization part is supported for an operator, we don't - want to add the operator's pattern to the pattern table so that the compiler - wouldn't attempt to offload an operator without full stack support.""" - mod = relay.transform.InferType()(mod) - mod = mod = codegen.replicate_pads(mod) - mod = relay.transform.InferType()(mod) - mod = relay.transform.MergeComposite(pattern_table)(mod) - mod = relay.transform.AnnotateTarget("ethos-u")(mod) - mod = relay.transform.MergeCompilerRegions()(mod) - mod = relay.transform.InferType()(mod) - mod = relay.transform.PartitionGraph()(mod) - mod = relay.transform.InferType()(mod) - mod = preprocess.preprocess_ext_io()(mod) - return mod - - -def relu_n1_to_1(x): - """ - The specific pattern will be replaced into RELU_N1_TO_1 by tflite. 
- """ - return tf.math.maximum(-1.0, tf.math.minimum(x, 1.0)) - - -def test_split_indices_legalize(): - def create_graph(axis): - x = relay.var("x", shape=(1, 50, 50, 3)) - x_relu = relay.nn.relu(x) - split_output = relay.split(x_relu, [5, 20, 45], axis).tuple_value - return relay.Function([x], split_output) - - def expected_mod_axis1(): - expected_ir_string = """ - #[version = "0.0.5"] - def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 5, 50, 3), float32],\ - Tensor[(1, 15, 50, 3), float32],\ - Tensor[(1, 25, 50, 3), float32],\ - Tensor[(1, 5, 50, 3), float32]) { - %0 = nn.relu(%x) /* ty=Tensor[(1, 50, 50, 3), float32] */; - %1 = strided_slice(%0, begin=[0, 0, 0, 0], end=[1, 5, 50, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 5, 50, 3), float32] */; - %2 = strided_slice(%0, begin=[0, 5, 0, 0], end=[1, 20, 50, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 15, 50, 3), float32] */; - %3 = strided_slice(%0, begin=[0, 20, 0, 0], end=[1, 45, 50, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 25, 50, 3), float32] */; - %4 = strided_slice(%0, begin=[0, 45, 0, 0], end=[1, 50, 50, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 5, 50, 3), float32] */; - (%1, %2, %3, %4) - } - """ - return tvm.relay.fromtext(expected_ir_string) - - def expected_mod_axis2(): - expected_ir_string = """ - #[version = "0.0.5"] - def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 50, 5, 3), float32],\ - Tensor[(1, 50, 15, 3), float32],\ - Tensor[(1, 50, 25, 3), float32],\ - Tensor[(1, 50, 5, 3), float32]) { - %0 = nn.relu(%x) /* ty=Tensor[(1, 50, 50, 3), float32] */; - %1 = strided_slice(%0, begin=[0, 0, 0, 0], end=[1, 50, 5, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 50, 5, 3), float32] */; - %2 = strided_slice(%0, begin=[0, 0, 5, 0], end=[1, 50, 20, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 50, 15, 3), float32] */; - %3 = strided_slice(%0, begin=[0, 0, 20, 0], end=[1, 50, 45, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 50, 25, 3), float32] */; - %4 = strided_slice(%0, begin=[0, 0, 45, 0], end=[1, 50, 50, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 50, 5, 3), float32] */; - (%1, %2, %3, %4) - } - """ - return tvm.relay.fromtext(expected_ir_string) - - rewrite_split = [legalize.PartitionedSplitRewriter(), legalize.SplitRewriter()] - - mod_axis1 = tvm.IRModule() - func = create_graph(1) - for r in rewrite_split: - func = dataflow_pattern.rewrite(r, func) - mod_axis1["tvmgen_default_ethos_u_main_0"] = func - expected_axis1 = expected_mod_axis1() - tvm.ir.assert_structural_equal(mod_axis1, expected_axis1) - - mod_axis2 = tvm.IRModule() - func = create_graph(2) - for r in rewrite_split: - func = dataflow_pattern.rewrite(r, func) - mod_axis2["tvmgen_default_ethos_u_main_0"] = func - expected_axis2 = expected_mod_axis2() - tvm.ir.assert_structural_equal(mod_axis2, expected_axis2) - - -def test_split_sections_legalize(): - def create_graph(axis, sections): - x = relay.var("x", shape=(1, 50, 50, 3)) - x_abs = relay.abs(x) - split_output = relay.split(x_abs, sections, axis).tuple_value - outputs = list() - for section_idx in range(sections): - split_single_out = relay.TupleGetItem(split_output, section_idx) - tanh = relay.tanh(split_single_out) - outputs.append(tanh) - tuple_out = relay.Tuple(outputs) - return relay.Function([x], tuple_out) - - def expected_mod_axis1(): - expected_ir_string = """ - #[version = "0.0.5"] - def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 10, 50, 3), float32],\ - 
Tensor[(1, 10, 50, 3), float32],\ - Tensor[(1, 10, 50, 3), float32],\ - Tensor[(1, 10, 50, 3), float32],\ - Tensor[(1, 10, 50, 3), float32]) { - %0 = abs(%x) /* ty=Tensor[(1, 50, 50, 3), float32] */; - %1 = strided_slice(%0, begin=[0, 0, 0, 0], end=[1, 10, 50, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 10, 50, 3), float32] */; - %2 = strided_slice(%0, begin=[0, 10, 0, 0], end=[1, 20, 50, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 10, 50, 3), float32] */; - %3 = strided_slice(%0, begin=[0, 20, 0, 0], end=[1, 30, 50, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 10, 50, 3), float32] */; - %4 = strided_slice(%0, begin=[0, 30, 0, 0], end=[1, 40, 50, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 10, 50, 3), float32] */; - %5 = strided_slice(%0, begin=[0, 40, 0, 0], end=[1, 50, 50, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 10, 50, 3), float32] */; - %6 = (%1, %2, %3, %4, %5); - %7 = %6.0; - %8 = tanh(%7) /* ty=Tensor[(1, 10, 50, 3), float32] */; - %9 = %6.1; - %10 = tanh(%9) /* ty=Tensor[(1, 10, 50, 3), float32] */; - %11 = %6.2; - %12 = tanh(%11) /* ty=Tensor[(1, 10, 50, 3), float32] */; - %13 = %6.3; - %14 = tanh(%13) /* ty=Tensor[(1, 10, 50, 3), float32] */; - %15 = %6.4; - %16 = tanh(%15) /* ty=Tensor[(1, 10, 50, 3), float32] */; - (%8, %10, %12, %14, %16) - } - """ - return tvm.relay.fromtext(expected_ir_string) - - def expected_mod_axis2(): - expected_ir_string = """ - #[version = "0.0.5"] - def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 50, 10, 3), float32],\ - Tensor[(1, 50, 10, 3), float32],\ - Tensor[(1, 50, 10, 3), float32],\ - Tensor[(1, 50, 10, 3), float32],\ - Tensor[(1, 50, 10, 3), float32]) { - %0 = abs(%x) /* ty=Tensor[(1, 50, 50, 3), float32] */; - %1 = strided_slice(%0, begin=[0, 0, 0, 0], end=[1, 50, 10, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 50, 10, 3), float32] */; - %2 = strided_slice(%0, begin=[0, 0, 10, 0], end=[1, 50, 20, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 50, 10, 3), float32] */; - %3 = strided_slice(%0, begin=[0, 0, 20, 0], end=[1, 50, 30, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 50, 10, 3), float32] */; - %4 = strided_slice(%0, begin=[0, 0, 30, 0], end=[1, 50, 40, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 50, 10, 3), float32] */; - %5 = strided_slice(%0, begin=[0, 0, 40, 0], end=[1, 50, 50, 3], strides=[1], axes=None)\ - /* ty=Tensor[(1, 50, 10, 3), float32] */; - %6 = (%1, %2, %3, %4, %5); - %7 = %6.0; - %8 = tanh(%7) /* ty=Tensor[(1, 50, 10, 3), float32] */; - %9 = %6.1; - %10 = tanh(%9) /* ty=Tensor[(1, 50, 10, 3), float32] */; - %11 = %6.2; - %12 = tanh(%11) /* ty=Tensor[(1, 50, 10, 3), float32] */; - %13 = %6.3; - %14 = tanh(%13) /* ty=Tensor[(1, 50, 10, 3), float32] */; - %15 = %6.4; - %16 = tanh(%15) /* ty=Tensor[(1, 50, 10, 3), float32] */; - (%8, %10, %12, %14, %16) - } - """ - return tvm.relay.fromtext(expected_ir_string) - - rewrite_split = [legalize.PartitionedSplitRewriter(), legalize.SplitRewriter()] - - mod_axis1 = tvm.IRModule() - func = create_graph(1, 5) - for r in rewrite_split: - func = dataflow_pattern.rewrite(r, func) - mod_axis1["tvmgen_default_ethos_u_main_0"] = func - expected_axis1 = expected_mod_axis1() - tvm.ir.assert_structural_equal(mod_axis1, expected_axis1) - - mod_axis2 = tvm.IRModule() - func = create_graph(2, 5) - for r in rewrite_split: - func = dataflow_pattern.rewrite(r, func) - mod_axis2["tvmgen_default_ethos_u_main_0"] = func - expected_axis2 = expected_mod_axis2() - tvm.ir.assert_structural_equal(mod_axis2, expected_axis2) - - 
-INVERSE_LAYOUT_TRANSFORM_OHWI_MAP = { - "HWIO": [1, 2, 3, 0], - "HWOI": [1, 2, 0, 3], - "OWHI": [0, 1, 2, 3], -} - - -@pytest.mark.parametrize("ifm_shape", [(1, 299, 299, 3), (1, 55, 55, 3)]) -@pytest.mark.parametrize("kernel_shape", [(3, 2), (1, 3)]) -@pytest.mark.parametrize("padding", ["SAME", "VALID"]) -@pytest.mark.parametrize("strides, dilation", [((1, 1), (2, 1)), ((3, 2), (1, 1))]) -@pytest.mark.parametrize("activation", [None, "RELU"]) -def test_tflite_conv2d_legalize(ifm_shape, kernel_shape, padding, strides, dilation, activation): - dtype = "int8" - - def create_tflite_graph_single(): - class Model(tf.Module): - @tf.function - def tf_function(self, input_shape): - op = tf.nn.conv2d( - input_shape, - filters=tf.constant( - np.random.uniform(size=(kernel_shape[0], kernel_shape[1], 3, 3)), - dtype=tf.float32, - ), - strides=strides, - padding=padding, - data_format="NHWC", - dilations=dilation, - ) - if activation: - op = tf.nn.relu(op) - return op - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - op = ext_func.body - ofm_channels = op.attrs.ofm_channels - - # check IFM - ifm = op.args[0].checked_type - assert list(ifm.shape) == list(ifm_shape) - assert str(ifm.dtype) == dtype - assert ifm.shape[3] == ofm_channels - - # check OFM - ofm = op.checked_type - expected_ofm_shape = infra.compute_ofm_shape( - ifm_shape, padding, kernel_shape, strides, dilation - ) - assert list(ofm.shape) == list(expected_ofm_shape) - assert str(ofm.dtype) == dtype - assert ofm.shape[3] == ofm_channels - - # check weights - weights_ohwi = op.args[1].data.asnumpy() - assert str(weights_ohwi.dtype) == dtype - assert weights_ohwi.shape[0] == ofm_channels - assert weights_ohwi.shape[1] == kernel_shape[0] - assert weights_ohwi.shape[2] == kernel_shape[1] - assert weights_ohwi.shape[3] == 3 - - # Check that scale_bias matches weight tensor - assert list(op.args[2].checked_type.shape)[0] == ofm_channels - - expected_padding = infra.compute_padding_shape( - ifm_shape, - expected_ofm_shape, - padding, - (kernel_shape[0], kernel_shape[1]), - strides, - dilation, - ) - assert list(op.attrs.padding) == list(expected_padding) - assert list(op.attrs.strides) == list(strides) - assert list(op.attrs.dilation) == list(dilation) - if activation == "RELU": - assert str(op.attrs.activation) == "CLIP" - - conv2d_pattern_table = [ - ( - ethosu.QnnConv2DParams.composite_name, - ethosu.qnn_conv2d_pattern(), - lambda pat: ethosu.QnnConv2DParams(pat).is_valid(), - ) - ] - - tflite_graph = create_tflite_graph_single() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, conv_params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - mod["main"] = bind_params_by_name(mod["main"], conv_params) - mod = partition_ethosu_by_table(mod, conv2d_pattern_table) - - 
mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.Conv2DRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -def test_tflite_conv2d_with_separate_padding_legalize(): - dtype = "int8" - ifm_shape = (1, 55, 34, 3) - kernel_shape = (3, 2) - strides = (1, 1) - dilation = (2, 1) - padding = (0, 0, 1, 1) - - def create_tflite_graph_single(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - tf_strides = [1, strides[0], strides[1], 1] - op = tf.pad( - x, - [[0, 0], [padding[0], padding[2]], [padding[1], padding[3]], [0, 0]], - "CONSTANT", - ) - weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], 3] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - return tf.nn.conv2d( - op, - weight, - strides=tf_strides, - padding="VALID", - dilations=dilation, - ) - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - op = ext_func.body - ofm_channels = op.attrs.ofm_channels - - # check IFM - ifm = op.args[0].checked_type - assert list(ifm.shape) == list(ifm_shape) - assert str(ifm.dtype) == dtype - assert ifm.shape[3] == ofm_channels - - # check OFM - ofm = op.checked_type - expected_ofm_shape = infra.compute_ofm_shape( - ifm_shape, padding, kernel_shape, strides, dilation - ) - assert list(ofm.shape) == list(expected_ofm_shape) - assert str(ofm.dtype) == dtype - assert ofm.shape[3] == ofm_channels - - # check weights - weights_ohwi = op.args[1].data.asnumpy() - assert str(weights_ohwi.dtype) == dtype - assert weights_ohwi.shape[0] == ofm_channels - assert weights_ohwi.shape[1] == kernel_shape[0] - assert weights_ohwi.shape[2] == kernel_shape[1] - assert weights_ohwi.shape[3] == 3 - - # Check that scale_bias matches weight tensor - assert list(op.args[2].checked_type.shape)[0] == ofm_channels - - assert list(op.attrs.padding) == list(padding) - assert list(op.attrs.strides) == list(strides) - assert list(op.attrs.dilation) == list(dilation) - - conv2d_pattern_table = [ - ( - ethosu.QnnConv2DParams.composite_name, - ethosu.qnn_conv2d_pattern(), - lambda pat: ethosu.QnnConv2DParams(pat).is_valid(), - ) - ] - - tflite_graph = create_tflite_graph_single() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, conv_params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - mod["main"] = bind_params_by_name(mod["main"], conv_params) - mod = partition_ethosu_by_table(mod, conv2d_pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.Conv2DRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -def test_tflite_conv2d_with_separate_channel_padding_legalize(): - dtype = "int8" - ifm_shape = (1, 55, 34, 3) - kernel_shape = (3, 2) - strides 
= (1, 1) - dilation = (2, 1) - padding_ch = (1, 1) - - class ArePadOnGraph(ExprVisitor): - """ - Visits the Graph recursively and checks if it contains 'nn.pad' op - """ - - def __init__(self): - ExprVisitor.__init__(self) - self.on_graph = False - - def visit_call(self, call): - if isinstance(call.op, tvm.ir.Op): - if str(call.op.name) == "nn.pad": - self.on_graph = True - - return super().visit_call(call) - - def are_pad_on_graph(self, subgraph) -> bool: - """ - This function recursively visits the graph and checks if 'nn.pad' op is on graph - """ - self.visit(subgraph) - return self.on_graph - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - tf_strides = [1, strides[0], strides[1], 1] - op = tf.pad( - x, - [[0, 0], [0, 0], [0, 0], [padding_ch[0], padding_ch[1]]], - "CONSTANT", - ) - # HWIO - weight_shape = [ - kernel_shape[0], - kernel_shape[1], - ifm_shape[3] + padding_ch[0] + padding_ch[1], - 3, - ] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - return tf.nn.conv2d( - op, - weight, - strides=tf_strides, - padding="VALID", - dilations=dilation, - ) - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - - assert ArePadOnGraph().are_pad_on_graph(ext_func.body) == True - - conv2d_pattern_table = [ - ( - ethosu.ChannelPadParams.composite_name, - ethosu.pad_pattern(), - lambda pat: ethosu.ChannelPadParams(pat).is_valid(), - ), - ( - ethosu.QnnConv2DParams.composite_name, - ethosu.qnn_conv2d_pattern(), - lambda pat: ethosu.QnnConv2DParams(pat).is_valid(), - ), - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, conv_params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - mod["main"] = bind_params_by_name(mod["main"], conv_params) - mod = partition_ethosu_by_table(mod, conv2d_pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.Conv2DRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize("ifm_shape", [(1, 299, 299, 3), (1, 123, 17, 7)]) -@pytest.mark.parametrize("kernel_shape", [(7, 3), (22, 5)]) -@pytest.mark.parametrize("padding", ["SAME", "VALID"]) -@pytest.mark.parametrize("strides, dilation", [((1, 1), (2, 1)), ((3, 2), (1, 1))]) -@pytest.mark.parametrize("activation", ["RELU", None]) -def test_tflite_depthwise_conv_2d_legalize( - ifm_shape, kernel_shape, padding, strides, dilation, activation -): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def depthwise_conv2d(self, x): - weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], 1] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - # The input strides to 
the TensorFlow API needs to be of shape 1x4 - tf_strides = [1, strides[0], strides[1], 1] - op = tf.nn.depthwise_conv2d( - x, weight, strides=tf_strides, padding=padding, dilations=dilation - ) - if activation: - op = tf.nn.relu(op) - return op - - model = Model() - concrete_func = model.depthwise_conv2d.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - op = ext_func.body - ofm_channels = op.attrs.ofm_channels - - # check IFM - ifm = op.args[0].checked_type - assert list(ifm.shape) == list(ifm_shape) - assert str(ifm.dtype) == dtype - assert ifm.shape[3] == ofm_channels - - # check OFM - ofm = op.checked_type - expected_ofm_shape = infra.compute_ofm_shape( - ifm_shape, padding, kernel_shape, strides, dilation - ) - assert list(ofm.shape) == list(expected_ofm_shape) - assert str(ofm.dtype) == dtype - assert ofm.shape[3] == ofm_channels - - # check weights - weights_ohwi = op.args[1].data.asnumpy() - assert str(weights_ohwi.dtype) == dtype - assert weights_ohwi.shape[0] == ofm_channels - assert weights_ohwi.shape[1] == kernel_shape[0] - assert weights_ohwi.shape[2] == kernel_shape[1] - assert weights_ohwi.shape[3] == 1 # only depth multiplier 1 is supported - - # Check that scale_bias matches weight tensor - assert list(op.args[2].checked_type.shape)[0] == ofm_channels - - expected_padding = infra.compute_padding_shape( - ifm_shape, expected_ofm_shape, padding, kernel_shape, strides, dilation - ) - assert list(op.attrs.padding) == list(expected_padding) - assert op.attrs.ofm_channels == ofm_channels - assert list(op.attrs.strides) == list(strides) - assert list(op.attrs.dilation) == list(dilation) - if activation == "RELU": - assert str(op.attrs.activation) == "CLIP" - - depthwise_pattern_table = [ - ( - ethosu.QnnDepthwiseConv2DParams.composite_name, - ethosu.qnn_depthwise_conv2d_pattern(), - lambda pat: ethosu.QnnDepthwiseConv2DParams(pat).is_valid(), - ) - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - mod["main"] = bind_params_by_name(mod["main"], params) - mod = partition_ethosu_by_table(mod, depthwise_pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.DepthwiseConv2DRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -def test_tflite_depthwise_conv2d_with_separate_padding_legalize(): - dtype = "int8" - ifm_shape = (1, 23, 32, 7) - kernel_shape = (1, 2) - strides = (3, 2) - dilation = (1, 1) - padding = (0, 0, 1, 1) - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - tf_strides = [1, strides[0], strides[1], 1] - op = tf.pad( - x, - [[0, 0], [padding[0], padding[2]], [padding[1], padding[3]], [0, 0]], - "CONSTANT", - ) - weight_shape 
= [kernel_shape[0], kernel_shape[1], ifm_shape[3], 1] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - return tf.nn.depthwise_conv2d( - op, - weight, - strides=tf_strides, - padding="VALID", - dilations=dilation, - ) - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - op = ext_func.body - ofm_channels = op.attrs.ofm_channels - - # check IFM - ifm = op.args[0].checked_type - assert list(ifm.shape) == list(ifm_shape) - assert str(ifm.dtype) == dtype - assert ifm.shape[3] == ofm_channels - - # check OFM - ofm = op.checked_type - expected_ofm_shape = infra.compute_ofm_shape( - ifm_shape, padding, kernel_shape, strides, dilation - ) - assert list(ofm.shape) == list(expected_ofm_shape) - assert str(ofm.dtype) == dtype - assert ofm.shape[3] == ofm_channels - - # check weights - weights_ohwi = op.args[1].data.asnumpy() - assert str(weights_ohwi.dtype) == dtype - assert weights_ohwi.shape[0] == ofm_channels - assert weights_ohwi.shape[1] == kernel_shape[0] - assert weights_ohwi.shape[2] == kernel_shape[1] - assert weights_ohwi.shape[3] == 1 # only depth multiplier 1 is supported - - # Check that scale_bias matches weight tensor - assert list(op.args[2].checked_type.shape)[0] == ofm_channels - - assert list(op.attrs.padding) == list(padding) - assert op.attrs.ofm_channels == ofm_channels - assert list(op.attrs.strides) == list(strides) - assert list(op.attrs.dilation) == list(dilation) - - depthwise_pattern_table = [ - ( - ethosu.QnnDepthwiseConv2DParams.composite_name, - ethosu.qnn_depthwise_conv2d_pattern(), - lambda pat: ethosu.QnnDepthwiseConv2DParams(pat).is_valid(), - ) - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - mod["main"] = bind_params_by_name(mod["main"], params) - mod = partition_ethosu_by_table(mod, depthwise_pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.DepthwiseConv2DRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize("ifm_shape", [(1, 55, 55, 3), (1, 23, 32, 7)]) -@pytest.mark.parametrize("padding", [(0, 1, 0, 0), (1, 1, 1, 1), (1, 1, 5, 5)]) -@pytest.mark.parametrize("const_value", [0, 5, 125, -5]) -def test_tflite_separate_padding_legalize(ifm_shape, padding, const_value): - dtype = "int8" - kernel_shape = (1, 1) - strides = (1, 1) - dilation = (1, 1) - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - return tf.pad( - x, - [[0, 0], [padding[0], padding[2]], [padding[1], padding[3]], [0, 0]], - "CONSTANT", - const_value, - ) - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - 
tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - op = ext_func.body - ofm_channels = op.attrs.ofm_channels - - # check IFM - ifm = op.args[0].checked_type - assert list(ifm.shape) == list(ifm_shape) - assert str(ifm.dtype) == dtype - assert ifm.shape[3] == ofm_channels - - # check OFM - ofm = op.checked_type - expected_ofm_shape = infra.compute_ofm_shape( - ifm_shape, padding, kernel_shape, strides, dilation - ) - assert list(ofm.shape) == list(expected_ofm_shape) - assert str(ofm.dtype) == dtype - assert ofm.shape[3] == ofm_channels - - # check weights - weights_ohwi = op.args[1].data.asnumpy() - assert str(weights_ohwi.dtype) == dtype - assert weights_ohwi.shape[0] == ofm_channels - assert weights_ohwi.shape[1] == kernel_shape[0] - assert weights_ohwi.shape[2] == kernel_shape[1] - assert weights_ohwi.shape[3] == 1 # only depth multiplier 1 is supported - - # Check that scale_bias matches weight tensor - assert list(op.args[2].checked_type.shape)[0] == ofm_channels - - assert list(op.attrs.padding) == list(padding) - assert op.attrs.ofm_channels == ofm_channels - assert list(op.attrs.strides) == list(strides) - assert list(op.attrs.dilation) == list(dilation) - - pad_pattern_table = [ - ( - ethosu.PadParams.composite_name, - ethosu.pad_pattern(), - lambda pat: ethosu.PadParams(pat).is_valid(), - ), - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - mod["main"] = bind_params_by_name(mod["main"], params) - mod = partition_ethosu_by_table(mod, pad_pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.PadRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize("ifm_shape", [(1, 55, 55, 3), (1, 23, 32, 7)]) -@pytest.mark.parametrize("channel_padding", [(0, 1), (1, 1), (5, 2)]) -@pytest.mark.parametrize("const_value", [0, 5, 125, -5]) -def test_tflite_separate_channel_padding_legalize(ifm_shape, channel_padding, const_value): - dtype = "int8" - padding = (0, 0, 0, 0) - - class AreConcatenateOnGraph(ExprVisitor): - """ - Visits the Graph recursively and checks if it contains 'concatenate' op - """ - - def __init__(self): - ExprVisitor.__init__(self) - self.on_graph = False - - def visit_call(self, call): - if isinstance(call.op, tvm.ir.Op): - if str(call.op.name) == "concatenate": - self.on_graph = True - - return super().visit_call(call) - - def are_concatenate_on_graph(self, subgraph) -> bool: - """ - This function recursively visits the graph and checks if 'concatenate' op is on graph - """ - self.visit(subgraph) - return self.on_graph - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - return tf.pad( - x, - [ - [0, 0], - [padding[0], padding[2]], - 
[padding[1], padding[3]], - [channel_padding[0], channel_padding[1]], - ], - "CONSTANT", - const_value, - ) - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func, channel_padding): - - op = ext_func.body - - pad_before = 0 - pad_after = 0 - if channel_padding[0] == 0 and channel_padding[1] > 0: - pad_after = ext_func.body.args[0][1].args[0].checked_type.shape[3] - ifm = ext_func.body.args[0][0].args[0].checked_type - if channel_padding[0] > 0 and channel_padding[1] == 0: - pad_before = ext_func.body.args[0][0].args[0].checked_type.shape[3] - ifm = ext_func.body.args[0][1].args[0].checked_type - if channel_padding[0] > 0 and channel_padding[1] > 0: - pad_before = ext_func.body.args[0][0].args[0].checked_type.shape[3] - ifm = ext_func.body.args[0][1].args[0].checked_type - pad_after = ext_func.body.args[0][2].args[0].checked_type.shape[3] - - # check IFM - assert list(ifm.shape) == list(ifm_shape) - assert str(ifm.dtype) == dtype - assert ifm.shape[3] == ifm_shape[3] - - # check OFM - ofm = op.checked_type - expected_ofm_shape = list(ifm_shape) - expected_ofm_shape[3] = channel_padding[0] + ifm_shape[3] + channel_padding[1] - assert list(ofm.shape) == expected_ofm_shape - assert str(ofm.dtype) == dtype - - # check padding - assert [pad_before, pad_after] == list(channel_padding) - - # check if relay contains 'concatenate' op - assert AreConcatenateOnGraph().are_concatenate_on_graph(ext_func.body) == True - - pad_pattern_table = [ - ( - ethosu.ChannelPadParams.composite_name, - ethosu.pad_pattern(), - lambda pat: ethosu.ChannelPadParams(pat).is_valid(), - ), - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - mod["main"] = bind_params_by_name(mod["main"], params) - mod = partition_ethosu_by_table(mod, pad_pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.ChannelPadRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"], channel_padding) - - -@pytest.mark.parametrize("pooling_type", ["MAX", "AVG"]) -@pytest.mark.parametrize("ifm_shape", [[1, 3, 4, 3], [1, 4, 5, 2]]) -@pytest.mark.parametrize( - "pool_shape, strides, activation_function, padding", - [([1, 2], [1, 2], "NONE", "SAME"), ([2, 3], [2, 3], "RELU", "VALID")], -) -def test_tflite_pool2d_legalize( - ifm_shape, pooling_type, strides, pool_shape, activation_function, padding -): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - if pooling_type == "MAX": - op = tf.nn.max_pool(x, pool_shape, strides, padding) - elif pooling_type == "AVG": - op = tf.nn.avg_pool(x, pool_shape, strides, padding) - if activation_function == "RELU": - op = 
tf.nn.relu(op) - return op - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - ofm_shape = infra.compute_ofm_shape(ifm_shape, padding, pool_shape, strides) - op = ext_func.body - assert list(op.args[0].checked_type.shape) == ifm_shape - assert op.args[0].checked_type.dtype == dtype - assert list(op.checked_type.shape) == ofm_shape - assert op.checked_type.dtype == dtype - assert op.attrs.pooling_type == pooling_type - assert list(op.attrs.strides) == strides - assert list(op.attrs.padding) == infra.compute_padding_shape( - ifm_shape, ofm_shape, padding, pool_shape, strides - ) - assert list(op.attrs.pool_shape) == pool_shape - assert op.attrs.ofm_channels == ifm_shape[3] - if activation_function == "RELU": - assert str(op.attrs.activation) == "CLIP" - - if pooling_type == "MAX": - rewriter = legalize.MaxPoolingRewriter() - pattern_table = [ - ( - ethosu.MaxPool2DParams.composite_name, - ethosu.qnn_maxpool2d_pattern(), - lambda pat: ethosu.MaxPool2DParams(pat).is_valid(), - ), - ] - elif pooling_type == "AVG": - rewriter = legalize.AvgPoolingRewriter() - pattern_table = [ - ( - ethosu.AvgPool2DParams.composite_name, - ethosu.qnn_avgpool2d_pattern(), - lambda pat: ethosu.AvgPool2DParams(pat).is_valid(), - ), - ] - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"x": ifm_shape}, - dtype_dict={"x": dtype}, - ) - mod = partition_ethosu_by_table(mod, pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize("pooling_type", ["MAX", "AVG"]) -@pytest.mark.parametrize( - "ifm_shape, pool_shape, strides, activation_function, padding", - [ - ([1, 4, 4, 3], [4, 4], [4, 4], "NONE", "SAME"), - ([1, 4, 4, 3], [4, 4], [4, 4], "RELU", "VALID"), - ([1, 25, 5, 64], [25, 5], [25, 5], "NONE", "VALID"), - ([1, 25, 5, 64], [25, 5], [25, 5], "RELU", "SAME"), - ], -) -def test_tflite_pool2d_same_ifm_and_kernel_shape_legalize( - pooling_type, ifm_shape, pool_shape, strides, activation_function, padding -): - dtype = "int8" - strides_legalized = [1, 1] - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - if pooling_type == "MAX": - op = tf.nn.max_pool(x, pool_shape, strides, padding) - elif pooling_type == "AVG": - op = tf.nn.avg_pool(x, pool_shape, strides, padding) - if activation_function == "RELU": - op = tf.nn.relu(op) - return op - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - 
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def expected_mod(): - - expected_ir_string = "" - - if activation_function == "NONE" and pooling_type == "AVG": - expected_ir_string = f""" - #[version = "0.0.5"] - def @main(%x: Tensor[{str(tuple(ifm_shape))}, {dtype}], output_tensor_names=\ - ["Identity"]) -> Tensor[(1, 1, 1, {str(ifm_shape[3])}), {dtype}] {{ - @tvmgen_default_ethos_u_main_0(%x) - }} - - def @tvmgen_default_ethos_u_main_0(%y: Tensor[{str(tuple(ifm_shape))}, {dtype}], \ - Compiler="ethos-u", Primitive=1, Inline=1, \ - global_symbol="tvmgen_default_ethos_u_main_0") -> Tensor[(1, 1, 1, \ - {str(ifm_shape[3])}), {dtype}] {{ - %2 = fn (%z: Tensor[{str(tuple(ifm_shape))}, {dtype}], \ - PartitionedFromPattern="cast_nn.avg_pool2d_cast_", \ - Composite="ethos-u.avgpool2d") -> Tensor[(1, 1, 1, {str(ifm_shape[3])}), \ - {dtype}] {{ - %0 = cast(%z, dtype="int32") ; - %1 = nn.avg_pool2d(%0, pool_size={str(pool_shape)}, strides={str(strides)}, \ - padding=[0, 0, 0, 0], layout="NHWC") ; - cast(%1, dtype="{dtype}") - }} ; - %2(%y) - }} - """ - - if activation_function == "RELU" and pooling_type == "AVG": - expected_ir_string = f""" - #[version = "0.0.5"] - def @main(%x: Tensor[{str(tuple(ifm_shape))}, {dtype}], output_tensor_names=\ - ["Identity"]) -> Tensor[(1, 1, 1, {str(ifm_shape[3])}), {dtype}] {{ - @tvmgen_default_ethos_u_main_0(%x) - }} - - def @tvmgen_default_ethos_u_main_0(%y: Tensor[{str(tuple(ifm_shape))}, {dtype}], \ - Compiler="ethos-u", Primitive=1, Inline=1, \ - global_symbol="tvmgen_default_ethos_u_main_0") -> Tensor[(1, 1, 1, \ - {str(ifm_shape[3])}), {dtype}] {{ - %3 = fn (%z: Tensor[{str(tuple(ifm_shape))}, {dtype}], \ - PartitionedFromPattern="cast_nn.avg_pool2d_cast_clip_", \ - Composite="ethos-u.avgpool2d") -> Tensor[(1, 1, 1, {str(ifm_shape[3])}), \ - {dtype}] {{ - %0 = cast(%z, dtype="int32") ; - %1 = nn.avg_pool2d(%0, pool_size={str(pool_shape)}, strides={str(strides)}, \ - padding=[0, 0, 0, 0], layout="NHWC") ; - %2 = cast(%1, dtype="{dtype}") ; - clip(%2, a_min=-128f, a_max=127f) - }} ; - %3(%y) - }} - """ - - if activation_function == "NONE" and pooling_type == "MAX": - expected_ir_string = f""" - #[version = "0.0.5"] - def @main(%x: Tensor[{str(tuple(ifm_shape))}, {dtype}], output_tensor_names=\ - ["Identity"]) -> Tensor[(1, 1, 1, {str(ifm_shape[3])}), {dtype}] {{ - @tvmgen_default_ethos_u_main_0(%x) - }} - - def @tvmgen_default_ethos_u_main_0(%y: Tensor[{str(tuple(ifm_shape))}, {dtype}], \ - Compiler="ethos-u", Primitive=1, Inline=1, \ - global_symbol="tvmgen_default_ethos_u_main_0") -> Tensor[(1, 1, 1, \ - {str(ifm_shape[3])}), {dtype}] {{ - %0 = fn (%z: Tensor[{str(tuple(ifm_shape))}, {dtype}], \ - PartitionedFromPattern="nn.max_pool2d_", \ - Composite="ethos-u.maxpool2d") -> Tensor[(1, 1, 1, {str(ifm_shape[3])}), \ - {dtype}] {{ - nn.max_pool2d(%z, pool_size={str(pool_shape)}, strides={str(strides)}, \ - padding=[0, 0, 0, 0], layout="NHWC") - }} ; - %0(%y) - }} - """ - - if activation_function == "RELU" and pooling_type == "MAX": - expected_ir_string = f""" - #[version = "0.0.5"] - def @main(%x: Tensor[{str(tuple(ifm_shape))}, {dtype}] , output_tensor_names=\ - ["Identity"]) -> Tensor[(1, 1, 
1, {str(ifm_shape[3])}), {dtype}] {{ - @tvmgen_default_ethos_u_main_0(%x) - }} - - def @tvmgen_default_ethos_u_main_0(%y: Tensor[{str(tuple(ifm_shape))}, {dtype}] , \ - Compiler="ethos-u", Primitive=1, Inline=1, \ - global_symbol="tvmgen_default_ethos_u_main_0") -> Tensor[(1, 1, 1, \ - {str(ifm_shape[3])}), {dtype}] {{ - %1 = fn (%z: Tensor[{str(tuple(ifm_shape))}, {dtype}] , \ - PartitionedFromPattern="nn.max_pool2d_clip_", \ - Composite="ethos-u.maxpool2d") -> Tensor[(1, 1, 1, {str(ifm_shape[3])}), \ - {dtype}] {{ - %0 = nn.max_pool2d(%z, pool_size={str(pool_shape)}, strides={str(strides)}, \ - padding=[0, 0, 0, 0], layout="NHWC"); - clip(%0, a_min=-128f, a_max=127f) - }}; - %1(%y) - }} - """ - - return tvm.relay.fromtext(expected_ir_string) - - def verify(ext_func): - ofm_shape = infra.compute_ofm_shape(ifm_shape, padding, pool_shape, strides) - op = ext_func.body - assert list(op.args[0].checked_type.shape) == ifm_shape - assert op.args[0].checked_type.dtype == dtype - assert list(op.checked_type.shape) == ofm_shape - assert op.checked_type.dtype == dtype - assert op.attrs.pooling_type == pooling_type - assert list(op.attrs.strides) == strides_legalized - assert list(op.attrs.padding) == infra.compute_padding_shape( - ifm_shape, ofm_shape, padding, pool_shape, strides - ) - assert list(op.attrs.padding) == infra.compute_padding_shape( - ifm_shape, ofm_shape, padding, pool_shape, strides_legalized - ) - assert list(op.attrs.pool_shape) == pool_shape - assert op.attrs.ofm_channels == ifm_shape[3] - if activation_function == "RELU": - assert str(op.attrs.activation) == "CLIP" - - if pooling_type == "MAX": - rewriter = legalize.MaxPoolingRewriter() - pattern_table = [ - ( - ethosu.MaxPool2DParams.composite_name, - ethosu.qnn_maxpool2d_pattern(), - lambda pat: ethosu.MaxPool2DParams(pat).is_valid(), - ), - ] - - if pooling_type == "AVG": - rewriter = legalize.AvgPoolingRewriter() - pattern_table = [ - ( - ethosu.AvgPool2DParams.composite_name, - ethosu.qnn_avgpool2d_pattern(), - lambda pat: ethosu.AvgPool2DParams(pat).is_valid(), - ), - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"x": ifm_shape}, - dtype_dict={"x": dtype}, - ) - mod = partition_ethosu_by_table(mod, pattern_table) - - expected = expected_mod() - tvm.ir.assert_structural_equal(mod, expected) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize("operator_type", ["ADD", "SUB", "MUL", "MIN", "MAX"]) -@pytest.mark.parametrize( - "ifm_shape, ifm2_shape, reversed_operands", - [ - ([1, 2, 3, 4], [1, 2, 3, 4], False), - ([1, 2, 3, 4], [1, 1, 3, 1], False), - ([1, 1, 3, 1], [1, 2, 3, 4], True), - ([1, 4, 4], [4, 1], False), - ([4], [4], False), - ([4], [1, 2, 3, 4], True), - ([1, 4, 4], [4, 1], False), - ], -) -@pytest.mark.parametrize("activation_function", [None, tf.nn.relu]) -def test_tflite_binary_elemwise_legalize( - operator_type, - ifm_shape, - ifm2_shape, - reversed_operands, - activation_function, -): - np.random.seed(0) - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x, y): - if operator_type == "ADD": - op = tf.math.add(x, y) - elif operator_type == "SUB": - op = tf.math.subtract(x, y) - elif operator_type == "MUL": - op = tf.math.multiply(x, y) - elif operator_type == "MIN": - op 
= tf.math.minimum(x, y) - elif operator_type == "MAX": - op = tf.math.maximum(x, y) - if activation_function: - op = activation_function(op) - return op - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32), tf.TensorSpec(ifm2_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - data2 = np.random.rand(*tuple(ifm2_shape)) * 2 - yield [data.astype(np.float32), data2.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - out_shape = ifm2_shape if reversed_operands else ifm_shape - shapes = [ifm_shape, ifm2_shape] - ifm_index, ifm2_index = (1, 0) if reversed_operands else (0, 1) - op = ext_func.body - - has_reshaped_output = False - has_separate_requantize = False - shapes_padded = [[1] * (4 - len(s)) + s for s in shapes] - out_padded = [1] * (4 - len(out_shape)) + out_shape - if op.op.name == "contrib.ethosu.identity": - op = op.args[0] - has_separate_requantize = True - if op.op.name == "reshape": - has_reshaped_output = True - op = op.args[0] - - assert list(op.args[0].checked_type.shape) == shapes_padded[ifm_index] - assert list(op.args[1].checked_type.shape) == shapes_padded[ifm2_index] - assert op.args[0].checked_type.dtype == dtype - assert list(op.checked_type.shape) == out_padded - assert op.checked_type.dtype == dtype - assert op.attrs.operator_type == operator_type - assert op.attrs.reversed_operands == reversed_operands - if activation_function != None: - assert str(op.attrs.activation) == "CLIP" - - if operator_type in ["MIN", "MAX"]: - if has_separate_requantize: - # In case when requantize cannot be fused with MIN/MAX + CLIP due to hardware constraints - # there should be default quantization values since requantize is separate operation. - assert float(op.attrs.ifm_scale) == 1.0 - assert int(op.attrs.ifm_zero_point) == 0 - assert float(op.attrs.ifm2_scale) == 1.0 - assert int(op.attrs.ifm2_zero_point) == 0 - assert float(op.attrs.ofm_scale) == 1.0 - assert int(op.attrs.ofm_zero_point) == 0 - else: - # MIN and MAX with an activation must have a requantize operation - # baked into the output. To check the extra requantize node was - # picked up by the pattern, we can make sure the quantization - # information is not default. 
- assert float(op.attrs.ifm_scale) != 1.0 - assert int(op.attrs.ifm_zero_point) != 0 - assert float(op.attrs.ifm2_scale) != 1.0 - assert int(op.attrs.ifm2_zero_point) != 0 - assert float(op.attrs.ofm_scale) != 1.0 - assert int(op.attrs.ofm_zero_point) != 0 - - if has_reshaped_output: - assert list(ext_func.body.checked_type.shape) == out_shape - - if operator_type == "ADD": - rewriter = legalize.AddRewriter() - pattern_table = [ - ( - ethosu.AddParams.composite_name, - ethosu.qnn_add_pattern(), - lambda pat: ethosu.AddParams(pat).is_valid(), - ), - ] - elif operator_type == "SUB": - rewriter = legalize.SubRewriter() - pattern_table = [ - ( - ethosu.SubParams.composite_name, - ethosu.qnn_subtract_pattern(), - lambda pat: ethosu.SubParams(pat).is_valid(), - ), - ] - elif operator_type == "MUL": - rewriter = legalize.MulRewriter() - pattern_table = [ - ( - ethosu.MulParams.composite_name, - ethosu.qnn_mul_pattern(), - lambda pat: ethosu.MulParams(pat).is_valid(), - ), - ] - elif operator_type == "MIN": - rewriter = [legalize.MinRewriter(), legalize.RequantizeRewriter()] - pattern_table = [ - ( - ethosu.MinParams.composite_name, - ethosu.minimum_clip_requantize_pattern(), - lambda pat: ethosu.MinParams(pat).is_valid(), - ), - ( - ethosu.MinParams.composite_name, - ethosu.minimum_pattern(), - lambda pat: ethosu.MinParams(pat).is_valid(), - ), - ( - ethosu.RequantizeParams.composite_name, - ethosu.requantize_pattern(), - lambda pat: ethosu.RequantizeParams(pat).is_valid(), - ), - ] - elif operator_type == "MAX": - rewriter = [legalize.MaxRewriter(), legalize.RequantizeRewriter()] - pattern_table = [ - ( - ethosu.MaxParams.composite_name, - ethosu.maximum_clip_requantize_pattern(), - lambda pat: ethosu.MaxParams(pat).is_valid(), - ), - ( - ethosu.MaxParams.composite_name, - ethosu.maximum_pattern(), - lambda pat: ethosu.MaxParams(pat).is_valid(), - ), - ( - ethosu.RequantizeParams.composite_name, - ethosu.requantize_pattern(), - lambda pat: ethosu.RequantizeParams(pat).is_valid(), - ), - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"x": ifm_shape, "y": ifm2_shape}, - dtype_dict={"x": dtype, "y": dtype}, - ) - mod = partition_ethosu_by_table(mod, pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -# This test is for checking the case when requantize cannot be fused with MIN/MAX + CLIP due to hardware constraints. 
-def test_tflite_max_relu_n1_to_1_legalize(): - ifm_shape = [1, 4, 8, 16] - test_tflite_binary_elemwise_legalize("MAX", ifm_shape, ifm_shape, False, relu_n1_to_1) - - -def test_binary_add_from_constant_scalar(): - dtype = "uint8" - ifm_shape = (1, 4, 4, 8) - - def create_graph(): - inp = relay.var("input", shape=ifm_shape, dtype=dtype) - scalar = relay.const(np.ones((1, 1, 1, 1), dtype=dtype), dtype=dtype) - add = relay.qnn.op.add( - inp, - scalar, - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), - ) - func = relay.Function(relay.analysis.free_vars(add), add) - return tvm.IRModule.from_expr(func) - - def verify(ext_func): - op = ext_func.body - assert list(op.args[0].checked_type.shape) == [1, 4, 4, 8] - assert list(op.args[1].checked_type.shape) == [1, 1, 1, 1] - assert op.args[0].checked_type.dtype == "uint8" - assert list(op.checked_type.shape) == [1, 4, 4, 8] - assert op.checked_type.dtype == "uint8" - assert op.attrs.operator_type == "ADD" - - rewriter = legalize.AddRewriter() - pattern_table = [ - ( - ethosu.AddParams.composite_name, - ethosu.qnn_add_pattern(), - lambda pat: ethosu.AddParams(pat).is_valid(), - ), - ] - - mod = create_graph() - mod = partition_ethosu_by_table(mod, pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "ifm_shape, ifm2_shape, reversed_operands", - [ - ([1, 2, 3, 4], [1, 2, 3, 4], False), - ([1, 2, 3, 4], [1, 1, 3, 1], False), - ([1, 1, 3, 1], [1, 2, 3, 4], True), - ], -) -def test_ethosu_left_shift_binary_elemwise_legalize(ifm_shape, ifm2_shape, reversed_operands): - dtype = "int32" - operator_type = "SHL" - - def create_graph(): - input1 = relay.var("x1", shape=ifm_shape, dtype=dtype) - input2 = relay.var("x2", shape=ifm2_shape, dtype=dtype) - c1 = relay.left_shift(input1, input2) - f = relay.Function([input1, input2], c1) - mod = tvm.IRModule() - mod["main"] = f - return mod - - def verify(ext_func): - out_shape = ifm2_shape if reversed_operands else ifm_shape - shapes = [ifm_shape, ifm2_shape] - ifm_index, ifm2_index = (1, 0) if reversed_operands else (0, 1) - op = ext_func.body - assert list(op.args[0].checked_type.shape) == shapes[ifm_index] - assert list(op.args[1].checked_type.shape) == shapes[ifm2_index] - assert op.args[0].checked_type.dtype == dtype - assert list(op.checked_type.shape) == out_shape - assert op.checked_type.dtype == dtype - assert op.attrs.operator_type == operator_type - assert op.attrs.reversed_operands == reversed_operands - assert str(op.attrs.activation) == "NONE" - - rewriter = legalize.ShlRewriter() - pattern_table = [ - ( - ethosu.ShlParams.composite_name, - ethosu.shl_pattern(), - lambda pat: ethosu.ShlParams(pat).is_valid(), - ), - ] - - mod = create_graph() - mod = partition_ethosu_by_table(mod, pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "ifm_shape, new_shape", - [ - ((1, 4, 1, 2), (4, 2)), - ((1, 5, 1, 20), (100,)), - ((12, 20), (1, 6, 4, 10)), - ((30,), (10, 1, 3)), - ], -) -def test_relay_reshape_legalize(ifm_shape, new_shape): - - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - reshape = relay.op.reshape(ifm, 
new_shape) - func = relay.Function([ifm], reshape) - mod = tvm.IRModule() - mod["main"] = func - mod = relay.transform.InferType()(mod) - - reshape_pattern_table = [ - ( - ethosu.ReshapeParams.composite_name, - ethosu.reshape_pattern(), - lambda pat: ethosu.ReshapeParams(pat).is_valid(), - ), - ] - - mod = partition_ethosu_by_table(mod, reshape_pattern_table) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.ReshapeRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.NoOpRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod = relay.transform.InferType()(mod) - - ext_func = mod["tvmgen_default_ethos_u_main_0"] - - identity = ext_func.body - assert identity.op.name == "contrib.ethosu.identity" - - # check that the reshape is still there - reshape = identity.args[0] - assert reshape.op.name == "reshape" - - # check that identity's output shape matches reshape's output shape - assert tuple(identity.checked_type.shape) == new_shape - - -@pytest.mark.parametrize( - "ifm_shape, begin, size", - [ - ([1, 10, 50, 4], [0, 5, 11, 2], [1, 5, 11, 1]), - ([15, 17, 3], [3, 0, 1], [8, 17, 2]), - ([7, 6043], [0, 704], [1, 2860]), - ([5000], [123], [2151]), - ], -) -def test_tflite_slice(ifm_shape, begin, size): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def slice_func(self, x): - return tf.slice(x, begin, size) - - model = Model() - - # Save the model - concrete_func = model.slice_func.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - identity = ext_func.body - assert identity.op.name == "contrib.ethosu.identity" - - # check that the strided_slice is still there - strided_slice = identity.args[0] - assert strided_slice.op.name == "strided_slice" - - # check that identity's output shape matches strided slice's output shape - assert list(identity.checked_type.shape) == size - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - strided_slice_pattern_table = [ - ( - ethosu.StridedSliceParams.composite_name, - ethosu.strided_slice_pattern(), - lambda pat: ethosu.StridedSliceParams(pat).is_valid(), - ), - ] - mod = partition_ethosu_by_table(mod, strided_slice_pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.StridedSliceRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.NoOpRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod = relay.transform.InferType()(mod) - - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "ifm_shape, begin, end", - [([1, 1, 5, 8], [0, 0, 0, 0], [1, 1, 2, 3]), ([1, 3, 3], [0, 1, 2], 
[1, 2, 3])], -) -def test_tflite_strided_slice(ifm_shape, begin, end): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def strided_slice_func(self, x): - return tf.strided_slice(x, begin, end) - - model = Model() - - # Save the model - concrete_func = model.strided_slice_func.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - identity = ext_func.body - assert identity.op.name == "contrib.ethosu.identity" - - # check that the strided_slice is still there - strided_slice = identity.args[0] - assert strided_slice.op.name == "strided_slice" - - # check that identity's output shape matches strided slice's output shape - size = list(np.array(end) - np.array(begin)) - assert list(identity.checked_type.shape) == size - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - strided_slice_pattern_table = [ - ( - ethosu.StridedSliceParams.composite_name, - ethosu.strided_slice_pattern(), - lambda pat: ethosu.StridedSliceParams(pat).is_valid(), - ), - ] - mod = partition_ethosu_by_table(mod, strided_slice_pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.StridedSliceRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.NoOpRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod = relay.transform.InferType()(mod) - - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize("operator_type", ["ABS"]) -@pytest.mark.parametrize( - "ifm_shape", - [[1, 2, 3, 4], [1, 7, 3], [8, 3, 1], [11, 22], [300]], -) -def test_tflite_unary_elemwise_legalize( - operator_type, - ifm_shape, -): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def abs_func(self, x): - if operator_type == "ABS": - op = tf.math.abs(x) - return op - - model = Model() - - # Save the model - concrete_func = model.abs_func.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - out_shape = ifm_shape - func_body = ext_func.body - - # If we legalized the unary elementwise op into 4D - if func_body.op.name == "reshape": - 
reshape = func_body - unary = func_body.args[0] - reshape2 = unary.args[0] - - # Check the input to the reshape - reshape2_in_shape = [i for i in reshape2.args[0].checked_type.shape] - assert reshape2_in_shape == ifm_shape - - # Check that the unary elementwise operator is 4D after reshape - assert len(unary.checked_type.shape) == 4 - assert unary.args[0].checked_type.dtype == dtype - - # Check that the output of the graph has the same shape as input - reshape_out_shape = [i for i in reshape.checked_type.shape] - assert reshape_out_shape == ifm_shape - assert unary.attrs.operator_type == operator_type - - else: - unary = func_body - - # Check the IFM - assert list(unary.args[0].checked_type.shape) == ifm_shape - assert unary.args[0].checked_type.dtype == dtype - - # Check the OFM - assert list(unary.checked_type.shape) == out_shape - assert unary.checked_type.dtype == dtype - - # operator type check - assert unary.attrs.operator_type == operator_type - - if operator_type == "ABS": - rewriter = legalize.AbsRewriter() - pattern_table = [ - ( - ethosu.AbsParams.composite_name, - ethosu.abs_pattern(), - lambda pat: ethosu.AbsParams(pat).is_valid(), - ), - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - mod = partition_ethosu_by_table(mod, pattern_table) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -def test_tflite_tanh_legalize(): - dtype = "int8" - ifm_shape = (1, 241, 132, 7) - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tanh_func(self, x): - op = tf.math.tanh(x) - return op - - model = Model() - concrete_func = model.tanh_func.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - mod = ethosu.partition_for_ethosu(mod, params) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.TanhRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod = relay.transform.InferType()(mod) - - func_body = mod["tvmgen_default_ethos_u_main_0"].body - assert func_body.op.name == "contrib.ethosu.identity" - assert func_body.attrs.activation == "TANH" - assert tuple(func_body.args[0].checked_type.shape) == (ifm_shape) - assert tuple(func_body.args[1].checked_type.shape) == (256,) - - -@pytest.mark.parametrize("dtype", ["int8", "uint8"]) -@pytest.mark.parametrize( - "ifm_shape, axis, keep_dims, use_same_quantization", - [ - # mean to average pool - [(1, 8, 16, 16), (1,), True, True], - [(1, 8, 16, 16), (2,), False, True], - 
[(1, 8, 16, 16), (1, 2), False, True], - [(3, 3, 4), (0,), True, True], - [(3, 3, 4), (1,), False, True], - [(8, 5), (0,), False, True], - [(8, 5), (1,), True, True], - # mean to depthwise - [(1, 8, 16, 16), (1,), True, False], - [(1, 8, 16, 16), (2,), True, False], - [(1, 8, 16, 16), (1, 2), False, False], - [(8, 4), (0,), False, False], - [(1, 65, 2, 1), (1, 2), True, False], # special case when h > 64 - ], -) -def test_mean(ifm_shape, axis, keep_dims, use_same_quantization, dtype): - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - op = tf.math.reduce_mean(x, axis=axis, keepdims=keep_dims) - return op - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - return mod - - def create_relay_graph_with_same_quantization(): - ifm = relay.var("input", shape=ifm_shape, dtype=dtype) - cast = relay.cast(ifm, dtype="int32") - mean = relay.mean(cast, axis=axis, keepdims=keep_dims) - requantize = relay.qnn.op.requantize( - mean, - input_scale=relay.const(1.0, dtype="float32"), - input_zero_point=relay.const(0, dtype="int32"), - output_scale=relay.const(1.0, dtype="float32"), - output_zero_point=relay.const(0, dtype="int32"), - out_dtype=dtype, - ) - - func = relay.Function(relay.analysis.free_vars(requantize), requantize) - mod = tvm.IRModule.from_expr(func) - return mod - - def verify(ext_func): - out_var = ext_func.body - - next_op = out_var - pooling_op = None - depthwise_op = None - if ( - isinstance(next_op, relay.expr.Call) - and isinstance(next_op.op, tvm.ir.op.Op) - and next_op.op.name == "reshape" - ): - next_op = next_op.args[0] - if util.is_named_ethosu_op(next_op, "pooling"): - pooling_op = next_op - next_op = next_op.args[0] - if util.is_named_ethosu_op(next_op, "depthwise_conv2d"): - depthwise_op = next_op - next_op = next_op.args[0] - while ( - isinstance(next_op, relay.expr.Call) - and isinstance(next_op.op, tvm.ir.op.Op) - and next_op.op.name == "reshape" - ): - next_op = next_op.args[0] - in_var = next_op - - def calculate_expected_output_shape(): - for i in range(len(ifm_shape)): - if i in axis: - if keep_dims: - yield 1 - else: - yield ifm_shape[i] - - out_shape = tuple(calculate_expected_output_shape()) - - # check IFM - assert tuple(in_var.checked_type.shape) == ifm_shape - - if use_same_quantization: - assert in_var.checked_type.dtype == dtype - else: - # in_var's dtype is equal to int8 due to TFLite's requantize - assert in_var.checked_type.dtype == "int8" - - # check OFM - assert tuple(out_var.checked_type.shape) == out_shape - if use_same_quantization: - assert out_var.checked_type.dtype == dtype - else: - # out_var's dtype is equal to int8 due to TFLite's requantize - assert out_var.checked_type.dtype == "int8" - - # 
check expected legalization case - if pooling_op: - attrs = pooling_op.attrs - assert ( - attrs.ifm_scale == attrs.ofm_scale and attrs.ifm_zero_point == attrs.ofm_zero_point - ) - else: - assert depthwise_op - attrs = depthwise_op.attrs - assert ( - attrs.ifm_scale != attrs.ofm_scale or attrs.ifm_zero_point != attrs.ofm_zero_point - ) - - rewriter = legalize.MeanRewriter() - pattern_table = [ - ( - ethosu.MeanParams.composite_name, - ethosu.mean_pattern(), - lambda pat: ethosu.MeanParams(pat).is_valid(), - ), - ] - - mod = ( - create_relay_graph_with_same_quantization() - if use_same_quantization - else create_tflite_graph() - ) - mod = partition_ethosu_by_table(mod, pattern_table) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "ifm_shape, axis, keepdims, relu", - [ - [(1, 4, 2, 8), 3, False, False], - [(1, 4, 4, 1), 3, False, True], - [(3, 5, 7), 2, False, True], - [(1, 4, 2, 8), 3, True, False], - [(3, 5, 7), 2, True, False], - ], -) -def test_ethosu_sum(ifm_shape, axis, keepdims, relu): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - op = tf.math.reduce_sum(x, axis=axis, keepdims=keepdims) - return tf.nn.relu(op) if relu else op - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - return mod - - def verify(ext_func): - out_var = ext_func.body - - binary_elementwise_op = None - pooling_op = None - next_op = out_var - if ( - isinstance(next_op, relay.expr.Call) - and isinstance(next_op.op, tvm.ir.op.Op) - and next_op.op.name == "reshape" - ): - next_op = next_op.args[0] - binary_elementwise_op = next_op - pooling_op = binary_elementwise_op.args[0] - next_op = pooling_op.args[0] - if ( - isinstance(next_op, relay.expr.Call) - and isinstance(next_op.op, tvm.ir.op.Op) - and next_op.op.name == "reshape" - ): - next_op = next_op.args[0] - in_var = next_op - - def calculate_expected_output_shape(): - for i in range(len(ifm_shape)): - if i != axis: - yield ifm_shape[i] - elif keepdims: - yield 1 - - out_shape = tuple(calculate_expected_output_shape()) - - # check IFM - assert tuple(in_var.checked_type.shape) == ifm_shape - assert in_var.checked_type.dtype == dtype - - # check OFM - assert tuple(out_var.checked_type.shape) == out_shape - assert out_var.checked_type.dtype == dtype - - # check expected legalization case - assert pooling_op - attrs = pooling_op.attrs - assert attrs.pooling_type == "SUM" - if relu: - assert attrs.activation == "CLIP" - - assert binary_elementwise_op - attrs = binary_elementwise_op.attrs - assert attrs.operator_type == "MUL" - assert 
attrs.ifm_channels == attrs.ifm2_channels == 1 - assert attrs.ofm_dtype == "int8" - - rewriter = legalize.SumRewriter() - pattern_table = [ - ( - ethosu.SumParams.composite_name, - ethosu.sum_pattern(), - lambda pat: ethosu.SumParams(pat).is_valid(), - ), - ] - - mod = create_tflite_graph() - mod = partition_ethosu_by_table(mod, pattern_table) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "shapes, axis", - [ - ([(2, 3), (4, 3)], 0), - ([(10, 2, 1), (10, 14, 1)], 1), - ([(10,), (13,), (14,)], 0), - ([(1, 5, 2, 1), (1, 5, 7, 1), (1, 5, 3, 1)], 2), - ], -) -def test_tflite_concat_legalize(shapes, axis): - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, shapes, axis): - op = tf.concat(shapes, axis) - return op - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - [tf.TensorSpec(shape, tf.float32) for shape in shapes], axis - ) - - def representative_dataset(): - for _ in range(100): - datas = [np.random.rand(*shape) for shape in shapes] - yield [data.astype(np.float32) for data in datas] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - - return tflite_model - - def verify(ext_func): - new_concat_axis = np.sum(shape[axis] for shape in shapes) - out_shape = list(shapes[0]) - out_shape[axis] = new_concat_axis - - op = ext_func.body - for i, _ in enumerate(shapes): - assert list(op.args[0][i].checked_type.shape) == list(shapes[i]) - - assert list(op.checked_type.shape) == out_shape - assert op.checked_type.dtype == "int8" - - concat_pattern_table = [ - ( - ethosu.ConcatParams.composite_name, - ethosu.concat_pattern(), - lambda pat: ethosu.ConcatParams(pat).is_valid(), - ) - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - relay_module, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={("ifm" + str(i)): shape for i, shape in enumerate(shapes)}, - dtype_dict={("ifm" + str(i)): "int8" for i, _ in enumerate(shapes)}, - ) - mod = partition_ethosu_by_table(relay_module, concat_pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.ConcatRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.NoOpRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod["tvmgen_default_ethos_u_main_0"] = relay.transform.InferType()(mod)[ - "tvmgen_default_ethos_u_main_0" - ] - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -def test_tflite_sigmoid_legalize(): - dtype = "int8" - ifm_shape = (1, 237, 91, 7) - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def sigmoid_func(self, x): - op = tf.math.sigmoid(x) - return op - - model = Model() - concrete_func = model.sigmoid_func.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = 
tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_output_type = tf.int8 - converter.inference_input_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - mod = ethosu.partition_for_ethosu(mod, params) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.SigmoidRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod = relay.transform.InferType()(mod) - - func_body = mod["tvmgen_default_ethos_u_main_0"].body - assert func_body.op.name == "contrib.ethosu.identity" - assert func_body.attrs.activation == "SIGMOID" - assert tuple(func_body.args[0].checked_type.shape) == (ifm_shape) - assert tuple(func_body.args[1].checked_type.shape) == (256,) - - -@pytest.mark.parametrize( - "ifm_shape, num_or_size_splits, axis", - [ - ((1, 4, 6, 8), 3, 2), - ((4, 6, 8), 2, 0), - ((5, 15), 3, 1), - ((3, 7), 1, 1), - ((100,), 25, 0), - ], -) -def test_tflite_split_legalize(ifm_shape, num_or_size_splits, axis): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x, num_or_size_splits, axis): - op = tf.split(x, num_or_size_splits, axis=axis) - return op - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, tf.float32), num_or_size_splits, axis - ) - - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - - return tflite_model - - def verify(ext_func): - # dig out the split - single_output_split = num_or_size_splits == 1 - split = ( - ext_func.body.tuple_value - if single_output_split - else ext_func.body.args[0][0].args[0].tuple_value - ) - assert split.op.name == "split" - - # Split is specified by number of equal chunks - assert split.attrs.indices_or_sections == num_or_size_splits - - assert split.attrs.axis == axis - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - mod = ethosu.partition_for_ethosu(mod) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.PartitionedSplitRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - - mod["tvmgen_default_ethos_u_main_0"] = relay.transform.InferType()(mod)[ - "tvmgen_default_ethos_u_main_0" - ] - - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "ifm_shape, num_or_size_splits, axis", - [ - ((1, 4, 6, 8), (1, 3, 4), 3), - ((10, 18, 4), (1, 4, 3, 2), 0), - ((22, 7), (4, -1), 1), - ((25,), (25,), 0), - ], -) -def 
test_tflite_split_v_legalize(ifm_shape, num_or_size_splits, axis): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x, num_or_size_splits, axis): - # TF split gets converted into TFLite's split_v - op = tf.split(x, num_or_size_splits, axis=axis) - return op - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, tf.float32), num_or_size_splits, axis - ) - - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - - return tflite_model - - def verify(ext_func): - # dig out the split - single_output_split = len(num_or_size_splits) == 1 - split = ( - ext_func.body.tuple_value - if single_output_split - else ext_func.body.args[0][0].args[0].tuple_value - ) - assert split.op.name == "split" - - # Split is specified by the size of sections, so converting num_or_size_splits - # into the indices where the tensor is split at since this is how split is represented - # in Relay - split_sections = [] if single_output_split else [num_or_size_splits[0]] - for split_size in num_or_size_splits[1:-1]: - sec = split_sections[-1] + split_size - split_sections.append(sec) - assert list(split.attrs.indices_or_sections) == split_sections - - assert split.attrs.axis == axis - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - mod = ethosu.partition_for_ethosu(mod) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.PartitionedSplitRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - - mod["tvmgen_default_ethos_u_main_0"] = relay.transform.InferType()(mod)[ - "tvmgen_default_ethos_u_main_0" - ] - - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "ifm_shape,ifm_scale,ifm_zp,ofm_scale,ofm_zp", - [[(1, 8, 8, 3), 1.0, 0, 1.0, 0], [(1, 20, 30, 3), 1.345, 34, 0.32, -23]], -) -def test_ethosu_requantize(ifm_shape, ifm_scale, ifm_zp, ofm_scale, ofm_zp): - dtype = "int8" - - def create_model(): - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - requantize = relay.qnn.op.requantize( - ifm, - relay.const(ifm_scale, dtype="float32"), - relay.const(ifm_zp, dtype="int32"), - relay.const(ofm_scale, dtype="float32"), - relay.const(ofm_zp, dtype="int32"), - ) - return tvm.IRModule.from_expr(relay.Function([ifm], requantize)) - - def verify(ext_func): - op = ext_func.body - - # Check IFM - ifm = op.args[0].checked_type - assert list(ifm.shape) == list(ifm_shape) - assert str(ifm.dtype) == dtype - - # Check OFM - ofm = op.checked_type - assert list(ofm.shape) == list(ifm_shape) - assert str(ofm.dtype) == dtype - - # Check quantization params - assert math.isclose(op.attrs.ifm_scale, ifm_scale, abs_tol=1e-7) - assert op.attrs.ifm_zero_point == ifm_zp - assert math.isclose(op.attrs.ofm_scale, ofm_scale, abs_tol=1e-7) - assert op.attrs.ofm_zero_point == ofm_zp - - rewriter = legalize.RequantizeRewriter() - 
pattern_table = [ - ( - ethosu.RequantizeParams.composite_name, - ethosu.requantize_pattern(), - lambda pat: ethosu.RequantizeParams(pat).is_valid(), - ), - ] - - mod = create_model() - mod = partition_ethosu_by_table(mod, pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -def test_multiple_requantize_offload(): - """ - Testing requantize offload in the case one requantize operation is part of - an existing pattern (in this case Mean: cast->mean->requantize) and the - other is a stand-alone requantize. - """ - - def create_model(): - ifm = relay.var("input", shape=(1, 3, 3, 4), dtype="int8") - cast = relay.cast(ifm, dtype="int32") - mean = relay.mean(cast, axis=1, keepdims=True) - requantize = relay.qnn.op.requantize( - mean, - input_scale=relay.const(1.0, dtype="float32"), - input_zero_point=relay.const(0, dtype="int32"), - output_scale=relay.const(1.0, dtype="float32"), - output_zero_point=relay.const(0, dtype="int32"), - ) - requantize = relay.qnn.op.requantize( - requantize, - input_scale=relay.const(1.0, dtype="float32"), - input_zero_point=relay.const(0, dtype="int32"), - output_scale=relay.const(1.0, dtype="float32"), - output_zero_point=relay.const(0, dtype="int32"), - ) - return tvm.IRModule.from_expr(relay.Function([ifm], requantize)) - - def verify(ext_func): - # If mean operation and separate requantize were offloaded correctly, - # there should only be a pooling operation followed by an identity - # operation leagalized. - op = ext_func.body - assert op.op.name == "contrib.ethosu.identity" - op = op.args[0] - assert ext_func.body.args[0].op.name == "contrib.ethosu.pooling" - op = op.args[0] - assert isinstance(op, relay.Var) - - mod = create_model() - mod = ethosu.partition_for_ethosu(mod) - mod = legalize.LegalizeEthosU()(mod) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize("ifm_shape,axis", [((2,), 0), ((1, 3, 3), 2)]) -def test_tflite_expand_dims(ifm_shape, axis): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - return tf.expand_dims(x, axis=axis) - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, tf.float32) - ) - - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - - return tflite_model - - def verify(ext_func): - op = ext_func.body - expected_shape = list(ifm_shape) - expected_shape.insert(axis, 1) - - # Check IFM - assert list(op.args[0].checked_type.shape) == list(ifm_shape) - assert op.args[0].checked_type.dtype == dtype - - # Check OFM - assert list(op.checked_type.shape) == expected_shape - assert op.checked_type.dtype == dtype - - # Check op - assert op.op.name == "reshape" - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - 
mod = ethosu.partition_for_ethosu(mod) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.ExpandDimsRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.ReshapeRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod["tvmgen_default_ethos_u_main_0"] = relay.transform.InferType()(mod)[ - "tvmgen_default_ethos_u_main_0" - ] - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "ifm_shape,axis", [((1, 1, 2, 1), 0), ((1, 3, 3, 1), 3), ((1, 1, 2, 1), None)] -) -def test_tflite_squeeze(ifm_shape, axis): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - return tf.squeeze(x, axis=axis) - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, tf.float32) - ) - - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - - return tflite_model - - def verify(ext_func): - op = ext_func.body - expected_shape = list(ifm_shape) - if isinstance(axis, int): - expected_shape = ifm_shape[:axis] + ifm_shape[axis + 1 :] - else: - expected_shape = list(filter(lambda a: a != 1, expected_shape)) - - # Check IFM - assert list(op.args[0].checked_type.shape) == list(ifm_shape) - assert op.args[0].checked_type.dtype == dtype - - # Check OFM - assert list(op.checked_type.shape) == list(expected_shape) - assert op.checked_type.dtype == dtype - - # Check op - assert op.op.name == "reshape" - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - mod = ethosu.partition_for_ethosu(mod) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.SqueezeRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.ReshapeRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod["tvmgen_default_ethos_u_main_0"] = relay.transform.InferType()(mod)[ - "tvmgen_default_ethos_u_main_0" - ] - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "ifm_shape,size,half_pixel", - [ - [(1, 2, 2, 1), (4, 4), False], - [(1, 2, 2, 1), (4, 4), True], - [(1, 4, 7, 3), (8, 14), False], - [(1, 3, 5, 3), (3, 5), False], - [(1, 6, 6, 96), (12, 12), False], - [(1, 6, 6, 96), (12, 12), True], - ], -) -def test_tflite_resize2d_nearest_neighbor(ifm_shape, size, half_pixel): - align_corners = False - dtype = "int8" - - def create_tflite_graph(): - @tf.function - def resize_model(x): - return tf.compat.v1.image.resize_nearest_neighbor( - x, - size, - align_corners=align_corners, - half_pixel_centers=half_pixel, - ) - - concrete_func = resize_model.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - 
- converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - return mod - - def verify(ext_func): - op = ext_func.body - in_var = op.args[0] - - # check IFM - assert tuple(in_var.checked_type.shape) == ifm_shape - assert in_var.checked_type.dtype == dtype - - # check OFM - attrs = dict(op.attrs) - out_shape = (ifm_shape[0], size[0], size[1], ifm_shape[3]) - assert tuple(op.checked_type.shape) == out_shape - assert op.checked_type.dtype == dtype - - # Check Op attributes - if size[0] == ifm_shape[1] and size[1] == ifm_shape[2]: - assert op.op.name == "contrib.ethosu.identity" - else: - assert attrs["pooling_type"] == "AVG" - assert attrs["upscale"] == "NEAREST" - - rewriter = legalize.Resize2dRewriter() - pattern_table = [ - ( - ethosu.Resize2dParams.composite_name, - ethosu.resize2d_pattern(), - lambda pat: ethosu.Resize2dParams(pat).is_valid(), - ), - ] - - mod = create_tflite_graph() - mod = partition_ethosu_by_table(mod, pattern_table) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "ifm_shape,size,align_corners", - [ - [(1, 2, 2, 1), (4, 4), False], - [(1, 4, 7, 3), (8, 14), False], - [(1, 2, 2, 1), (3, 3), True], - [(1, 4, 7, 3), (7, 13), True], - [(1, 3, 5, 3), (3, 5), False], - ], -) -def test_tflite_resize2d_bilinear(ifm_shape, size, align_corners): - dtype = "int8" - - def create_tflite_graph(): - @tf.function - def resize_model(x): - return tf.compat.v1.image.resize_bilinear( - x, size, align_corners=align_corners, half_pixel_centers=False - ) - - concrete_func = resize_model.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - return mod - - def verify(ext_func): - op = ext_func.body - in_var = op.args[0] - - # check IFM - assert tuple(in_var.checked_type.shape) == ifm_shape - assert in_var.checked_type.dtype == dtype - - # check OFM - attrs = dict(op.attrs) - out_shape = (ifm_shape[0], size[0], size[1], ifm_shape[3]) - assert tuple(op.checked_type.shape) == out_shape - assert op.checked_type.dtype == dtype - - # Check Op attributes - if size[0] == ifm_shape[1] and size[1] == ifm_shape[2]: - assert op.op.name == "contrib.ethosu.identity" - else: - assert 
attrs["pooling_type"] == "AVG" - assert attrs["upscale"] == "NEAREST" - - # Check padding - if align_corners: - assert list(attrs["padding"]) == [0, 0, 0, 0] - else: - assert list(attrs["padding"]) == [0, 0, 1, 1] - - rewriter = legalize.Resize2dRewriter() - pattern_table = [ - ( - ethosu.Resize2dParams.composite_name, - ethosu.resize2d_pattern(), - lambda pat: ethosu.Resize2dParams(pat).is_valid(), - ), - ] - - mod = create_tflite_graph() - mod = partition_ethosu_by_table(mod, pattern_table) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "ifm_shape,ofm_shape,kernel_shape,padding", - [ - [(1, 2, 2, 1), (1, 4, 4, 1), (3, 3), "SAME"], - [(1, 2, 2, 1), (1, 9, 9, 1), (7, 7), "VALID"], - [(1, 2, 4, 3), (1, 4, 8, 3), (3, 3), "SAME"], - [(1, 10, 5, 3), (1, 21, 13, 3), (3, 5), "VALID"], - ], -) -@pytest.mark.parametrize("has_bias", [False, True]) -def test_tflite_transpose_convolution(ifm_shape, ofm_shape, kernel_shape, padding, has_bias): - dtype = "int8" - dilations = (1, 1) - strides = (2, 2) - - def create_tflite_graph(): - @tf.function - def conv2d_transpose(x): - bias_shape = ofm_shape[3] - bias = tf.constant(np.random.uniform(size=bias_shape), dtype=tf.float32) - weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], ofm_shape[3]] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - tf_strides = [1, strides[0], strides[1], 1] - op = tf.nn.conv2d_transpose( - x, - weight, - output_shape=ofm_shape, - strides=tf_strides, - padding=padding, - dilations=dilations, - ) - if has_bias: - op = tf.nn.bias_add(op, bias) - return op - - concrete_func = conv2d_transpose.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model, 0) - - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - return mod, params - - def verify(ext_func): - strided_slice = ext_func.body - conv = strided_slice.args[0] - ofm_channels = conv.attrs.ofm_channels - - # Check IFM - ifm = conv.args[0].checked_type - assert list(ifm.shape) == list(ifm_shape) - assert str(ifm.dtype) == dtype - assert ifm.shape[3] == ofm_channels - - # Check OFM - ofm = strided_slice.checked_type - assert list(ofm.shape) == list(ofm_shape) - assert str(ofm.dtype) == dtype - assert ofm.shape[3] == ofm_channels - - # Check weights - weights_ohwi = conv.args[1].data.asnumpy() - assert str(weights_ohwi.dtype) == dtype - assert list(weights_ohwi.shape) == [ - ofm_channels, - kernel_shape[0], - kernel_shape[1], - ifm_shape[3], - ] - - # Check that scale_bias matches weight tensor - assert list(conv.args[2].checked_type.shape)[0] == ofm_channels - - # Calculate expected padding for conv2d op - if padding == "VALID": - expected_padding = [0, 0, 0, 0] - elif padding == "SAME": - pad_top, pad_bottom = 
get_pad_value(ofm_shape[1], kernel_shape[0], strides[0]) - pad_left, pad_right = get_pad_value(ofm_shape[2], kernel_shape[1], strides[1]) - expected_padding = [pad_top, pad_left, pad_bottom, pad_right] - pad_top = kernel_shape[0] - 1 - expected_padding[0] - pad_left = kernel_shape[1] - 1 - expected_padding[1] - pad_bottom = kernel_shape[0] - 1 - expected_padding[2] - pad_right = kernel_shape[1] - 1 - expected_padding[3] - if strides == [2, 2]: - pad_bottom -= 1 - pad_right -= 1 - expected_padding = [pad_top, pad_left, pad_bottom, pad_right] - assert list(conv.attrs.padding) == list(expected_padding) - - assert list(conv.attrs.strides) == [1, 1] - - rewriter = legalize.Conv2DTransposeRewriter() - pattern_table = [ - ( - ethosu.QnnConv2DTransposeParams.composite_name, - ethosu.qnn_conv2d_transpose_pattern(), - lambda pat: ethosu.QnnConv2DTransposeParams(pat).is_valid(), - ), - ] - - mod, params = create_tflite_graph() - mod["main"] = bind_params_by_name(mod["main"], params) - mod = partition_ethosu_by_table(mod, pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethos_u_main_0"] - ) - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "ifm_shapes,axis", - [ - ([(1, 2, 2), (1, 2, 2), (1, 2, 2)], 2), - ([(5, 4), (5, 4)], 1), - ([(1,), (1,)], 0), - ([(3, 1), (3, 1), (3, 1), (3, 1)], 0), - ], -) -def test_tflite_pack(ifm_shapes, axis): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, inputs, axis): - return tf.stack(inputs, axis=axis) - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - [tf.TensorSpec(shape, tf.float32) for shape in ifm_shapes], axis - ) - - def representative_dataset(): - for _ in range(100): - datas = [np.random.rand(*shape) for shape in ifm_shapes] - yield [data.astype(np.float32) for data in datas] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - - return tflite_model - - def verify(ext_func): - new_pack_axis = len(ifm_shapes) - ifm_shape = list(ifm_shapes[0]) - op = ext_func.body - - after_reshape = ifm_shape[:axis] + [1] + ifm_shape[axis:] - out_shape = ifm_shape[:axis] + [new_pack_axis] + ifm_shape[axis:] - - assert op.op.name == "concatenate" - - # Check shapes after expand_dims (legalized as reshape) - for i in range(len(ifm_shapes)): - assert list(op.args[0][i].checked_type.shape) == after_reshape - assert op.args[0][i].checked_type.dtype == dtype - - # Check output - assert list(op.checked_type.shape) == out_shape - assert op.checked_type.dtype == dtype - - pack_pattern_table = [ - ( - ethosu.ConcatParams.composite_name, - ethosu.concat_pattern(), - lambda pat: ethosu.ConcatParams(pat).is_valid(), - ), - ( - ethosu.ExpandDimsParams.composite_name, - ethosu.expand_dims_pattern(), - lambda pat: ethosu.ExpandDimsParams(pat).is_valid(), - ), - ( - ethosu.ReshapeParams.composite_name, - ethosu.reshape_pattern(), - lambda pat: ethosu.ReshapeParams(pat).is_valid(), - ), - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - relay_module, _ = relay.frontend.from_tflite( - tflite_model, - 
shape_dict={("ifm" + str(i)): shape for i, shape in enumerate(ifm_shapes)}, - dtype_dict={("ifm" + str(i)): dtype for i, _ in enumerate(ifm_shapes)}, - ) - mod = partition_ethosu_by_table(relay_module, pack_pattern_table) - - seq = [ - legalize.ConcatRewriter(), - legalize.ExpandDimsRewriter(), - legalize.ReshapeRewriter(), - legalize.NoOpRewriter(), - ] - for legalizer in seq: - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalizer, mod["tvmgen_default_ethos_u_main_0"] - ) - mod["tvmgen_default_ethos_u_main_0"] = relay.transform.InferType()(mod)[ - "tvmgen_default_ethos_u_main_0" - ] - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "ifm_shape,axis", - [[(1, 2, 3, 4), 1], [(2, 3), 1], [(5, 6, 7), 2]], -) -def test_tflite_unpack(ifm_shape, axis): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x, axis): - return tf.unstack(x, axis=axis) - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, tf.float32), axis - ) - - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - - return tflite_model - - def verify(ext_func): - outputs = ext_func.body.args[0].fields - shape = list(ifm_shape) - unpacked_shape = shape[:axis] + shape[axis + 1 :] - split_shape = shape[:axis] + [1] + shape[axis + 1 :] - - assert len(outputs) == shape[axis] - - for i, output in enumerate(outputs): - expr = output.args[0].args[0] - expr = expr.tuple_value[expr.index] - expr = expr.args[0] - - # Checking expected unpacked output shape. - # Squeeze is legalized to a reshape. 
- assert expr.op.name == "reshape" - assert list(expr.checked_type.shape) == unpacked_shape - assert output.checked_type.dtype == dtype - - expr = expr.args[0] - expr = expr.tuple_value[expr.index] - expr = expr.args[0] - - # Check input is split correctly - assert list(expr.args[0].checked_type.shape) == shape - assert list(expr.checked_type.shape) == split_shape - assert expr.checked_type.dtype == dtype - - # Check split attrs - begin_shape = [0] * len(ifm_shape) - begin_shape[axis] = i - assert list(expr.attrs.begin) == begin_shape - end_shape = shape[:axis] + [i + 1] + shape[axis + 1 :] - assert list(expr.attrs.end) == end_shape - assert list(expr.attrs.strides) == [1] - - pack_pattern_table = [ - ( - ethosu.SplitParams.composite_name, - ethosu.split_pattern(), - lambda pat: ethosu.SplitParams(pat).is_valid(), - ), - ( - ethosu.SqueezeParams.composite_name, - ethosu.squeeze_pattern(), - lambda pat: ethosu.SqueezeParams(pat).is_valid(), - ), - ( - ethosu.ReshapeParams.composite_name, - ethosu.reshape_pattern(), - lambda pat: ethosu.ReshapeParams(pat).is_valid(), - ), - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - mod = partition_ethosu_by_table(mod, pack_pattern_table) - - seq = [ - legalize.PartitionedSplitRewriter(), - legalize.SplitRewriter(), - legalize.SqueezeRewriter(), - legalize.ReshapeRewriter(), - legalize.NoOpRewriter(), - ] - for legalizer in seq: - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalizer, mod["tvmgen_default_ethos_u_main_0"] - ) - mod["tvmgen_default_ethos_u_main_0"] = relay.transform.InferType()(mod)[ - "tvmgen_default_ethos_u_main_0" - ] - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize("ifm_shape", [(1, 15, 15, 3), (1, 8, 9, 1)]) -@pytest.mark.parametrize("alpha", [0.2, 0.634]) -def test_tflite_leaky_relu(ifm_shape, alpha): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def leaky_relu_func(self, x): - return tf.nn.leaky_relu(x, alpha=alpha) - - model = Model() - concrete_func = model.leaky_relu_func.get_concrete_function( - tf.TensorSpec(ifm_shape, tf.float32), - ) - - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - - return tflite_model - - def verify(ext_func): - func_body = ext_func.body - assert func_body.op.name == "contrib.ethosu.identity" - assert func_body.attrs.activation == "LUT" - assert tuple(func_body.args[0].checked_type.shape) == (ifm_shape) - assert tuple(func_body.args[1].checked_type.shape) == (256,) - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, _ = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - mod = ethosu.partition_for_ethosu(mod) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.LeakyReLURewriter(), 
mod["tvmgen_default_ethos_u_main_0"] - ) - mod["tvmgen_default_ethos_u_main_0"] = relay.transform.InferType()(mod)[ - "tvmgen_default_ethos_u_main_0" - ] - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize("ifm_shape", [(1, 14), (1, 151)]) -@pytest.mark.parametrize("ofm_channels", [32, 64]) -@pytest.mark.parametrize("use_bias", [True, False]) -@pytest.mark.parametrize("activation_function", ["RELU", "NONE"]) -def test_tflite_fully_connected( - ifm_shape, - ofm_channels, - use_bias, - activation_function, -): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def fully_connected(self, x): - bias_shape = ofm_channels - bias = tf.constant(np.random.uniform(size=bias_shape), dtype=tf.float32) - w = tf.constant( - np.random.uniform(size=[ifm_shape[1], ofm_channels]), - dtype=tf.float32, - ) - x = tf.matmul(x, w) - if use_bias: - x = tf.nn.bias_add(x, bias) - if activation_function: - x = tf.nn.relu(x) - return x - - model = Model() - concrete_func = model.fully_connected.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - op = ext_func.body.args[0] - ofm_channels = op.attrs.ofm_channels - - # check IFM - ifm = op.args[0].checked_type - assert list(ifm.shape) == [1, 1] + list(ifm_shape) - assert str(ifm.dtype) == dtype - - # check OFM - ofm = op.checked_type - assert list(ofm.shape) == [1, 1, 1, ofm_channels] - assert str(ofm.dtype) == dtype - - # check weights - weights_ohwi = op.args[1].data.asnumpy() - assert str(weights_ohwi.dtype) == dtype - assert list(weights_ohwi.shape) == [ofm_channels, 1, 1, ifm_shape[1]] - - # Check that scale_bias matches weight tensor - assert list(op.args[2].checked_type.shape)[0] == ofm_channels - - assert list(op.attrs.padding) == [0, 0, 0, 0] - assert list(op.attrs.strides) == [1, 1] - assert list(op.attrs.dilation) == [1, 1] - if activation_function == "RELU": - assert str(op.attrs.activation) == "CLIP" - - fc_pattern_table = [ - ( - ethosu.FullyConnectedParams.composite_name, - ethosu.qnn_fc_pattern(), - lambda pat: ethosu.FullyConnectedParams(pat).is_valid(), - ) - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, fc_params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - mod["main"] = bind_params_by_name(mod["main"], fc_params) - mod = partition_ethosu_by_table(mod, fc_pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.FullyConnectedRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize("ifm_shape", [(1, 5, 5, 3), (1, 12, 9, 1)]) -def test_tflite_hard_swish(ifm_shape): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - op = tf.keras.layers.Lambda( - lambda x: x 
* tf.keras.activations.relu(x + 3.0, max_value=6.0) / 6.0 - )(x) - return op - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, tf.float32) - ) - - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - - return tflite_model - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - mod = ethosu.partition_for_ethosu(mod, params) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.HardSwishRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod = relay.transform.InferType()(mod) - - func_body = mod["tvmgen_default_ethos_u_main_0"].body - assert func_body.op.name == "contrib.ethosu.identity" - assert func_body.attrs.activation == "LUT" - assert tuple(func_body.args[0].checked_type.shape) == (ifm_shape) - assert tuple(func_body.args[1].checked_type.shape) == (256,) - - -def test_tflite_softmax(): - np.random.seed(0) - dtype = "int8" - ifm_shape = (1, 12) - - def create_tflite_graph(): - @tf.function - def softmax(x): - return tf.nn.softmax(x) - - concrete_func = softmax.get_concrete_function(tf.TensorSpec(ifm_shape, dtype=tf.float32)) - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.uniform(low=-1, high=2, size=tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - out_op = ext_func.body - ops = [] - # List of expected operations, their type and activation parameters if it exists - expected_ops_params = [ - ("reshape", None, [None, None, None, None, None, None]), - ("reshape", None, [None, None, None, None, None, None]), - ("contrib.ethosu.pooling", "MAX", [0.011756093241274357, -43, None, None, 0.0, -43]), - ( - "contrib.ethosu.binary_elementwise", - "SUB", - [0.011756093241274357, -43, 0.0, -43, 1.0, 127], - ), - ("contrib.ethosu.binary_elementwise", "SHR", [1.0, 0, 0.0, 0, 0.0, -43]), - ("contrib.ethosu.pooling", "SUM", [0.0, 0, None, None, 0.0, -43]), - ("contrib.ethosu.unary_elementwise", "CLZ", [0.0, 0, None, None, 0.0, -43]), - ("contrib.ethosu.binary_elementwise", "SUB", [0.0, 0, 0.0, 0, 0.0, -43]), - ("contrib.ethosu.binary_elementwise", "SHL", [0.0, 0, 0.0, 0, 0.0, -43]), - ("contrib.ethosu.binary_elementwise", "SUB", [0.0, 0, 0.0, 0, 0.0, -43]), - ("contrib.ethosu.binary_elementwise", "SHL", [0.0, 0, 0.0, 0, 0.0, -43]), - ("contrib.ethosu.binary_elementwise", "ADD", [0.0, 0, 0.0, 0, 1.0, 0]), - ("contrib.ethosu.binary_elementwise", "MUL", [1.0, 0, 1.0, 0, 2.0, 0]), - 
("contrib.ethosu.binary_elementwise", "ADD", [2.0, 0, 0.0, 0, 1.0, 0]), - ("contrib.ethosu.binary_elementwise", "MUL", [1.0, 0, 1.0, 0, 2.0, 0]), - ("contrib.ethosu.binary_elementwise", "SUB", [2.0, 0, 0.0, 0, 1.0, 0]), - ("contrib.ethosu.binary_elementwise", "MUL", [1.0, 0, 1.0, 0, 2.0, 0]), - ("contrib.ethosu.binary_elementwise", "MUL", [2.0, 0, 0.0, 0, 0.0, -43]), - ("contrib.ethosu.binary_elementwise", "ADD", [1.0, 0, 0.0, 0, 1.0, 0]), - ("contrib.ethosu.binary_elementwise", "MUL", [1.0, 0, 1.0, 0, 2.0, 0]), - ("contrib.ethosu.binary_elementwise", "SUB", [2.0, 0, 0.0, 0, 1.0, 0]), - ("contrib.ethosu.binary_elementwise", "MUL", [1.0, 0, 1.0, 0, 2.0, 0]), - ("contrib.ethosu.binary_elementwise", "MUL", [2.0, 0, 0.0, 0, 0.0, -43]), - ("contrib.ethosu.binary_elementwise", "ADD", [1.0, 0, 0.0, 0, 1.0, 0]), - ("contrib.ethosu.binary_elementwise", "MUL", [1.0, 0, 1.0, 0, 2.0, 0]), - ("contrib.ethosu.binary_elementwise", "SUB", [2.0, 0, 0.0, 0, 1.0, 0]), - ("contrib.ethosu.binary_elementwise", "MUL", [1.0, 0, 1.0, 0, 2.0, 0]), - ("contrib.ethosu.binary_elementwise", "MUL", [2.0, 0, 0.0, 0, 0.0, -43]), - ("contrib.ethosu.binary_elementwise", "ADD", [1.0, 0, 0.0, 0, 1.0, 0]), - ("contrib.ethosu.binary_elementwise", "MUL", [1.0, 0, 0.0, 0, 1.0, 0]), - ("contrib.ethosu.binary_elementwise", "MUL", [1.0, 0, 1.0, 0, 2.0, 0]), - ("contrib.ethosu.binary_elementwise", "SUB", [0.0, 0, 0.0, 0, 0.0, -43]), - ("contrib.ethosu.binary_elementwise", "SHR", [2.0, 0, 0.0, 0, 0.00390625, -128]), - ("reshape", None, [None, None, None, None, None, None]), - ] - - def get_attr_value(op, attr_name): - if hasattr(op.attrs, attr_name): - return op.attrs[attr_name] - else: - return None - - def get_op_type(op): - if hasattr(op.attrs, "pooling_type"): - return op.attrs.pooling_type - elif hasattr(op.attrs, "operator_type"): - return op.attrs.operator_type - return None - - def get_activation_params(op): - activation_params = [] - activation_params.append(get_attr_value(op, "ifm_scale")) - activation_params.append(get_attr_value(op, "ifm_zero_point")) - activation_params.append(get_attr_value(op, "ifm2_scale")) - activation_params.append(get_attr_value(op, "ifm2_zero_point")) - activation_params.append(get_attr_value(op, "ofm_scale")) - activation_params.append(get_attr_value(op, "ofm_zero_point")) - return activation_params - - def _visit(stmt): - if isinstance(stmt, relay.expr.Call): - ops.append(stmt) - - relay.analysis.post_order_visit(out_op, _visit) - - # check IFM - ifm = ops[0].args[0].checked_type - assert list(ifm.shape) == list(ifm_shape) - assert str(ifm.dtype) == dtype - - # check OFM - ofm = out_op.checked_type - assert list(ofm.shape) == list(ifm_shape) - assert ofm.dtype == dtype - - # check operations - for op, expected_op_params in zip(ops, expected_ops_params): - activation_params = get_activation_params(op) - expected_op_name, expected_op_type, expected_activation_params = expected_op_params - assert op.op.name == expected_op_name - assert expected_op_type == get_op_type(op) - for activation_param, expected_activation_param in zip( - activation_params, expected_activation_params - ): - if isinstance(activation_param, float): - assert math.isclose(expected_activation_param, activation_param, abs_tol=1e-7) - else: - assert expected_activation_param == activation_param - - softmax_pattern_table = [ - ( - ethosu.SoftMaxParams.composite_name, - ethosu.softmax_pattern(), - lambda pat: ethosu.SoftMaxParams(pat).is_valid(), - ) - ] - - tflite_graph = create_tflite_graph() - tflite_model = 
tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - mod["main"] = bind_params_by_name(mod["main"], params) - mod = partition_ethosu_by_table(mod, softmax_pattern_table) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.SoftmaxRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod = relay.transform.InferType()(mod) - - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize("ifm_shape", [(1, 55, 55, 3)]) -@pytest.mark.parametrize("kernel_shape", [(3, 3)]) -@pytest.mark.parametrize("strides, dilation", [((1, 1), (1, 1))]) -@pytest.mark.parametrize("op_padding", ["SAME", "VALID"]) -@pytest.mark.parametrize("sep_padding", [(0, 0, 1, 1), (7, 5, 4, 5)]) -@pytest.mark.parametrize( - "op_pairs", [("conv2d", "conv2d"), ("depthwise", "depthwise"), ("conv2d", "depthwise")] -) -def test_tflite_shared_pad_legalize( - ifm_shape, - kernel_shape, - strides, - dilation, - op_padding, - sep_padding, - op_pairs, -): - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_function(self, x): - def make_depthwise_or_conv2d(pair_idx): - if op_pairs[pair_idx] == "depthwise": - weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], 1] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - return tf.nn.depthwise_conv2d( - x, weight, strides=tf_strides, padding=op_padding, dilations=dilation - ) - weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], 3] - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - return tf.nn.conv2d( - x, - weight, - strides=tf_strides, - padding=op_padding, - dilations=dilation, - ) - - x = tf.pad( - x, - [ - [0, 0], - [sep_padding[0], sep_padding[2]], - [sep_padding[1], sep_padding[3]], - [0, 0], - ], - "CONSTANT", - ) - - # The input strides to the TensorFlow API needs to be of shape 1x4 - tf_strides = [1, strides[0], strides[1], 1] - - x1 = make_depthwise_or_conv2d(0) - x2 = make_depthwise_or_conv2d(1) - - x3 = tf.math.add(x1, x2) - return x3 - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - conv2d_pattern_table = [ - ( - ethosu.QnnConv2DParams.composite_name, - ethosu.qnn_conv2d_pattern(), - lambda pat: ethosu.QnnConv2DParams(pat).is_valid(), - ), - ( - ethosu.QnnDepthwiseConv2DParams.composite_name, - ethosu.qnn_depthwise_conv2d_pattern(), - lambda pat: ethosu.QnnDepthwiseConv2DParams(pat).is_valid(), - ), - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - - mod["main"] = bind_params_by_name(mod["main"], params) - mod = partition_ethosu_by_table(mod, 
conv2d_pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - [legalize.Conv2DRewriter(), legalize.DepthwiseConv2DRewriter()], - mod["tvmgen_default_ethos_u_main_0"], - ) - mod["tvmgen_default_ethos_u_main_1"] = dataflow_pattern.rewrite( - [legalize.Conv2DRewriter(), legalize.DepthwiseConv2DRewriter()], - mod["tvmgen_default_ethos_u_main_1"], - ) - - if op_pairs[0] == "depthwise": - assert ( - mod["tvmgen_default_ethos_u_main_0"].body.op.name == "contrib.ethosu.depthwise_conv2d" - ) - else: - assert mod["tvmgen_default_ethos_u_main_0"].body.op.name == "contrib.ethosu.conv2d" - - if op_pairs[1] == "depthwise": - assert ( - mod["tvmgen_default_ethos_u_main_1"].body.op.name == "contrib.ethosu.depthwise_conv2d" - ) - else: - assert mod["tvmgen_default_ethos_u_main_1"].body.op.name == "contrib.ethosu.conv2d" - - -def test_tflite_matmul(): - ifm_shape = [1, 4] - ifm2_shape = [2, 4] - ifm_shapes = [ifm_shape, ifm2_shape] - ofm_shape = [ifm_shape[0], ifm2_shape[0]] - dtype = "int8" - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def matmul(self, x, y): - res = tf.matmul(x, y, transpose_b=True) - return res - - model = Model() - concrete_func = model.matmul.get_concrete_function( - *[tf.TensorSpec(shape, tf.float32) for shape in ifm_shapes] - ) - # Convert the model - def representative_dataset(): - for _ in range(100): - datas = [np.random.rand(*shape) for shape in ifm_shapes] - yield [data.astype(np.float32) for data in datas] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(ext_func): - ofm = ext_func.body - ops = [] - - def _visit(stmt): - if isinstance(stmt, relay.expr.Call): - ops.append(stmt) - - relay.analysis.post_order_visit(ofm, _visit) - ofm_checked_type = ofm.checked_type - ofm_channels = ofm_shape[-1] - - # check IFM - ifm = ops[1].checked_type - assert list(ifm.shape) == ifm_shape - assert str(ifm.dtype) == dtype - - # check IFM2 - ifm2 = ops[3].checked_type - assert list(ifm2.shape) == ifm2_shape - assert str(ifm2.dtype) == dtype - - # check split - split = ops[4] - split_checked_types = list(split.checked_type.fields) - assert split.op.name == "split" - assert split.attrs.axis == 0 - assert int(split.attrs.indices_or_sections) == ofm_channels - for split_checked_type in split_checked_types: - assert list(split_checked_type.shape) == ifm_shape - assert str(split_checked_type.dtype) == dtype - - # check MUL - mul_ops = [ops[6], ops[10]] - for mul_op in mul_ops: - assert mul_op.op.name == "contrib.ethosu.binary_elementwise" - assert mul_op.attrs.operator_type == "MUL" - assert mul_op.attrs.ofm_dtype == "int32" - - # check reduce sum - reduce_sum_ops = [ops[7], ops[11]] - for reduce_sum_op in reduce_sum_ops: - assert reduce_sum_op.op.name == "contrib.ethosu.pooling" - assert reduce_sum_op.attrs.pooling_type == "SUM" - assert list(reduce_sum_op.checked_type.shape) == [1, 1, 1, 1] - - # check concatenation - concatenation = ofm.args[0] - concatenation_shape = concatenation.checked_type.shape - assert concatenation.op.name == "concatenate" - assert list(concatenation_shape) == [1, 1, 1, ofm_channels] - - # check OFM - assert ofm.op.name == "reshape" - 
assert list(ofm_checked_type.shape) == ofm_shape - assert str(ofm_checked_type.dtype) == dtype - - matmul_pattern_table = [ - ( - ethosu.MatMulParams.composite_name, - ethosu.matmul_pattern(), - lambda pat: ethosu.MatMulParams(pat).is_valid(), - ) - ] - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={("ifm" + str(i)): shape for i, shape in enumerate(ifm_shapes)}, - dtype_dict={("ifm" + str(i)): dtype for i, _ in enumerate(ifm_shapes)}, - ) - - mod["main"] = bind_params_by_name(mod["main"], params) - mod = partition_ethosu_by_table(mod, matmul_pattern_table) - - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.MatMulRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - - verify(mod["tvmgen_default_ethos_u_main_0"]) - - -@pytest.mark.parametrize( - "ifm_shape,fract_size", - [[(1, 2, 8, 4), 15], [(1, 8), 12], [(1, 1, 4, 8), 10]], -) -def test_relay_tanh_fixed_point_legalize(ifm_shape, fract_size): - dtype = "int16" - - def create_model(): - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - ifm_fixed_point = relay.cast(ifm, "int32") - ifm_fixed_point = relay.fixed_point_multiply(ifm_fixed_point, 2**31 - 1, 0) - tanh = relay.tanh(ifm_fixed_point) - tanh = relay.fixed_point_multiply(tanh, 1, 31 - fract_size) - tanh = relay.cast(tanh, dtype) - return tvm.IRModule.from_expr(relay.Function([ifm], tanh)) - - mod = create_model() - - tanh_pattern_table = [ - ( - ethosu.TanhFixedPointParams.composite_name, - ethosu.tanh_fixed_point_pattern(), - lambda pat: ethosu.TanhFixedPointParams(pat).is_valid(), - ), - ] - - mod = partition_ethosu_by_table(mod, tanh_pattern_table) - mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( - legalize.TanhFixedPointRewriter(), mod["tvmgen_default_ethos_u_main_0"] - ) - mod = relay.transform.InferType()(mod) - - func = mod["tvmgen_default_ethos_u_main_0"] - - identity = func.body - assert identity.op.name == "contrib.ethosu.identity" - assert identity.attrs.activation == "TANH" - assert identity.args[0].checked_type.dtype == dtype - assert tuple(identity.args[0].checked_type.shape) == ifm_shape - # check LUT size - assert tuple(identity.args[1].checked_type.shape) == (512,) - assert identity.attrs.ifm_scale == 1 / 2**fract_size - assert identity.attrs.ifm_scale == identity.attrs.ofm_scale - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_legalize_no_ops.py b/tests/python/contrib/test_ethosu/test_legalize_no_ops.py deleted file mode 100644 index d6cb64b066b8..000000000000 --- a/tests/python/contrib/test_ethosu/test_legalize_no_ops.py +++ /dev/null @@ -1,107 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument - -import pytest - -pytest.importorskip("ethosu.vela") -import numpy as np -import tensorflow as tf -import tflite.Model - -from tvm import relay -from tvm.relay.backend.contrib.ethosu import legalize -from tvm.relay.op.contrib import ethosu -from tvm.relay.build_module import bind_params_by_name - -# There's a bug in TFLite converter which doesn't allow us to create single operator -# reshape and strided_slice graphs, so in order to have some testing coverage for these -# operators starting from TFLite, we test them alongside other operators -def test_tflite_reshape_and_strided_slice(): - dtype = "int8" - ifm_shape = [1, 8, 3, 6] - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def model_func(self, x): - weight_shape = [3, 3, 6, 1] # HWO1 - weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - op = tf.nn.depthwise_conv2d(x, weight, strides=[1, 1, 1, 1], padding="SAME") - op = tf.nn.relu(op) - op = tf.reshape(op, [1, 8, 6, 3]) - op = tf.nn.pool(op, [2, 2], "MAX") - op = tf.strided_slice(op, [0, 2, 3, 1], [1, 6, 5, 2]) - return op - - model = Model() - concrete_func = model.model_func.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - def verify(func): - # This TFLite graph gets lowered into - # deptwhise_conv2d -> clip -> reshape -> max_pool -> strided_slice -> reshape - # which gets legalized into ethosu_depthwise_conv2d -> reshape -> ehtosu_identity - # -> ethosu_pooling -> strided_slice -> identity -> reshape -> identity - - identity3 = func.body - reshape2 = identity3.args[0] - identity2 = reshape2.args[0] - strided_slice = identity2.args[0] - max_pool = strided_slice.args[0] - identity1 = max_pool.args[0] - reshape1 = identity1.args[0] - depthwise_conv2d = reshape1.args[0] - - assert identity3.op.name == "contrib.ethosu.identity" - assert reshape2.op.name == "reshape" - assert identity2.op.name == "contrib.ethosu.identity" - assert strided_slice.op.name == "strided_slice" - assert max_pool.op.name == "contrib.ethosu.pooling" - assert identity1.op.name == "contrib.ethosu.identity" - assert reshape1.op.name == "reshape" - assert depthwise_conv2d.op.name == "contrib.ethosu.depthwise_conv2d" - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - mod["main"] = bind_params_by_name(mod["main"], params) - mod = ethosu.partition_for_ethosu(mod) - mod = legalize.LegalizeEthosU()(mod) - - verify(mod["tvmgen_default_ethos_u_main_0"]) diff --git a/tests/python/contrib/test_ethosu/test_lookup_table.py b/tests/python/contrib/test_ethosu/test_lookup_table.py deleted file mode 100644 index e2b22897a0ab..000000000000 --- 
a/tests/python/contrib/test_ethosu/test_lookup_table.py +++ /dev/null @@ -1,175 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -import pytest - -pytest.importorskip("ethosu.vela") -import numpy as np -import tflite.Model - -import tvm -import tensorflow as tf -from tvm import relay -from tvm.relay.op.contrib.ethosu import partition_for_ethosu -from tvm.relay.build_module import bind_params_by_name # type: ignore - -from . import infra - - -ACCEL_TYPES = ["ethos-u55-256", "ethos-u55-128", "ethos-u55-64", "ethos-u55-32"] - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -def test_tflite_lut_activations(accel_type): - - dtype = "int8" - ifm_shape = (1, 55, 55, 3) - - def create_tflite_graph(): - class Model(tf.Module): - @tf.function - def tf_func(self, x): - weight_shape = (3, 3, ifm_shape[3], 4) - weight = tf.constant( - np.random.uniform(low=0, high=0.3, size=weight_shape), dtype=tf.float32 - ) - # The input strides to the TensorFlow API needs to be of shape 1x4 - op = tf.nn.conv2d(x, weight, strides=(1, 2, 2, 1), padding="SAME", dilations=(1, 1)) - op = tf.nn.tanh(op) - op = tf.nn.tanh(op) - - weight_shape2 = (2, 3, 4, 1) - weight2 = tf.constant( - np.random.uniform(low=0, high=0.3, size=weight_shape2), dtype=tf.float32 - ) - op = tf.nn.depthwise_conv2d( - op, weight2, strides=(1, 1, 1, 1), padding="VALID", dilations=(2, 2) - ) - op = tf.nn.sigmoid(op) - op = tf.nn.max_pool(op, (1, 1), strides=(1, 1, 1, 1), padding="SAME") - op = tf.nn.tanh(op) - return op - - model = Model() - concrete_func = model.tf_func.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - # Convert the model - def representative_dataset(): - for _ in range(100): - data = 0.7 * np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - tflite_graph = create_tflite_graph() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - relay_module, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": dtype}, - ) - mod = partition_for_ethosu(relay_module, params) - - # Generate reference data - input_data, output_data = infra.generate_ref_data_tflite(tflite_graph) - - test_runner = infra.create_test_runner(accel_type) - compiled_models = infra.build_source( - mod, - input_data, - output_data, - test_runner, - ) - - 
# Assumes only two runtime.Modules are created -- i.e. single offload module - ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0] - - # Verify generated C source - get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts") - compilation_artifacts = get_artifacts(ethosu_module) - cmms = bytes.fromhex(compilation_artifacts[0].command_stream) - infra.print_payload(cmms) - infra.verify_source(compiled_models, test_runner) - - -@pytest.mark.parametrize("accel_type", ACCEL_TYPES) -def test_random_lut(accel_type): - - dtype = "int8" - ifm_shape = (1, 55, 55, 3) - - lut_data = np.random.randint(-128, high=127, size=[256]) - lut_data_map = {idx: lut_data[idx + 128] for idx in range(-128, 128)} - - in_data = np.random.randint(-128, high=127, size=ifm_shape, dtype=dtype) - out_data = np.array([lut_data_map[i] for i in in_data.ravel()]).reshape(ifm_shape).astype(dtype) - - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - ifm0 = relay.var("ifm0", shape=ifm_shape, dtype=dtype) - lut1 = relay.var("lut1", shape=(256,), dtype="uint8") - - identity = infra.make_ethosu_identity(ifm0, lut=lut1, activation="LUT") - glb_ethosu = relay.GlobalVar("tvmgen_default_ethos_u_main_0") - - func = ( - relay.Function([ifm0, lut1], identity) - .with_attr("Inline", 1) - .with_attr("Compiler", "ethos-u") - .with_attr("global_symbol", "tvmgen_default_ethos_u_main_0") - .with_attr("Primitive", 1) - ) - - params = {"lut1": tvm.nd.array(lut_data.astype("uint8"))} - func = bind_params_by_name(func, params) - - mod = tvm.IRModule() - mod[glb_ethosu] = func - mod = relay.transform.InferType()(mod) - - call = relay.Call(glb_ethosu, [ifm]) - mod["main"] = relay.Function([ifm], call) - mod = relay.transform.InferType()(mod) - - test_runner = infra.create_test_runner(accel_type) - compiled_models = infra.build_source( - mod, - {"ifm": in_data}, - {"output": out_data}, - test_runner, - ) - - # Assumes only two runtime.Modules are created -- i.e. single offload module - ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0] - - # Verify generated C source - get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts") - compilation_artifacts = get_artifacts(ethosu_module) - cmms = bytes.fromhex(compilation_artifacts[0].command_stream) - infra.print_payload(cmms) - infra.verify_source(compiled_models, test_runner) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_lower_to_te.py b/tests/python/contrib/test_ethosu/test_lower_to_te.py deleted file mode 100644 index 9ec59af44163..000000000000 --- a/tests/python/contrib/test_ethosu/test_lower_to_te.py +++ /dev/null @@ -1,63 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest - -pytest.importorskip("ethosu.vela") -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu.tir.compiler import lower_to_te -from tvm.relay.backend.contrib.ethosu.tir.scheduler import OperatorCompute -import tvm.relay.backend.contrib.ethosu.op as ethosu_ops - - -def test_ethosu_conv2d(): - ifm = relay.var("ifm", shape=(1, 10, 20, 30), dtype="uint8") - weight = relay.var("weight", shape=(40, 3, 3, 30), dtype="uint8") - scale_bias = relay.var("scale_bias", shape=(40, 10), dtype="uint8") - lut = relay.var("lut", shape=(), dtype="uint8") - conv = ethosu_ops.ethosu_conv2d( - ifm, - weight, - scale_bias, - lut, - ifm_scale=0.5, - ifm_zero_point=10, - weight_zero_point=12, - ofm_scale=0.25, - ofm_zero_point=14, - ofm_channels=40, - padding=(1, 1, 1, 1), - kernel_shape=(3, 3), - strides=(1, 1), - dilation=(1, 1), - ) - expr = relay.Function(relay.analysis.free_vars(conv), conv) - mod = tvm.IRModule.from_expr(expr) - mod = relay.transform.InferType()(mod) - lowered = lower_to_te(mod["main"]) - assert len(lowered.outputs) == 1 - assert len(lowered.inputs) == 4 - conv2d_compute = OperatorCompute.from_output(lowered.outputs[0]) - assert conv2d_compute.op.name == "ethosu_conv2d" - input_shapes = set() - for inp in lowered.inputs: - input_shapes.add(tuple([x.value for x in inp.shape])) - assert input_shapes == {(40, 10), (1, 10, 20, 30), (40, 3, 3, 30), ()} - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_lut_optimizer.py b/tests/python/contrib/test_ethosu/test_lut_optimizer.py deleted file mode 100644 index b8a275446207..000000000000 --- a/tests/python/contrib/test_ethosu/test_lut_optimizer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Test the pass that removes unnecssary identity operation if the identity -uses LUT and the preceding operator is LUT capable and doesn't already have a LUT. -""" -import pytest - -pytest.importorskip("ethosu.vela") - -import tensorflow as tf -import numpy as np - -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu.codegen import LUTsOptimizer -from tvm.relay.backend.contrib.ethosu.codegen import relay_to_tir -from tvm.relay.op.contrib.ethosu import partition_for_ethosu - -from . 
import infra - - -def test_merge_lut_into_conv(): - """If an operator that has a LUT attribute is followed by an identity operator - with LUT, we can merge the two operataors.""" - - ifm = relay.var("x", shape=(1, 8, 8, 4), dtype="int8") - lut1 = relay.const([i for i in range(256)], dtype="int8") - lut2 = relay.const([i for i in reversed(range(256))], dtype="int8") - - def before(): - conv1 = infra.make_ethosu_conv2d(ifm, 4, 4, (3, 3), (1, 1), (1, 1), (1, 1)) - id1 = infra.make_ethosu_identity(conv1, lut=lut1, activation="TANH") - conv2 = infra.make_ethosu_conv2d(id1, 4, 7, (2, 2), (1, 1), (1, 1), (1, 1)) - id2 = infra.make_ethosu_identity(conv2, lut=lut2, activation="SIGMOID") - - func = relay.Function(relay.analysis.free_vars(id2), id2) - func = func.with_attr("Compiler", "ethos-u") - mod = tvm.IRModule.from_expr(func) - return mod - - def after(): - conv1 = infra.make_ethosu_conv2d( - ifm, 4, 4, (3, 3), (1, 1), (1, 1), (1, 1), lut=lut1, activation="TANH" - ) - conv2 = infra.make_ethosu_conv2d( - conv1, 4, 7, (2, 2), (1, 1), (1, 1), (1, 1), lut=lut2, activation="SIGMOID" - ) - - func = relay.Function(relay.analysis.free_vars(conv2), conv2) - func = func.with_attr("Compiler", "ethos-u") - mod = tvm.IRModule.from_expr(func) - mod = relay.transform.InferType()(mod) - return mod - - mod = LUTsOptimizer()(before()) - mod = relay.transform.InferType()(mod) - - tvm.ir.assert_structural_equal(mod, after()) - - -def test_merge_lut_into_binary_elementwise(): - """If an binary elementwise operator is followed by an identity operator - with LUT, we can merge the two operataors.""" - - shape = (1, 8, 8, 4) - dtype = "int8" - ifm = relay.var("x", shape=shape, dtype=dtype) - ifm2 = relay.var("x", shape=shape, dtype=dtype) - lut1 = relay.const([i for i in range(256)], dtype=dtype) - lut2 = relay.const([i for i in reversed(range(256))], dtype=dtype) - - def before(): - sub = infra.make_ethosu_binary_elementwise(ifm, ifm2, shape[-1], shape[-1], "SUB", dtype) - id1 = infra.make_ethosu_identity(sub, lut=lut1, activation="TANH") - add = infra.make_ethosu_binary_elementwise(id1, ifm2, shape[-1], shape[-1], "ADD", dtype) - id2 = infra.make_ethosu_identity(add, lut=lut2, activation="SIGMOID") - - func = relay.Function(relay.analysis.free_vars(id2), id2) - func = func.with_attr("Compiler", "ethos-u") - mod = tvm.IRModule.from_expr(func) - return mod - - def after(): - sub = infra.make_ethosu_binary_elementwise( - ifm, ifm2, shape[-1], shape[-1], "SUB", dtype, lut=lut1, activation="TANH" - ) - add = infra.make_ethosu_binary_elementwise( - sub, ifm2, shape[-1], shape[-1], "ADD", dtype, lut=lut2, activation="SIGMOID" - ) - - func = relay.Function(relay.analysis.free_vars(add), add) - func = func.with_attr("Compiler", "ethos-u") - mod = tvm.IRModule.from_expr(func) - mod = relay.transform.InferType()(mod) - return mod - - mod = LUTsOptimizer()(before()) - mod = relay.transform.InferType()(mod) - - tvm.ir.assert_structural_equal(mod, after()) - - -def test_multiple_luts(): - """Test that when an operation already has a LUT, we don't overwrite that LUT""" - - ifm = relay.var("x", shape=(1, 8, 8, 4), dtype="int8") - lut1 = relay.const([i for i in range(256)], dtype="int8") - lut2 = relay.const([i for i in reversed(range(256))], dtype="int8") - - def before(): - conv1 = infra.make_ethosu_conv2d(ifm, 4, 4, (3, 3), (1, 1), (1, 1), (1, 1)) - id1 = infra.make_ethosu_identity(conv1, lut=lut1, activation="TANH") - id2 = infra.make_ethosu_identity(id1, lut=lut2, activation="TANH") - - func = 
relay.Function(relay.analysis.free_vars(id2), id2) - func = func.with_attr("Compiler", "ethos-u") - mod = tvm.IRModule.from_expr(func) - return mod - - def after(): - conv1 = infra.make_ethosu_conv2d( - ifm, 4, 4, (3, 3), (1, 1), (1, 1), (1, 1), lut=lut1, activation="TANH" - ) - id2 = infra.make_ethosu_identity(conv1, lut=lut2, activation="TANH") - - func = relay.Function(relay.analysis.free_vars(id2), id2) - func = func.with_attr("Compiler", "ethos-u") - mod = tvm.IRModule.from_expr(func) - mod = relay.transform.InferType()(mod) - return mod - - mod = LUTsOptimizer()(before()) - mod = relay.transform.InferType()(mod) - - tvm.ir.assert_structural_equal(mod, after()) - - -def test_lut_optimizer_runs_in_compilation_pipeline(): - """Test that the LUT optimization pass runs as part of the NPU compilation pipeline.""" - ifm_shape = (1, 4, 4, 4) - - @tf.function - def get_graph(x): - weight1 = tf.constant(np.random.uniform(size=(1, 1, 4, 4)), dtype=tf.float32) - op = tf.nn.conv2d(x, weight1, (1, 1), "VALID") - op = tf.nn.tanh(op) - weight2 = tf.constant(np.random.uniform(size=(1, 1, 4, 1)), dtype=tf.float32) - op = tf.nn.depthwise_conv2d(op, weight2, (1, 1, 1, 1), "VALID") - return tf.nn.tanh(op) - - mod, _ = infra.get_tflite_graph(get_graph, [ifm_shape]) - mod = partition_for_ethosu(mod) - mod = relay_to_tir(mod) - - external_gv_name = mod["main"].body.op.name_hint - prim_func = mod[external_gv_name] - - # Check for hints in the TIR prim func that the LUT optimization pass has ran. - # If the module was optimized, there should be no identity operations. - def check_identity(stmt): - if isinstance(stmt, tvm.tir.expr.Call): - assert stmt.args[0] != "ethosu_identity" - - tvm.tir.stmt_functor.post_order_visit(prim_func.body, check_identity) diff --git a/tests/python/contrib/test_ethosu/test_merge_constants.py b/tests/python/contrib/test_ethosu/test_merge_constants.py deleted file mode 100644 index 5c5cd960e5d3..000000000000 --- a/tests/python/contrib/test_ethosu/test_merge_constants.py +++ /dev/null @@ -1,814 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
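The MergeConstants tests that follow all check the same contract: the separate ethosu_copy calls feeding one operator are folded into a single copy, and the constant dictionary handed back mirrors that by concatenating the per-operator arrays under one key. A minimal numpy-only sketch of the dictionary side of that contract, assuming the toy key layout used in test_only_one_operator below (the pass itself also rewrites the TIR, which the sketch does not attempt to show):

    import numpy as np

    # Per-operator constants before merging: weights under key 0,
    # scale/bias under key 1 (hypothetical toy values).
    const_dict = {
        0: np.array([0, 10], dtype=np.uint8),
        1: np.array([1, 11], dtype=np.uint8),
    }

    # After merging, both arrays are expected back-to-back under a single key,
    # so one ethosu_copy can populate one allocated buffer for the operator.
    new_const_dict = {0: np.concatenate((const_dict[0], const_dict[1]))}

    assert list(new_const_dict[0]) == [0, 10, 1, 11]

The check_const_dictionaries helper defined below verifies exactly this, key by key and element by element, against the dictionary returned by MergeConstants.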
-import pytest - -pytest.importorskip("ethosu.vela") - -import tvm -from tvm.script import tir as T -from tvm.relay.backend.contrib.ethosu.tir.passes import MergeConstants -import numpy as np - - -def check_const_dictionaries(const_dict, new_const_dict): - assert list(const_dict) == list(new_const_dict) - for key, value in const_dict.items(): - new_value = new_const_dict[key] - assert len(value) == len(new_value) - for i in range(len(value)): - assert value[i] == new_value[i] - - -def test_only_one_operator(): - # fmt: off - # undefined vars used - @tvm.script.ir_module(check_well_formed=False) - class InputModule: - @T.prim_func - def main(buffer2: T.Buffer((128,), "uint8"), buffer3: T.Buffer((32,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([8192], "int8") - buffer10 = T.Buffer([2048], "int8") - # body - p1_data = T.allocate([128], "uint8", "global") - p1 = T.Buffer([128], "uint8", data=p1_data) - p4_data = T.allocate([32], "uint8", "global") - p4 = T.Buffer([32], "uint8", data=p4_data) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, 12, p4[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - - # undefined vars used - @tvm.script.ir_module(check_well_formed=False) - class ReferenceModule: - @T.prim_func - def main(buffer2: T.Buffer((160,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([8192], "int8") - buffer10 = T.Buffer([2048], "int8") - # body - p4_data = T.allocate([160], "uint8", "global") - p4 = T.Buffer([160], "uint8", data=p4_data) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p4[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p4[0], 128, 12, p4[128], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - const_dict = { - 0: np.array([0, 10], dtype=np.uint8), - 1: np.array([1, 11], dtype=np.uint8), - } - new_const_dict = {0: np.concatenate((const_dict[0], const_dict[1]))} - test_mod, const_dict = MergeConstants(const_dict)(InputModule) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - check_const_dictionaries(const_dict, new_const_dict) - - -def test_all_operators_with_weights(): - # fmt: off - # undefined vars used - @tvm.script.ir_module(check_well_formed=False) - class InputModule: - @T.prim_func - def main(buffer2: T.Buffer((128,), "uint8"), buffer3: T.Buffer((32,), "uint8"), buffer4: T.Buffer((112,), "uint8"), buffer5: T.Buffer((32,), "uint8"), buffer6: T.Buffer((112,), "uint8"), buffer7: T.Buffer((32,), "uint8"), buffer8: T.Buffer((112,), "uint8"), buffer9: T.Buffer((32,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([8192], "int8") - 
buffer10 = T.Buffer([2048], "int8") - # body - p1_data = T.allocate([128], "uint8", "global") - p1 = T.Buffer([128], "uint8", data=p1_data) - p2_data = T.allocate([112], "uint8", "global") - p2 = T.Buffer([112], "uint8", data=p2_data) - p3_data = T.allocate([112], "uint8", "global") - p3 = T.Buffer([112], "uint8", data=p3_data) - p4_data = T.allocate([32], "uint8", "global") - p4 = T.Buffer([32], "uint8", data=p4_data) - p5_data = T.allocate([32], "uint8", "global") - p5 = T.Buffer([32], "uint8", data=p5_data) - p6_data = T.allocate([32], "uint8", "global") - p6 = T.Buffer([32], "uint8", data=p6_data) - p7_data = T.allocate([112], "uint8", "global") - p7 = T.Buffer([112], "uint8", data=p7_data) - p8_data = T.allocate([3], "uint8", "global") - p8 = T.Buffer([3], "uint8", data=p8_data) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 112, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 32, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, 12, p4[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer6[0], 112, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer7[0], 32, p6[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 112, 12, p5[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer8[0], 112, p7[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer9[0], 32, p8[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 112, 12, p6[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p7[0], 112, 12, p8[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - - # undefined vars used - @tvm.script.ir_module(check_well_formed=False) - class ReferenceModule: - @T.prim_func - def main(buffer2: T.Buffer((160,), "uint8"), buffer4: T.Buffer((144,), "uint8"), buffer6: T.Buffer((144,), "uint8"), buffer8: T.Buffer((144,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([8192], "int8") - buffer10 = T.Buffer([2048], "int8") - # body - p4_data = T.allocate([160], "uint8", "global") - p4 = T.Buffer([160], "uint8", data=p4_data) - p7_data = T.allocate([144], "uint8", "global") - p7 = T.Buffer([144], "uint8", data=p7_data) - p10_data = T.allocate([144], "uint8", "global") - p10 = 
T.Buffer([144], "uint8", data=p10_data) - p11_data = T.allocate([144], "uint8", "global") - p11 = T.Buffer([144], "uint8", data=p11_data) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p4[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 144, p7[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p4[0], 128, 12, p4[128], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer6[0], 144, p10[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p7[0], 112, 12, p7[112], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer8[0], 144, p11[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p10[0], 112, 12, p10[112], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p11[0], 112, 12, p11[112], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - const_dict = { - 0: np.array([0], dtype=np.uint8), - 1: np.array([1], dtype=np.uint8), - 2: np.array([2], dtype=np.uint8), - 3: np.array([3], dtype=np.uint8), - 4: np.array([4], dtype=np.uint8), - 5: np.array([5], dtype=np.uint8), - 6: np.array([6], dtype=np.uint8), - 7: np.array([7], dtype=np.uint8), - } - new_const_dict = { - 0: np.concatenate((const_dict[0], const_dict[1])), - 1: np.concatenate((const_dict[2], const_dict[3])), - 2: np.concatenate((const_dict[4], const_dict[5])), - 3: np.concatenate((const_dict[6], const_dict[7])), - } - test_mod, const_dict = MergeConstants(const_dict)(InputModule) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - check_const_dictionaries(const_dict, new_const_dict) - - -def test_operators_with_and_without_weights(): - # fmt: off - # undefined vars used - @tvm.script.ir_module(check_well_formed=False) - class InputModule: - @T.prim_func - def main(buffer2: T.Buffer((80,), "uint8"), buffer3: T.Buffer((64,), "uint8")) -> None: - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer0 = T.Buffer([390336], "int8") - buffer1 = T.Buffer([97156], "int8") - buffer6 = T.Buffer([390336], "int8") - # body - p2_data = T.allocate([80], "uint8", "global") - p2 = T.Buffer([80], "uint8", data=p2_data) - p3_data = T.allocate([64], "uint8", "global") - p3 = T.Buffer([64], "uint8", data=p3_data) - T.evaluate(T.call_extern("ethosu_pooling", "int8", 214, 227, 2, 214, 0, 227, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 454, 2, 1, "int8", 214, 114, 2, 214, 0, 114, buffer0[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1824, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 
0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 80, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 64, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 2, 214, 0, 114, buffer0[0], 0, 0, 0, T.float32(0.00392157), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 5, 214, 0, 114, buffer6[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, 3, 1, 1, 1, 1, 2, p2[0], 80, 0, p3[0], 64, 0, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - - # undefined vars used - @tvm.script.ir_module(check_well_formed=False) - class ReferenceModule: - @T.prim_func - def main(buffer2: T.Buffer((144,), "uint8")) -> None: - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer0 = T.Buffer([390336], "int8") - buffer1 = T.Buffer([97156], "int8") - buffer6 = T.Buffer([390336], "int8") - # body - p3_data = T.allocate([144], "uint8", "global") - p3 = T.Buffer([144], "uint8", data=p3_data) - T.evaluate(T.call_extern("ethosu_pooling", "int8", 214, 227, 2, 214, 0, 227, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 454, 2, 1, "int8", 214, 114, 2, 214, 0, 114, buffer0[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1824, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 144, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 2, 214, 0, 114, buffer0[0], 0, 0, 0, T.float32(0.00392157), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 5, 214, 0, 114, buffer6[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, 3, 1, 1, 1, 1, 2, p3[0], 80, 0, p3[80], 64, 0, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - const_dict = { - 0: np.array([0], dtype=np.uint8), - 1: np.array([1], dtype=np.uint8), - } - new_const_dict = {0: np.concatenate((const_dict[0], const_dict[1]))} - test_mod, const_dict = MergeConstants(const_dict)(InputModule) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - check_const_dictionaries(const_dict, new_const_dict) - - -def test_copy_to_buffer_with_local_scope(): - # fmt: off - # undefined vars used - @tvm.script.ir_module(check_well_formed=False) - class InputModule: - @T.prim_func - def main(buffer1: T.Buffer((64,), "uint8"), - buffer2: T.Buffer((48,), "uint8"), - buffer3: T.Buffer((256,), "uint8"), - buffer4: T.Buffer((256,), "uint8"), - buffer5: T.Buffer((16,), "uint8"), - buffer6: T.Buffer((48,), "uint8"), - buffer7: T.Buffer((256,), "uint8"), - buffer8: T.Buffer((64,), "uint8"), - buffer9: T.Buffer((256,), "int8"), - ) -> None: - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # body - p1_data = T.allocate([48], "uint8", "global") - p1 = T.Buffer([48], "uint8", data=p1_data) - p2_data = T.allocate([48], "uint8", "global") - p2 = T.Buffer([48], "uint8", data=p2_data) - p3_data = T.allocate([256], "int8", "local") - p3 = T.Buffer([256], "int8", data=p3_data, scope="local") - p5_data = T.allocate([16], "uint8", "global") - p5 = T.Buffer([16], "uint8", data=p5_data) - p6_data = T.allocate([48], "uint8", "global") - p6 = T.Buffer([48], "uint8", data=p6_data) - p7_data = T.allocate([256], "int8", "local") - p7 = T.Buffer([256], "int8", data=p7_data, scope="local") - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 48, p1[0], 
dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 48, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 256, p3[0], dtype="handle")) # Local - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 16, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer6[0], 48, p6[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 4, 4, 4, 0, 4, buffer1[0], 0, 0, 0, T.float32(0.00392081), -128, "NHWC", 16, 4, 1, "int8", 4, 4, 4, 4, 0, 4, buffer9[0], 0, 0, 0, T.float32(0.00839574), -128, "NHCWB16", 64, 16, 1, 1, 1, 1, 1, 1, 1, p1[0], 48, 0, p2[0], 48, 0, 0, 0, 0, "TANH", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer7[0], 256, p7[0], dtype="handle")) # Local - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 4, 4, 4, 4, 0, 4, buffer9[0], 0, 0, 0, T.float32(0.0078125), 0, "NHCWB16", 64, 16, 1, "int8", 4, 4, 4, 4, 0, 4, buffer8[0], 0, 0, 0, T.float32(0.00372155), -128, "NHWC", 16, 4, 1, 1, 1, 1, 1, 1, 1, p5[0], 16, 0, p6[0], 48, 0, 0, 0, 0, "TANH", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - - # undefined vars used - @tvm.script.ir_module(check_well_formed=False) - class ReferenceModule: - @T.prim_func - def main(buffer1: T.Buffer((64,), "uint8"), - buffer2: T.Buffer((96,), "uint8"), - buffer4: T.Buffer((256,), "uint8"), - buffer5: T.Buffer((64,), "uint8"), - buffer7: T.Buffer((256,), "uint8"), - buffer8: T.Buffer((64,), "uint8"), - buffer9: T.Buffer((256,), "int8"), - ) -> None: - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # body - p1_data = T.allocate([96], "uint8", "global") - p1 = T.Buffer([96], "uint8", data=p1_data) - p2_data = T.allocate([64], "uint8", "global") - p2 = T.Buffer([64], "uint8", data=p2_data) - p3_data = T.allocate([256], "int8", "local") - p3 = T.Buffer([256], "int8", data=p3_data, scope="local") - p7_data = T.allocate([256], "int8", "local") - p7 = T.Buffer([256], "int8", data=p7_data, scope="local") - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 96, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 256, p3[0], dtype="handle")) # Local - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 64, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 4, 4, 4, 0, 4, buffer1[0], 0, 0, 0, T.float32(0.00392081), -128, "NHWC", 16, 4, 1, "int8", 4, 4, 4, 4, 0, 4, buffer9[0], 0, 0, 0, T.float32(0.00839574), -128, "NHCWB16", 64, 16, 1, 1, 1, 1, 1, 1, 1, p1[0], 48, 0, p1[48], 48, 0, 0, 0, 0, "TANH", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer7[0], 256, p7[0], dtype="handle")) # Local - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 4, 4, 4, 4, 0, 4, buffer9[0], 0, 0, 0, T.float32(0.0078125), 0, "NHCWB16", 64, 16, 1, "int8", 4, 4, 4, 4, 0, 4, buffer8[0], 0, 0, 0, T.float32(0.00372155), -128, "NHWC", 16, 4, 1, 1, 1, 1, 1, 1, 1, p2[0], 16, 0, p2[16], 48, 0, 0, 0, 0, "TANH", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - const_dict = { - 1: np.array([1], dtype=np.uint8), - 2: np.array([2], dtype=np.uint8), - 3: np.array([3], dtype=np.uint8), - 4: np.array([4], dtype=np.uint8), - 5: np.array([5], dtype=np.uint8), - 6: np.array([6], dtype=np.uint8), - } - new_const_dict = { - 1: np.concatenate((const_dict[1], const_dict[2])), - 2: const_dict[3], - 3: np.concatenate((const_dict[4], const_dict[5])), - 4: const_dict[6], - } - test_mod, const_dict = 
MergeConstants(const_dict)(InputModule) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - check_const_dictionaries(const_dict, new_const_dict) - - -def test_no_copies(): - # the vars placeholder and ethosu_write are undefined - # fmt: off - @tvm.script.ir_module(check_well_formed=False) - class InputModule: - @T.prim_func - def main() -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.Buffer([20], "int8") - ethosu_write = T.Buffer([16], "int8") - # body - ethosu_write_4_data = T.allocate([16], "int8", "global") - ethosu_write_4 = T.Buffer([16], "int8", data=ethosu_write_4_data) - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 1, 4, 4, 1, 0, 4, placeholder[0], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 4, 1, "int8", 1, 4, 1, 1, 0, 4, placeholder[16], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 1, 1, "int8", 1, 4, 4, 1, 0, 4, ethosu_write_4[0], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 4, 1, "MAX", 0, "CLIP", -128, 127, "TFL", 1, 4, 4, dtype="handle")) - T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 4, 4, 1, 0, 4, ethosu_write_4[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 4, 4, 1, 0, 4, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - @tvm.script.ir_module(check_well_formed=False) - class ReferenceModule: - @T.prim_func - def main() -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.Buffer([20], "int8") - ethosu_write = T.Buffer([16], "int8") - # body - ethosu_write_4_data = T.allocate([16], "int8", "global") - ethosu_write_4 = T.Buffer([16], "int8", data=ethosu_write_4_data) - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 1, 4, 4, 1, 0, 4, placeholder[0], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 4, 1, "int8", 1, 4, 1, 1, 0, 4, placeholder[16], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 1, 1, "int8", 1, 4, 4, 1, 0, 4, ethosu_write_4[0], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 4, 1, "MAX", 0, "CLIP", -128, 127, "TFL", 1, 4, 4, dtype="handle")) - T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 4, 4, 1, 0, 4, ethosu_write_4[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 4, 4, 1, 0, 4, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - const_dict = {} - new_const_dict = {} - test_mod, const_dict = MergeConstants(const_dict)(InputModule) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - check_const_dictionaries(const_dict, new_const_dict) - - -def test_copies_to_the_same_buffer(): - # fmt: off - # undefined vars used - @tvm.script.ir_module(check_well_formed=False) - class InputModule: - @T.prim_func - def main(buffer2: T.Buffer((128,), "uint8"), buffer3: T.Buffer((32,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([8192], "int8") - buffer10 = T.Buffer([2048], "int8") - # body - p1_data = T.allocate([128], "uint8", "global") - p1 = T.Buffer([128], "uint8", data=p1_data) - p4_data = T.allocate([32], "uint8", "global") - p4 = T.Buffer([32], "uint8", data=p4_data) - 
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, 12, p4[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, 12, p4[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - - # undefined vars used - @tvm.script.ir_module(check_well_formed=False) - class ReferenceModule: - @T.prim_func - def main(buffer2: T.Buffer((160,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([8192], "int8") - buffer10 = T.Buffer([2048], "int8") - # body - p5_data = T.allocate([160], "uint8", "global") - p5 = T.Buffer([160], "uint8", data=p5_data) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5[0], 128, 12, p5[128], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p5[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5[0], 128, 12, p5[128], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - const_dict = { - 0: np.array([0], dtype=np.uint8), - 1: np.array([1], dtype=np.uint8), - } - new_const_dict = {0: np.concatenate((const_dict[0], const_dict[1]))} - test_mod, const_dict = MergeConstants(const_dict)(InputModule) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - check_const_dictionaries(const_dict, new_const_dict) - - -def test_read_from_the_same_buffer(): - # fmt: off - @tvm.script.ir_module - class InputModule: - @T.prim_func - def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), buffer1: T.Buffer((368,), "uint8"), buffer2: T.Buffer((96,), "uint8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition - placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data) - ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data) - # body - p1_data = T.allocate([368], "uint8", "global") - p1 = T.Buffer([368], "uint8", data=p1_data) - p2_data = T.allocate([96], "uint8", "global") - p2 = T.Buffer([96], "uint8", data=p2_data) - 
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 368, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 96, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p2[0], 48, p2[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - - @tvm.script.ir_module - class ReferenceModule: - @T.prim_func - def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((464,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition - placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data) - ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data) - # body - p1_data = T.allocate([464], "uint8", "global") - p1 = T.Buffer([464], "uint8", data=p1_data) - T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 464, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - const_dict = { - 1: np.array([1], dtype=np.uint8), - 2: np.array([2], dtype=np.uint8), - } - new_const_dict = {1: np.concatenate((const_dict[1], const_dict[2]))} - test_mod, const_dict = MergeConstants(const_dict)(InputModule) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - check_const_dictionaries(const_dict, new_const_dict) - - -def test_arbitrary_argument_order(): - # fmt: off - @tvm.script.ir_module - class InputModule: - @T.prim_func - def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((368,), "uint8"), buffer2: T.Buffer((96,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8"), buffer3: T.Buffer((368,), "uint8"), buffer4: T.Buffer((96,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition - placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data) - ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data) - # body - p1_data = T.allocate([368], "uint8", "global") - p1 = T.Buffer([368], "uint8", data=p1_data) - p2_data = T.allocate([96], "uint8", "global") - p2 = T.Buffer([96], "uint8", data=p2_data) - p3_data = T.allocate([368], "uint8", "global") - p3 = T.Buffer([368], "uint8", data=p3_data) - p4_data = T.allocate([96], "uint8", "global") - p4 = T.Buffer([96], "uint8", data=p4_data) - T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 368, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 96, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p2[0], 48, p2[48], 48, 0, 0, 0, 0, "NONE", 0, 0, 
"TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 368, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 96, p4[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[2048], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 192, p3[192], 176, 12, p4[0], 48, p4[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - - @tvm.script.ir_module - class ReferenceModule: - @T.prim_func - def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((464,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8"), buffer2: T.Buffer((464,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition - placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data) - ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data) - # body - p1_data = T.allocate([464], "uint8", "global") - p1 = T.Buffer([464], "uint8", data=p1_data) - p2_data = T.allocate([464], "uint8", "global") - p2 = T.Buffer([464], "uint8", data=p2_data) - T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 464, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 464, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[2048], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 192, p2[192], 176, 12, p2[368], 48, p2[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - const_dict = { - 1: np.array([1], dtype=np.uint8), - 2: np.array([2], dtype=np.uint8), - 4: np.array([4], dtype=np.uint8), - 5: np.array([5], dtype=np.uint8), - } - new_const_dict = { - 1: np.concatenate((const_dict[1], const_dict[2])), - 3: np.concatenate((const_dict[4], const_dict[5])), - } - test_mod, const_dict = MergeConstants(const_dict)(InputModule) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, False) - check_const_dictionaries(const_dict, new_const_dict) - - -def test_arbitrary_argument_order_const_split(): - # fmt: off - @tvm.script.ir_module - class InputModule: - @T.prim_func - def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((368,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8"), buffer2: T.Buffer((96,), "uint8"), buffer3: T.Buffer((368,), "uint8"), buffer4: T.Buffer((96,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition - placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data) - ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data) - # body - p1_data = T.allocate([368], "uint8", "global") - p1 = T.Buffer([368], "uint8", 
data=p1_data) - p2_data = T.allocate([96], "uint8", "global") - p2 = T.Buffer([96], "uint8", data=p2_data) - p3_data = T.allocate([368], "uint8", "global") - p3 = T.Buffer([368], "uint8", data=p3_data) - p4_data = T.allocate([96], "uint8", "global") - p4 = T.Buffer([96], "uint8", data=p4_data) - T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 368, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 96, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p2[0], 48, p2[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 368, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 96, p4[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[2048], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 192, p3[192], 176, 12, p4[0], 48, p4[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - - @tvm.script.ir_module - class ReferenceModule: - @T.prim_func - def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((464,), "uint8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8"), buffer2: T.Buffer((464,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition - placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data) - ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data) - # body - p1_data = T.allocate([464], "uint8", "global") - p1 = T.Buffer([464], "uint8", data=p1_data) - p2_data = T.allocate([464], "uint8", "global") - p2 = T.Buffer([464], "uint8", data=p2_data) - T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 464, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 464, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[2048], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 192, p2[192], 176, 12, p2[368], 48, p2[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - const_dict = { - 1: np.array([1], dtype=np.uint8), - 3: np.array([3], dtype=np.uint8), - 4: np.array([4], dtype=np.uint8), - 5: np.array([5], dtype=np.uint8), - } - new_const_dict = { - 1: np.concatenate((const_dict[1], const_dict[3])), - 3: np.concatenate((const_dict[4], const_dict[5])), - } - test_mod, const_dict = MergeConstants(const_dict)(InputModule) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - check_const_dictionaries(const_dict, 
new_const_dict) - - -def test_arbitrary_argument_order_const_split_mixed(): - # fmt: off - @tvm.script.ir_module - class InputModule: - @T.prim_func - def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((368,), "uint8"), buffer2: T.Buffer((368,), "uint8"), input_ethosu_write: T.Buffer((2,16,16,8), "int8"), buffer3: T.Buffer((96,), "uint8"), buffer4: T.Buffer((96,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition - placeholder = T.Buffer(8192, dtype='int8', data=input_placeholder.data) - ethosu_write = T.Buffer(4096, dtype='int8', data=input_ethosu_write.data) - # body - p1_data = T.allocate([368], "uint8", "global") - p1 = T.Buffer([368], "uint8", data=p1_data) - p2_data = T.allocate([368], "uint8", "global") - p2 = T.Buffer([368], "uint8", data=p2_data) - p3_data = T.allocate([96], "uint8", "global") - p3 = T.Buffer([96], "uint8", data=p3_data) - p4_data = T.allocate([96], "uint8", "global") - p4 = T.Buffer([96], "uint8", data=p4_data) - T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 368, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 96, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p3[0], 48, p3[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 368, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 96, p4[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[2048], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 192, p2[192], 176, 12, p4[0], 48, p4[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - - @tvm.script.ir_module - class ReferenceModule: - @T.prim_func - def main(input_placeholder: T.Buffer((1,16,16,32), "int8"), buffer1: T.Buffer((464,), "uint8"), buffer2: T.Buffer((464,), "uint8"), input_ethosu_write: T.Buffer((2,16,16,8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition - placeholder = T.Buffer(8192, dtype='int8', data=input_placeholder.data) - ethosu_write = T.Buffer(4096, dtype='int8', data=input_ethosu_write.data) - # body - p1_data = T.allocate([464], "uint8", "global") - p1 = T.Buffer([464], "uint8", data=p1_data) - p2_data = T.allocate([464], "uint8", "global") - p2 = T.Buffer([464], "uint8", data=p2_data) - T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 464, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 464, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, 
T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[2048], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 192, p2[192], 176, 12, p2[368], 48, p2[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - const_dict = { - 1: np.array([1], dtype=np.uint8), - 2: np.array([2], dtype=np.uint8), - 4: np.array([4], dtype=np.uint8), - 5: np.array([5], dtype=np.uint8), - } - new_const_dict = { - 1: np.concatenate((const_dict[1], const_dict[4])), - 2: np.concatenate((const_dict[2], const_dict[5])), - } - test_mod, const_dict = MergeConstants(const_dict)(InputModule) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - check_const_dictionaries(const_dict, new_const_dict) - - -def test_cycle_count(): - # fmt: off - # undefined vars used - @tvm.script.ir_module(check_well_formed=False) - class InputModule: - @T.prim_func - def main(buffer2: T.Buffer((128,), "uint8"), buffer3: T.Buffer((32,), "uint8"), buffer4: T.Buffer((112,), "uint8"), buffer5: T.Buffer((32,), "uint8"), buffer6: T.Buffer((112,), "uint8"), buffer7: T.Buffer((32,), "uint8"), buffer8: T.Buffer((112,), "uint8"), buffer9: T.Buffer((32,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - v1a = T.int32() - v1b = T.int32() - v1c = T.int32() - v2a = T.int32() - v2b = T.int32() - v2c = T.int32() - v3a = T.int32() - v3b = T.int32() - v3c = T.int32() - v4a = T.int32() - v4b = T.int32() - v4c = T.int32() - buffer1 = T.Buffer([8192], "int8") - buffer10 = T.Buffer([2048], "int8") - # body - p1_data = T.allocate([128], "uint8", "global") - p1 = T.Buffer([128], "uint8", data=p1_data) - p2_data = T.allocate([112], "uint8", "global") - p2 = T.Buffer([112], "uint8", data=p2_data) - p3_data = T.allocate([112], "uint8", "global") - p3 = T.Buffer([112], "uint8", data=p3_data) - p4_data = T.allocate([32], "uint8", "global") - p4 = T.Buffer([32], "uint8", data=p4_data) - p5_data = T.allocate([32], "uint8", "global") - p5 = T.Buffer([32], "uint8", data=p5_data) - p6_data = T.allocate([32], "uint8", "global") - p6 = T.Buffer([32], "uint8", data=p6_data) - p7_data = T.allocate([112], "uint8", "global") - p7 = T.Buffer([112], "uint8", data=p7_data) - p8_data = T.allocate([3], "uint8", "global") - p8 = T.Buffer([3], "uint8", data=p8_data) - with T.attr(T.iter_var(v1a, None, "DataPar", ""), "pragma_compute_cycles_hint", 100): - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle")) - with T.attr(T.iter_var(v1b, None, "DataPar", ""), "pragma_compute_cycles_hint", 101): - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle")) - with T.attr(T.iter_var(v2a, None, "DataPar", ""), "pragma_compute_cycles_hint", 102): - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 112, p2[0], dtype="handle")) - with T.attr(T.iter_var(v2b, None, "DataPar", ""), "pragma_compute_cycles_hint", 103): - T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 32, p5[0], dtype="handle")) - with T.attr(T.iter_var(v1c, None, "DataPar", ""), "pragma_compute_cycles_hint", 300): - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, 12, p4[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - with 
T.attr(T.iter_var(v3a, None, "DataPar", ""), "pragma_compute_cycles_hint", 104): - T.evaluate(T.call_extern("ethosu_copy", buffer6[0], 112, p3[0], dtype="handle")) - with T.attr(T.iter_var(v3b, None, "DataPar", ""), "pragma_compute_cycles_hint", 105): - T.evaluate(T.call_extern("ethosu_copy", buffer7[0], 32, p6[0], dtype="handle")) - with T.attr(T.iter_var(v2c, None, "DataPar", ""), "pragma_compute_cycles_hint", 301): - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 112, 12, p5[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - with T.attr(T.iter_var(v4a, None, "DataPar", ""), "pragma_compute_cycles_hint", 106): - T.evaluate(T.call_extern("ethosu_copy", buffer8[0], 112, p7[0], dtype="handle")) - with T.attr(T.iter_var(v4b, None, "DataPar", ""), "pragma_compute_cycles_hint", 107): - T.evaluate(T.call_extern("ethosu_copy", buffer9[0], 32, p8[0], dtype="handle")) - with T.attr(T.iter_var(v3c, None, "DataPar", ""), "pragma_compute_cycles_hint", 302): - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 112, 12, p6[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - with T.attr(T.iter_var(v4c, None, "DataPar", ""), "pragma_compute_cycles_hint", 303): - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p7[0], 112, 12, p8[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - - @tvm.script.ir_module(check_well_formed=False) - class ReferenceModule: - @T.prim_func - def main(buffer2: T.Buffer((160,), "uint8"), buffer4: T.Buffer((144,), "uint8"), buffer6: T.Buffer((144,), "uint8"), buffer8: T.Buffer((144,), "uint8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - v1a = T.int32() - v1c = T.int32() - v2a = T.int32() - v2c = T.int32() - v3a = T.int32() - v3c = T.int32() - v4a = T.int32() - v4c = T.int32() - buffer1 = T.Buffer([8192], "int8") - buffer10 = T.Buffer([2048], "int8") - # body - p4_data = T.allocate([160], "uint8", "global") - p4 = T.Buffer([160], "uint8", data=p4_data) - p7_data = T.allocate([144], "uint8", "global") - p7 = T.Buffer([144], "uint8", data=p7_data) - p10_data = T.allocate([144], "uint8", "global") - p10 = T.Buffer([144], "uint8", data=p10_data) - p11_data = T.allocate([144], "uint8", "global") - p11 = T.Buffer([144], "uint8", data=p11_data) - with T.attr(T.iter_var(v1a, None, "DataPar", ""), "pragma_compute_cycles_hint", 201): - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p4[0], dtype="handle")) - with T.attr(T.iter_var(v2a, None, "DataPar", ""), "pragma_compute_cycles_hint", 205): - T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 144, p7[0], dtype="handle")) - with T.attr(T.iter_var(v1c, None, "DataPar", ""), "pragma_compute_cycles_hint", 300): - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, 
buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p4[0], 128, 12, p4[128], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - with T.attr(T.iter_var(v3a, None, "DataPar", ""), "pragma_compute_cycles_hint", 209): - T.evaluate(T.call_extern("ethosu_copy", buffer6[0], 144, p10[0], dtype="handle")) - with T.attr(T.iter_var(v2c, None, "DataPar", ""), "pragma_compute_cycles_hint", 301): - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p7[0], 112, 12, p7[112], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - with T.attr(T.iter_var(v4a, None, "DataPar", ""), "pragma_compute_cycles_hint", 213): - T.evaluate(T.call_extern("ethosu_copy", buffer8[0], 144, p11[0], dtype="handle")) - with T.attr(T.iter_var(v3c, None, "DataPar", ""), "pragma_compute_cycles_hint", 302): - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p10[0], 112, 12, p10[112], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - with T.attr(T.iter_var(v4c, None, "DataPar", ""), "pragma_compute_cycles_hint", 303): - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p11[0], 112, 12, p11[112], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - # fmt: on - - const_dict = { - 0: np.array([0], dtype=np.uint8), - 1: np.array([1], dtype=np.uint8), - 2: np.array([2], dtype=np.uint8), - 3: np.array([3], dtype=np.uint8), - 4: np.array([4], dtype=np.uint8), - 5: np.array([5], dtype=np.uint8), - 6: np.array([6], dtype=np.uint8), - 7: np.array([7], dtype=np.uint8), - } - new_const_dict = { - 0: np.concatenate((const_dict[0], const_dict[1])), - 1: np.concatenate((const_dict[2], const_dict[3])), - 2: np.concatenate((const_dict[4], const_dict[5])), - 3: np.concatenate((const_dict[6], const_dict[7])), - } - test_mod, const_dict = MergeConstants(const_dict)(InputModule) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod, reference_mod, True) - check_const_dictionaries(const_dict, new_const_dict) - - -def test_multiple_prim_funcs(): - # fmt: off - @tvm.script.ir_module - class InputModule: - @T.prim_func - def main(): - T.evaluate(0) - - @T.prim_func - def abc(): - T.evaluate(0) - # fmt: on - - err_rgx = ( - r"Expected a single primitive function called 'main'. " - r"Please run the MergeConstants pass in conjunction with the LowerToTIR\(\) pass." - ) - with pytest.raises(tvm.TVMError, match=err_rgx): - MergeConstants({})(InputModule) - - -def test_no_main_prim_func(): - # fmt: off - @tvm.script.ir_module - class InputModule: - @T.prim_func - def abs(): - T.evaluate(0) - # fmt: on - - err_rgx = ( - r"Expected a single primitive function called 'main'. " - r"Please run the MergeConstants pass in conjunction with the LowerToTIR\(\) pass." 
- ) - with pytest.raises(tvm.TVMError, match=err_rgx): - MergeConstants({})(InputModule) diff --git a/tests/python/contrib/test_ethosu/test_networks.py b/tests/python/contrib/test_ethosu/test_networks.py deleted file mode 100644 index 308c06f50456..000000000000 --- a/tests/python/contrib/test_ethosu/test_networks.py +++ /dev/null @@ -1,202 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument, wrong-import-position -import pytest - -pytest.importorskip("ethosu.vela") - -import numpy as np - -from tvm.relay.op.contrib.ethosu import partition_for_ethosu -from tvm.micro import model_library_format as mlf -from tvm import WorkspaceMemoryPools, WorkspacePoolInfo, PoolInfoProperties -import tvm -from tvm.testing.aot import convert_to_relay - -from . import infra - - -MOBILENET_V1_URL = ( - "https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz", - "mobilenet_v1_1.0_224_quant.tflite", -) - -MOBILENET_V2_URL = ( - "https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/mobilenet_v2_1.0_224_quant.tgz", - "mobilenet_v2_1.0_224_quant.tflite", -) - - -@pytest.mark.parametrize( - "accel_type, model_url, workspace_size", - [ - ("ethos-u65-256", MOBILENET_V1_URL, 2338864), - ("ethos-u65-256", MOBILENET_V2_URL, 2264320), - ("ethos-u55-256", MOBILENET_V1_URL, 1793392), - ("ethos-u55-256", MOBILENET_V2_URL, 2217152), - ("ethos-u55-128", MOBILENET_V2_URL, 2217152), - ("ethos-u55-64", MOBILENET_V2_URL, 2217152), - ("ethos-u55-32", MOBILENET_V2_URL, 2217152), - ], -) -def test_networks_without_usmp(accel_type, model_url, workspace_size): - np.random.seed(23) - tflite_model_buf = infra.get_tflite_model(model_url) - input_data, output_data = infra.generate_ref_data_tflite(tflite_model_buf) - mod, params = convert_to_relay(tflite_model_buf) - mod = partition_for_ethosu(mod, params) - test_runner = infra.create_test_runner(accel_type, enable_usmp=False) - compiled_models = infra.build_source(mod, input_data, output_data, test_runner) - mlf_memory_map = mlf._build_function_memory_map( - compiled_models[0].executor_factory.function_metadata - ) - assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size - infra.verify_source(compiled_models, test_runner) - - -@pytest.mark.parametrize( - "accel_type, model_url, workspace_size", - [ - ("ethos-u65-256", MOBILENET_V1_URL, 1311200), - ("ethos-u55-256", MOBILENET_V2_URL, 1509408), - ], -) -def test_networks_with_usmp(accel_type, model_url, workspace_size): - np.random.seed(23) - tflite_model_buf = infra.get_tflite_model(model_url) - input_data, output_data = infra.generate_ref_data_tflite(tflite_model_buf) - mod, params = convert_to_relay(tflite_model_buf) - mod = 
partition_for_ethosu(mod, params) - test_runner = infra.create_test_runner(accel_type, enable_usmp=True) - compiled_models = infra.build_source(mod, input_data, output_data, test_runner) - allocated_pool_info = list( - dict(compiled_models[0].executor_factory.executor_codegen_metadata.pool_inputs).values() - )[0] - assert allocated_pool_info.allocated_size == workspace_size - infra.verify_source(compiled_models, test_runner) - - -@pytest.mark.parametrize( - "accel_type, model_url, workspace_size", - [ - ("ethos-u55-256", MOBILENET_V1_URL, 1205872), - ("ethos-u55-256", MOBILENET_V2_URL, 1509408), - ], -) -def test_networks_with_usmp_and_cascader_wo_striping(accel_type, model_url, workspace_size): - np.random.seed(23) - - pool_name = "my_memory_pool" - host_target = tvm.target.Target("c") - ethosu_target = tvm.target.Target("ethos-u") - workspace_pools = WorkspaceMemoryPools( - [ - WorkspacePoolInfo( - pool_name, - [host_target, ethosu_target], - PoolInfoProperties( - size_hint_bytes=2400000, - read_bandwidth_bytes_per_cycle=16, - write_bandwidth_bytes_per_cycle=16, - target_burst_bytes={ethosu_target: 1}, - ), - ) - ] - ) - tflite_model_buf = infra.get_tflite_model(model_url) - input_data, output_data = infra.generate_ref_data_tflite(tflite_model_buf) - mod, params = convert_to_relay(tflite_model_buf) - mod = partition_for_ethosu(mod, params) - test_runner = infra.create_test_runner( - accel_type, - enable_usmp=True, - enable_cascader=True, - enable_striping=False, - workspace_pools=workspace_pools, - ) - compiled_models = infra.build_source( - mod, input_data, output_data, test_runner, workspace_pools=workspace_pools - ) - infra.verify_source(compiled_models, test_runner) - - allocated_pool_info = list( - dict(compiled_models[0].executor_factory.executor_codegen_metadata.pool_inputs).values() - )[0] - assert allocated_pool_info.allocated_size == workspace_size - - -@pytest.mark.parametrize( - "accel_type, model_url, workspace_size", - [ - # Checks the same test case multiple times to make sure its not flaky - ("ethos-u55-256", MOBILENET_V1_URL, 1010000), - ("ethos-u55-256", MOBILENET_V1_URL, 1010000), - ("ethos-u55-256", MOBILENET_V1_URL, 1010000), - ("ethos-u55-256", MOBILENET_V1_URL, 1010000), - ("ethos-u55-256", MOBILENET_V1_URL, 1010000), - # Checks the same test case multiple times to make sure its not flaky - ("ethos-u55-256", MOBILENET_V2_URL, 1400000), - ("ethos-u55-256", MOBILENET_V2_URL, 1400000), - ("ethos-u55-256", MOBILENET_V2_URL, 1400000), - ("ethos-u55-256", MOBILENET_V2_URL, 1400000), - ("ethos-u55-256", MOBILENET_V2_URL, 1400000), - ], -) -def test_networks_with_usmp_and_cascader_with_striping(accel_type, model_url, workspace_size): - np.random.seed(23) - - pool_name = "my_memory_pool" - host_target = tvm.target.Target("c") - ethosu_target = tvm.target.Target("ethos-u") - workspace_pools = WorkspaceMemoryPools( - [ - WorkspacePoolInfo( - pool_name, - [host_target, ethosu_target], - PoolInfoProperties( - size_hint_bytes=workspace_size, - read_bandwidth_bytes_per_cycle=16, - write_bandwidth_bytes_per_cycle=16, - target_burst_bytes={ethosu_target: 1}, - ), - ) - ] - ) - tflite_model_buf = infra.get_tflite_model(model_url) - input_data, output_data = infra.generate_ref_data_tflite(tflite_model_buf) - mod, params = convert_to_relay(tflite_model_buf) - mod = partition_for_ethosu(mod, params) - test_runner = infra.create_test_runner( - accel_type, - enable_usmp=True, - enable_cascader=True, - enable_striping=True, - workspace_pools=workspace_pools, - ) - compiled_models = 
infra.build_source( - mod, input_data, output_data, test_runner, workspace_pools=workspace_pools - ) - infra.verify_source(compiled_models, test_runner) - - allocated_pool_info = list( - dict(compiled_models[0].executor_factory.executor_codegen_metadata.pool_inputs).values() - )[0] - assert allocated_pool_info.allocated_size <= workspace_size - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_outline_compiler_functions.py b/tests/python/contrib/test_ethosu/test_outline_compiler_functions.py deleted file mode 100644 index 5a6ed70a5902..000000000000 --- a/tests/python/contrib/test_ethosu/test_outline_compiler_functions.py +++ /dev/null @@ -1,86 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Test the outline compiler functions pass. -""" - -import pytest - -pytest.importorskip("ethosu.vela") - -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu.codegen import OutlineCompilerFunctions - - -def test_outline_compiler_functions(): - compiler_name = "my-compiler" - wrong_compiler_name = "wrong-compiler" - - def before(): - inp = relay.var("input") - - # Inlined functions for "my-compiler" - x = relay.var("x", shape=(1, 2, 2, 4)) - x = relay.reshape(x, newshape=(1, 4, 4)) - x = relay.Function(relay.analysis.free_vars(x), x) - x = x.with_attr("Compiler", compiler_name) - x = x.with_attr("global_symbol", "ext_func") - - # Inlined function for "wrong-compiler" - y = relay.var("y", shape=(1, 4, 4)) - y = relay.reshape(y, newshape=(1, 16)) - y = relay.Function(relay.analysis.free_vars(y), y) - y = y.with_attr("Compiler", wrong_compiler_name) - y = y.with_attr("global_symbol", "ext_func_2") - - out = relay.Call(x, [inp]) - out = relay.Call(y, [out]) - out = relay.Function([inp], out) - return tvm.ir.IRModule.from_expr(out) - - def expected(): - mod = tvm.ir.IRModule() - - inp = relay.var("input") - - x = relay.var("x", shape=(1, 2, 2, 4)) - x = relay.reshape(x, newshape=(1, 4, 4)) - x = relay.Function(relay.analysis.free_vars(x), x) - x = x.with_attr("Compiler", compiler_name) - x = x.with_attr("global_symbol", "ext_func") - mod["ext_func"] = x - - y = relay.var("y", shape=(1, 4, 4)) - y = relay.reshape(y, newshape=(1, 16)) - y = relay.Function(relay.analysis.free_vars(y), y) - y = y.with_attr("Compiler", wrong_compiler_name) - y = y.with_attr("global_symbol", "ext_func_2") - - out = relay.Call(mod.get_global_var("ext_func"), [inp]) - out = relay.Call(y, [out]) - mod["main"] = relay.Function([inp], out) - return mod - - after = OutlineCompilerFunctions(compiler_name)(before()) - exp = expected() - - global_vars = [str(gv) for gv in after.get_global_vars()] - assert 'I.GlobalVar("ext_func")' in global_vars - assert 'I.GlobalVar("ext_func_2")' not in global_vars - 
tvm.ir.assert_structural_equal(after["ext_func"], exp["ext_func"]) diff --git a/tests/python/contrib/test_ethosu/test_partition.py b/tests/python/contrib/test_ethosu/test_partition.py deleted file mode 100644 index 94896856db74..000000000000 --- a/tests/python/contrib/test_ethosu/test_partition.py +++ /dev/null @@ -1,65 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=wrong-import-position - -""" -Tests to check that the NPU partitioning frontend partitions -only supported operations. -""" - -import pytest - -pytest.importorskip("ethosu.vela") - -import tvm -from tvm import relay -from tvm.relay.op.contrib import ethosu - - -@pytest.mark.parametrize( - "count_include_pad,pool_shape,padding", - [ - (True, [2, 2], [0, 0, 0, 0]), - (False, [2, 2], [4, 4, 5, 5]), - (False, [9, 9], [1, 1, 1, 1]), - ], -) -def test_invalid_avg_pool2d(count_include_pad, pool_shape, padding): - """ - Test unsupported variants of avg_pool2d don't get partitioned. - """ - ifm_shape = [1, 4, 4, 3] - strides = [2, 2] - - def get_graph(): - x = relay.var("x", shape=ifm_shape, dtype="int8") - x = relay.cast(x, dtype="int32") - x = relay.nn.avg_pool2d( - x, - pool_shape, - strides, - padding=padding, - layout="NHWC", - count_include_pad=count_include_pad, - ) - x = relay.cast(x, dtype="int8") - func = relay.Function(relay.analysis.free_vars(x), x) - return tvm.IRModule.from_expr(func) - - mod = relay.transform.InferType()(get_graph()) - partitioned_mod = ethosu.partition_for_ethosu(mod) - tvm.ir.assert_structural_equal(mod, partitioned_mod) diff --git a/tests/python/contrib/test_ethosu/test_pass_operations_distribution.py b/tests/python/contrib/test_ethosu/test_pass_operations_distribution.py deleted file mode 100644 index 8b127de327d0..000000000000 --- a/tests/python/contrib/test_ethosu/test_pass_operations_distribution.py +++ /dev/null @@ -1,183 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") - -import numpy as np - -import tvm -from tvm import relay -from tests.python.contrib.test_ethosu.infra import get_tflite_graph -from tvm.relay.op.contrib.ethosu import partition_for_ethosu -from tvm.relay.analysis.operations_distribution import analyze_operations_distribution -from tvm.relay.transform.suffixes import tag_suffixes - - -def test_operations_distribution_ethos(): - - tflite = pytest.importorskip("tflite") - tensorflow = pytest.importorskip("tensorflow") - - import tensorflow as tf - - inp = (224, 224, 9) - input_shape = (1, *inp) - kernel_shape = (3, 3) - padding = (1, 1, 1, 1) - padding_out = (1, 33, 33, 1) - - @tf.function - def simple_net(x): - weight_shape = [kernel_shape[0], kernel_shape[1], input_shape[3], 3] - weights = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - weight_shape[2] = 3 - weights1 = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - weights2 = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - op = tf.nn.conv2d( - x, - filters=weights, - strides=1, - padding="SAME", - data_format="NHWC", - dilations=1, - ) - op1 = tf.nn.conv2d( - op, - filters=weights1, - strides=1, - padding="SAME", - data_format="NHWC", - dilations=1, - ) - op2 = tf.nn.conv2d( - op, - filters=weights2, - strides=1, - padding="SAME", - data_format="NHWC", - dilations=1, - ) - op = tf.concat([op1, op2], 1) - op = tf.pad( - op, - [[0, 0], [padding[0], padding_out[1]], [padding_out[2], padding[3]], [0, 0]], - "CONSTANT", - ) - return op - - _, tflite_graph = get_tflite_graph(simple_net, [input_shape]) - - # Get TFLite model from buffer - try: - import tflite - - tflite_model = tflite.Model.GetRootAsModel(tflite_graph, 0) - except AttributeError: - import tflite.Model - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite(tflite_model) - - mod = tag_suffixes(mod) - mod = partition_for_ethosu(mod, params) - operations_distribution = analyze_operations_distribution(mod) - - expected = { - "Identity": ["generic", "generic"], - "concat": ["ethos-u", "ethos-u.concat"], - "Conv2D_11": ["ethos-u", "ethos-u.qnn_conv2d"], - "Conv2D1": ["ethos-u", "ethos-u.qnn_conv2d"], - "Conv2D_221": ["ethos-u", "ethos-u.qnn_conv2d"], - } - - assert operations_distribution == expected - - -def test_operations_distribution_generic(): - - tflite = pytest.importorskip("tflite") - tensorflow = pytest.importorskip("tensorflow") - - import tensorflow as tf - - inp = (224, 224, 9) - input_shape = (1, *inp) - kernel_shape = (3, 3) - padding = (1, 1, 1, 1) - padding_out = (1, 33, 33, 1) - dilations_out = 32 - - @tf.function - def simple_net(x): - weight_shape = [kernel_shape[0], kernel_shape[1], input_shape[3], 3] - weights = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - op = tf.nn.conv2d( - x, - filters=weights, - strides=1, - padding="SAME", - data_format="NHWC", - dilations=dilations_out, - ) - op = tf.pad( - op, - [[0, 0], [padding[0], padding_out[2]], [padding[1], padding[3]], [0, 0]], - "CONSTANT", - ) - op = tf.pad( - op, - [[0, 0], [padding[0], padding_out[2]], [padding[1], padding[3]], [0, 0]], - "CONSTANT", - ) - return tf.pad( - op, - [[0, 0], [padding[0], padding_out[2]], [padding[1], padding[3]], [0, 0]], - "CONSTANT", - ) - - _, tflite_graph = get_tflite_graph(simple_net, [input_shape]) - - # Get TFLite model from buffer - try: - import tflite - - tflite_model = tflite.Model.GetRootAsModel(tflite_graph, 0) - except 
AttributeError: - import tflite.Model - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - mod, params = relay.frontend.from_tflite(tflite_model) - - mod = tag_suffixes(mod) - mod = partition_for_ethosu(mod, params) - operations_distribution = analyze_operations_distribution(mod) - - expected = { - "Identity": ["generic", "generic"], - "Pad_1": ["generic", "generic"], - "Pad": ["generic", "generic"], - "Conv2D2": ["generic", "generic"], - } - - assert operations_distribution == expected - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_placeholder.py b/tests/python/contrib/test_ethosu/test_placeholder.py deleted file mode 100644 index 53cbfc236920..000000000000 --- a/tests/python/contrib/test_ethosu/test_placeholder.py +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""This file contains a placeholder test that always run""" - - -def test_placeholder(): - """ - This test always run on every docker image. - Otherwise, pytest will return exit code 5 - and breaks CI in the docker images where - microNPU tests are not run. - """ - pass diff --git a/tests/python/contrib/test_ethosu/test_preprocess.py b/tests/python/contrib/test_ethosu/test_preprocess.py deleted file mode 100644 index a80555b02277..000000000000 --- a/tests/python/contrib/test_ethosu/test_preprocess.py +++ /dev/null @@ -1,343 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument - -import pytest - -pytest.importorskip("ethosu.vela") -import numpy as np - -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu import preprocess - - -def set_func_attr(func, compile_name, symbol_name): - """ - Helper function to attach attributes to the external function. 
- """ - func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) - func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1)) - func = func.with_attr("Compiler", compile_name) - func = func.with_attr("global_symbol", symbol_name) - return func - - -def test_single_io(): - """ - This test will test the pass wont touch external functions that - have a single input and a single output. - """ - - def create_graph(): - def create_external_func1(mod_, compiler_name, symbol_name): - x_int = relay.var("x_int", shape=(10, 10)) - z0 = relay.nn.relu(x_int) - f1 = relay.Function([x_int], z0) - f1 = set_func_attr(f1, compiler_name, symbol_name) - glb_f1 = relay.GlobalVar(symbol_name) - mod_[glb_f1] = f1 - mod_ = relay.transform.InferType()(mod_) - return glb_f1, mod_ - - mod = tvm.IRModule() - x = relay.var("x", shape=(10, 10)) - - glb_symbol_f1, mod = create_external_func1(mod, "ethos-u", "ethosu_0") - r = relay.Call(glb_symbol_f1, [x]) - main = relay.Function([x], r) - mod["main"] = main - mod = relay.transform.InferType()(mod) - return mod - - mod = create_graph() - exp = create_graph() - mod = preprocess.preprocess_ext_io()(mod) - tvm.ir.assert_structural_equal(mod, exp, map_free_vars=True) - - -def test_2ins_single_out(): - """ - The test is check two inputs and a single output of external function - """ - - def create_graph(): - def create_external_func1(mod_, compiler_name, symbol_name): - x_int = relay.var("x_int", shape=(10, 10)) - w0_int = relay.var("w0_int", shape=(10, 10)) - z0 = relay.add(x_int, w0_int) - - f1 = relay.Function([x_int, w0_int], z0) - f1 = set_func_attr(f1, compiler_name, symbol_name) - glb_f1 = relay.GlobalVar(symbol_name) - mod_[glb_f1] = f1 - mod_ = relay.transform.InferType()(mod_) - return glb_f1, mod_ - - mod = tvm.IRModule() - - x = relay.var("x", shape=(10, 10)) - w0 = relay.var("w0", shape=(10, 10)) - - glb_symbol_f1, mod = create_external_func1(mod, "ethos-u", "ethosu_0") - r = relay.Call(glb_symbol_f1, [x, w0]) - main = relay.Function([x, w0], r) - mod["main"] = main - mod = relay.transform.InferType()(mod) - return mod - - def expected(): - def create_external_func1(mod_, compiler_name, symbol_name): - ifms_int = relay.var("ifms_int", shape=[200]) - - # splits - (x_int_flat, w0_int_flat) = relay.split(ifms_int, [100]) - # reshapes - x_int = relay.reshape(x_int_flat, newshape=(10, 10)) - w0_int = relay.reshape(w0_int_flat, newshape=(10, 10)) - - z0 = relay.add(x_int, w0_int) - f1 = relay.Function([ifms_int], z0) - f1 = set_func_attr(f1, compiler_name, symbol_name) - glb_f1 = relay.GlobalVar(symbol_name) - mod_[glb_f1] = f1 - mod_ = relay.transform.InferType()(mod_) - return glb_f1, mod_ - - mod = tvm.IRModule() - - x = relay.var("x", shape=(10, 10)) - w0 = relay.var("w0", shape=(10, 10)) - - # reshapes - x_reshaped = relay.reshape(x, newshape=100) - w0_reshaped = relay.reshape(w0, newshape=100) - - # concat - ifms = relay.concatenate((x_reshaped, w0_reshaped), 0) - - glb_symbol_f1, mod = create_external_func1(mod, "ethos-u", "ethosu_0") - r = relay.Call(glb_symbol_f1, [ifms]) - main = relay.Function([x, w0], r) - mod["main"] = main - mod = relay.transform.InferType()(mod) - return mod - - mod = create_graph() - exp = expected() - mod = preprocess.preprocess_ext_io()(mod) - tvm.ir.assert_structural_equal(mod, exp, map_free_vars=True) - - -def test_single_in_2outs(): - """ - The test is to check a single input and two outputs of external function - """ - - def create_graph(): - def create_external_func1(mod_, compiler_name, symbol_name): - x_int = 
relay.var("x_int", shape=(10, 10)) - - p0 = relay.nn.relu(x_int) - q0 = relay.tanh(x_int) - f1_o_tuple = relay.Tuple([p0, q0]) - - f1 = relay.Function([x_int], f1_o_tuple) - f1 = set_func_attr(f1, compiler_name, symbol_name) - glb_f1 = relay.GlobalVar(symbol_name) - mod_[glb_f1] = f1 - mod_ = relay.transform.InferType()(mod_) - return glb_f1, mod_ - - mod = tvm.IRModule() - x = relay.var("x", shape=(10, 10)) - glb_symbol_f1, mod = create_external_func1(mod, "ethos-u", "ethosu_0") - pq_tuple = relay.Call(glb_symbol_f1, [x]) - p0 = relay.TupleGetItem(pq_tuple, 0) - q0 = relay.TupleGetItem(pq_tuple, 1) - r = relay.concatenate((p0, q0), axis=0) - main = relay.Function([x], r) - mod["main"] = main - mod = relay.transform.InferType()(mod) - return mod - - def expected(): - def create_external_func1(mod_, compiler_name, symbol_name): - x_int = relay.var("x_int", shape=(10, 10)) - - p0 = relay.nn.relu(x_int) - q0 = relay.tanh(x_int) - - # reshapes - p0_reshaped = relay.reshape(p0, newshape=100) - q0_reshaped = relay.reshape(q0, newshape=100) - ofms = relay.concatenate((p0_reshaped, q0_reshaped), 0) - - f1 = relay.Function([x_int], ofms) - f1 = set_func_attr(f1, compiler_name, symbol_name) - glb_f1 = relay.GlobalVar(symbol_name) - mod_[glb_f1] = f1 - mod_ = relay.transform.InferType()(mod_) - return glb_f1, mod_ - - mod = tvm.IRModule() - x = relay.var("x", shape=(10, 10)) - glb_symbol_f1, mod = create_external_func1(mod, "ethos-u", "ethosu_0") - ofms = relay.Call(glb_symbol_f1, [x]) - - # splits - (p0_flat, q0_flat) = relay.split(ofms, [100]) - # reshapes - p0_flat_reshaped = relay.reshape(p0_flat, newshape=(10, 10)) - q0_flat_reshaped = relay.reshape(q0_flat, newshape=(10, 10)) - # original output - tuple_out = relay.Tuple([p0_flat_reshaped, q0_flat_reshaped]) - - p0 = relay.TupleGetItem(tuple_out, 0) - q0 = relay.TupleGetItem(tuple_out, 1) - r = relay.concatenate((p0, q0), axis=0) - main = relay.Function([x], r) - mod["main"] = main - mod = relay.transform.InferType()(mod) - return mod - - mod = create_graph() - exp = expected() - mod = relay.transform.InferType()(mod) - mod = preprocess.preprocess_ext_io()(mod) - tvm.ir.assert_structural_equal(mod, exp, map_free_vars=True) - - -def test_4ins_2outs(): - """ - The test is to check a 4 inputs and two outputs of external function. - This just stand as a general test for multiple ins/outs. 
- """ - - def create_graph(): - def create_external_func1(mod_, compiler_name, symbol_name): - x_int = relay.var("x_int", shape=(10, 10)) - w0_int = relay.var("w0_int", shape=(10, 10)) - w1_int = relay.var("w1_int", shape=(10, 10)) - w2_int = relay.var("w2_int", shape=(10, 10)) - - z0 = relay.add(x_int, w0_int) - p0 = relay.subtract(z0, w1_int) - q0 = relay.multiply(z0, w2_int) - f1_o_tuple = relay.Tuple([p0, q0]) - - f1 = relay.Function([x_int, w0_int, w1_int, w2_int], f1_o_tuple) - f1 = set_func_attr(f1, compiler_name, symbol_name) - glb_f1 = relay.GlobalVar(symbol_name) - mod_[glb_f1] = f1 - mod_ = relay.transform.InferType()(mod_) - return glb_f1, mod_ - - mod = tvm.IRModule() - - x = relay.var("x", shape=(10, 10)) - w0 = relay.var("w0", shape=(10, 10)) - w1 = relay.var("w1", shape=(10, 10)) - w2 = relay.var("w2", shape=(10, 10)) - - glb_symbol_f1, mod = create_external_func1(mod, "ethos-u", "ethosu_0") - pq_tuple = relay.Call(glb_symbol_f1, [x, w0, w1, w2]) - - p0 = relay.TupleGetItem(pq_tuple, 0) - q0 = relay.TupleGetItem(pq_tuple, 1) - r = relay.concatenate((p0, q0), axis=0) - main = relay.Function([x, w0, w1, w2], r) - mod["main"] = main - mod = relay.transform.InferType()(mod) - return mod - - def expected(): - def create_external_func1(mod_, compiler_name, symbol_name): - ifms_int = relay.var("ifms_int", shape=[400]) - - # splits - (x_int_flat, w0_int_flat, w1_int_flat, w2_int_flat) = relay.split( - ifms_int, [100, 200, 300] - ) - # reshapes - x_int = relay.reshape(x_int_flat, newshape=(10, 10)) - w0_int = relay.reshape(w0_int_flat, newshape=(10, 10)) - w1_int = relay.reshape(w1_int_flat, newshape=(10, 10)) - w2_int = relay.reshape(w2_int_flat, newshape=(10, 10)) - - z0 = relay.add(x_int, w0_int) - p0 = relay.subtract(z0, w1_int) - q0 = relay.multiply(z0, w2_int) - # f1_o_tuple = relay.Tuple([p0, q0]) - - # reshapes - p0_reshaped = relay.reshape(p0, newshape=100) - q0_reshaped = relay.reshape(q0, newshape=100) - ofms = relay.concatenate((p0_reshaped, q0_reshaped), 0) - - f1 = relay.Function([ifms_int], ofms) - f1 = set_func_attr(f1, compiler_name, symbol_name) - glb_f1 = relay.GlobalVar(symbol_name) - mod_[glb_f1] = f1 - mod_ = relay.transform.InferType()(mod_) - return glb_f1, mod_ - - mod = tvm.IRModule() - - x = relay.var("x", shape=(10, 10)) - w0 = relay.var("w0", shape=(10, 10)) - w1 = relay.var("w1", shape=(10, 10)) - w2 = relay.var("w2", shape=(10, 10)) - - # reshapes - x_reshaped = relay.reshape(x, newshape=100) - w0_reshaped = relay.reshape(w0, newshape=100) - w1_reshaped = relay.reshape(w1, newshape=100) - w2_reshaped = relay.reshape(w2, newshape=100) - - # concat - ifms = relay.concatenate((x_reshaped, w0_reshaped, w1_reshaped, w2_reshaped), 0) - - # call - glb_func, mod = create_external_func1(mod, "ethos-u", "ethosu_0") - ofms = relay.Call(glb_func, [ifms]) - - # splits - (p0_flat, q0_flat) = relay.split(ofms, [100]) - # reshapes - p0_flat_reshaped = relay.reshape(p0_flat, newshape=(10, 10)) - q0_flat_reshaped = relay.reshape(q0_flat, newshape=(10, 10)) - # original output - tuple_out = relay.Tuple([p0_flat_reshaped, q0_flat_reshaped]) - - p0 = relay.TupleGetItem(tuple_out, 0) - q0 = relay.TupleGetItem(tuple_out, 1) - - r = relay.concatenate((p0, q0), axis=0) - main = relay.Function([x, w0, w1, w2], r) - mod["main"] = main - mod = relay.transform.InferType()(mod) - return mod - - mod = create_graph() - exp = expected() - mod = preprocess.preprocess_ext_io()(mod) - tvm.ir.assert_structural_equal(mod, exp, map_free_vars=True) - - -if __name__ == "__main__": - 
tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_remove_concatenates.py b/tests/python/contrib/test_ethosu/test_remove_concatenates.py deleted file mode 100644 index 58cf5f72d7c0..000000000000 --- a/tests/python/contrib/test_ethosu/test_remove_concatenates.py +++ /dev/null @@ -1,85 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest - -pytest.importorskip("ethosu.vela") -import tvm -import tvm.script -from tvm import relay -from tvm.relay.backend.contrib.ethosu.tir.compiler import _lower_to_tir -from tvm.relay.testing import run_opt_pass -from tvm.script import tir as T - -from .infra import make_ethosu_conv2d - - -# fmt: off -# complains of an undefined buffer -@tvm.script.ir_module(check_well_formed=False) -class ReferenceModule: - @T.prim_func - def main(input_placeholder: T.Buffer((1,8,12,16), "int8"), input_placeholder_1: T.Buffer((1,8,10,16), "int8"), input_T_concat: T.Buffer((1,8,32,16), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - - placeholder = T.Buffer(1536, dtype="int8", data=input_placeholder.data) - placeholder_1 = T.Buffer(1280, dtype="int8", data=input_placeholder_1.data) - T_concat = T.Buffer(4096, dtype="int8", data=input_T_concat.data) - - buffer = T.Buffer([2992], "uint8") - buffer_1 = T.Buffer([160], "uint8") - buffer_2 = T.Buffer([2992], "uint8") - buffer_3 = T.Buffer([160], "uint8") - buffer_4 = T.Buffer([2992], "uint8") - buffer_5 = T.Buffer([160], "uint8") - buffer_6 = T.Buffer([2992], "uint8") - buffer_7 = T.Buffer([160], "uint8") - # body - T_concat_1_data = T.allocate([2816], "int8", "global", annotations={"disable_lower_builtin":True}) - T_concat_1 = T.Buffer([2816], "int8", data=T_concat_1_data) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 10, 16, 8, 0, 10, placeholder_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 160, 16, 1, "int8", 8, 10, 16, 8, 0, 10, T_concat_1[192], 0, 0, 0, T.float32(0.25), 14, "NHWC", 352, 16, 1, 3, 3, 1, 1, 1, 1, buffer[0], 2992, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 10, 16, 8, 0, 10, T_concat_1[192], 0, 0, 0, T.float32(0.5), 10, "NHWC", 352, 16, 1, "int8", 8, 10, 16, 8, 0, 10, T_concat[352], 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 16, 1, 3, 3, 1, 1, 1, 1, buffer_2[0], 2992, T.int8(-1), T.int8(-1), 12, buffer_3[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 12, 16, 8, 0, 12, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 192, 16, 1, "int8", 8, 12, 16, 8, 0, 12, T_concat_1[0], 0, 0, 0, 
T.float32(0.25), 14, "NHWC", 352, 16, 1, 3, 3, 1, 1, 1, 1, buffer_4[0], 2992, T.int8(-1), T.int8(-1), 12, buffer_5[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 22, 16, 8, 0, 22, T_concat_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 352, 16, 1, "int8", 8, 22, 16, 8, 0, 22, T_concat[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 16, 1, 3, 3, 1, 1, 1, 1, buffer_6[0], 2992, T.int8(-1), T.int8(-1), 12, buffer_7[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -def test_concat(): - def _get_func(): - ifm1 = relay.var("ifm1", shape=(1, 8, 12, 16), dtype="int8") - ifm2 = relay.var("ifm2", shape=(1, 8, 10, 16), dtype="int8") - conv1 = make_ethosu_conv2d(ifm1, 16, 16, (3, 3), (1, 1), (1, 1), (1, 1)) - conv2 = make_ethosu_conv2d(ifm2, 16, 16, (3, 3), (1, 1), (1, 1), (1, 1)) - conc1 = relay.concatenate((conv1, conv2), axis=2) - conv3 = make_ethosu_conv2d(conc1, 16, 16, (3, 3), (1, 1), (1, 1), (1, 1)) - conv4 = make_ethosu_conv2d(conv2, 16, 16, (3, 3), (1, 1), (1, 1), (1, 1)) - conc2 = relay.concatenate((conv3, conv4), axis=2) - func = relay.Function(relay.analysis.free_vars(conc2), conc2) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - func = _get_func() - mod, _ = _lower_to_tir(func) - script = mod.script() - test_mod = tvm.script.from_source(script, check_well_formed=False) - - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_replace_binary_elementwise.py b/tests/python/contrib/test_ethosu/test_replace_binary_elementwise.py deleted file mode 100644 index dd388109466f..000000000000 --- a/tests/python/contrib/test_ethosu/test_replace_binary_elementwise.py +++ /dev/null @@ -1,346 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") - -import tvm -from tvm import relay -from tvm.relay.testing import run_opt_pass -from tvm.relay.backend.contrib.ethosu.tir import spec -from tvm.relay.backend.contrib.ethosu.tir.compiler import _lower_to_tir -from .infra import make_ethosu_binary_elementwise, get_binary_elementwise_args - - -@pytest.mark.parametrize( - "ifm_shape, ifm2_shape, ifm_channels, ifm2_channels, ifm_layout, ofm_layout, rounding_mode", - [ - ((1, 5, 9, 3), (1, 5, 9, 3), 3, 3, "NHWC", "NHWC", "TFL"), - ((1, 8, 3, 9, 16), (1, 8, 3, 9, 16), 40, 40, "NHCWB16", "NHCWB16", "NATURAL"), - ((1, 8, 3, 9, 16), (1, 8, 3, 9, 16), 40, 40, "NHCWB16", "NHWC", "TRUNCATE"), - ((1, 8, 9, 40), (1, 8, 9, 40), 40, 40, "NHWC", "NHCWB16", "TFL"), - # Broadcast - ((1, 5, 9, 3), (1, 1, 9, 1), 3, 1, "NHWC", "NHWC", "NATURAL"), - ((1, 8, 9, 40), (1, 1, 1, 1), 40, 1, "NHWC", "NHCWB16", "TRUNCATE"), - ], -) -@pytest.mark.parametrize("operator_type", ["ADD", "SUB", "MUL", "MIN", "MAX"]) -@pytest.mark.parametrize("activation", ["NONE", "CLIP"]) -def test_binary_elementwise_single( - ifm_shape, - ifm2_shape, - ifm_channels, - ifm2_channels, - ifm_layout, - ofm_layout, - rounding_mode, - operator_type, - activation, -): - dtype = "int8" - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype) - - binary_elementwise = make_ethosu_binary_elementwise( - ifm, - ifm2, - ifm_channels, - ifm2_channels, - operator_type, - dtype, - False, - activation, - ifm_layout, - ifm_layout, - ofm_layout, - rounding_mode, - ) - func = relay.Function(relay.analysis.free_vars(binary_elementwise), binary_elementwise) - func = run_opt_pass(func, relay.transform.InferType()) - mod, _ = _lower_to_tir(func) - data = [] - - def _visit(stmt): - if isinstance(stmt, tvm.tir.Call): - data.append(get_binary_elementwise_args(stmt)) - - tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit) - if ifm_layout == "NHWC": - ifm_stride_c = 1 - ifm_stride_w = ifm_shape[3] if ifm_shape[2] != 1 else 1 - ifm_stride_h = ifm_shape[2] * ifm_shape[3] if ifm_shape[1] != 1 else 1 - - ifm2_stride_c = 1 - ifm2_stride_w = ifm2_shape[3] if ifm2_shape[2] != 1 else 1 - ifm2_stride_h = ifm2_shape[2] * ifm2_shape[3] if ifm2_shape[1] != 1 else 1 - - ofm_height = ifm_shape[1] - ofm_width = ifm_shape[2] - else: - ifm_stride_w = 16 - ifm_stride_c = 16 * ifm_shape[3] - ifm_stride_h = 16 * ifm_shape[2] * ifm_shape[3] - - ifm2_stride_w = 16 - ifm2_stride_c = 16 * ifm2_shape[3] - ifm2_stride_h = 16 * ifm2_shape[2] * ifm2_shape[3] - - ofm_height = ifm_shape[1] - ofm_width = ifm_shape[3] - - if ofm_layout == "NHWC": - ofm_stride_c = 1 - ofm_stride_w = ifm_channels if ofm_width > 1 else 1 - ofm_stride_h = ifm_channels * ofm_width if ofm_height > 1 else 1 - else: - ofm_stride_w = 16 - ofm_stride_c = 16 * ofm_width - ofm_stride_h = 16 * ofm_width * ((ifm_channels - 1) // 16 + 1) - - serial_binary_elementwise = spec.SerialBinaryElementwise( - ifm=spec.SerialFeatureMap( - data_type=dtype, - height=ifm_shape[1], - width=ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3], - channels=ifm_channels, - tile_height_0=ifm_shape[1], - tile_height_1=0, - tile_width_0=ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3], - tile_address_0=0, - tile_address_1=0, - tile_address_2=0, - tile_address_3=0, - scale=1.0, - zero_point=0, - layout=ifm_layout, - stride_h=ifm_stride_h, - stride_w=ifm_stride_w, - stride_c=ifm_stride_c, - ), - ifm2=spec.SerialFeatureMap( - data_type=dtype, - height=ifm2_shape[1], - 
width=ifm2_shape[2] if ifm_layout == "NHWC" else ifm2_shape[3], - channels=ifm2_channels, - tile_height_0=ifm2_shape[1], - tile_height_1=0, - tile_width_0=ifm2_shape[2] if ifm_layout == "NHWC" else ifm2_shape[3], - tile_address_0=0, - tile_address_1=0, - tile_address_2=0, - tile_address_3=0, - scale=1.0, - zero_point=0, - layout=ifm_layout, - stride_h=ifm2_stride_h, - stride_w=ifm2_stride_w, - stride_c=ifm2_stride_c, - ), - ofm=spec.SerialFeatureMap( - data_type=dtype, - height=ofm_height, - width=ofm_width, - channels=ifm_channels, - tile_height_0=ofm_height, - tile_height_1=0, - tile_width_0=ofm_width, - tile_address_0=0, - tile_address_1=0, - tile_address_2=0, - tile_address_3=0, - scale=1.0, - zero_point=0, - layout=ofm_layout, - stride_h=ofm_stride_h, - stride_w=ofm_stride_w, - stride_c=ofm_stride_c, - ), - operator_type=operator_type, - reversed_operands=False, - activation=spec.SerialActivation( - op=activation, - clip_min=10 if activation == "CLIP" else 0, - clip_max=100 if activation == "CLIP" else 0, - ), - rounding_mode=rounding_mode, - block_config=spec.SerialBlockConfig(0, 0, 0), - rescale_config=spec.SerialRescaleConfig(False, 0, 0), - ) - - assert data[0] == ["ethosu_binary_elementwise"] + list(serial_binary_elementwise) - - -@pytest.mark.parametrize( - "ifm_shape, ifm2_shape, ifm_channels, ifm2_channels, ifm_layout, ofm_layout", - [ - ((1, 5, 9, 3), (1, 5, 9, 3), 3, 3, "NHWC", "NHWC"), - ((1, 8, 3, 9, 16), (1, 8, 3, 9, 16), 40, 40, "NHCWB16", "NHCWB16"), - ((1, 8, 3, 9, 16), (1, 8, 3, 9, 16), 40, 40, "NHCWB16", "NHWC"), - ((1, 8, 9, 40), (1, 8, 9, 40), 40, 40, "NHWC", "NHCWB16"), - # Broadcast - ((1, 5, 9, 3), (1, 1, 9, 1), 3, 1, "NHWC", "NHWC"), - ((1, 8, 9, 40), (1, 1, 1, 1), 40, 1, "NHWC", "NHCWB16"), - ], -) -@pytest.mark.parametrize("operator_type", ["SHR", "SHL"]) -@pytest.mark.parametrize("rounding_mode", ["TFL", "NATURAL", "TRUNCATE"]) -def test_shift_binary_elementwise_single( - ifm_shape, - ifm2_shape, - ifm_channels, - ifm2_channels, - ifm_layout, - ofm_layout, - operator_type, - rounding_mode, -): - dtype = "int32" - activation = "NONE" # Only NONE is available if the activation type is int32 - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype) - - binary_elementwise = make_ethosu_binary_elementwise( - ifm, - ifm2, - ifm_channels, - ifm2_channels, - operator_type, - dtype, - False, - "NONE", - ifm_layout, - ifm_layout, - ofm_layout, - rounding_mode, - ) - func = relay.Function(relay.analysis.free_vars(binary_elementwise), binary_elementwise) - func = run_opt_pass(func, relay.transform.InferType()) - mod, _ = _lower_to_tir(func) - data = [] - - def _visit(stmt): - if isinstance(stmt, tvm.tir.Call): - data.append(get_binary_elementwise_args(stmt)) - - tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit) - if ifm_layout == "NHWC": - ifm_stride_c = 1 - ifm_stride_w = ifm_shape[3] if ifm_shape[2] != 1 else 1 - ifm_stride_h = ifm_shape[2] * ifm_shape[3] if ifm_shape[1] != 1 else 1 - - ifm2_stride_c = 1 - ifm2_stride_w = ifm2_shape[3] if ifm2_shape[2] != 1 else 1 - ifm2_stride_h = ifm2_shape[2] * ifm2_shape[3] if ifm2_shape[1] != 1 else 1 - - ofm_height = ifm_shape[1] - ofm_width = ifm_shape[2] - else: - ifm_stride_w = 16 - ifm_stride_c = 16 * ifm_shape[3] - ifm_stride_h = 16 * ifm_shape[2] * ifm_shape[3] - - ifm2_stride_w = 16 - ifm2_stride_c = 16 * ifm2_shape[3] - ifm2_stride_h = 16 * ifm2_shape[2] * ifm2_shape[3] - - ofm_height = ifm_shape[1] - ofm_width = ifm_shape[3] - - if ofm_layout == 
"NHWC": - ofm_stride_c = 1 - ofm_stride_w = ifm_channels if ofm_width > 1 else 1 - ofm_stride_h = ifm_channels * ofm_width if ofm_height > 1 else 1 - else: - ofm_stride_w = 16 - ofm_stride_c = 16 * ofm_width - ofm_stride_h = 16 * ofm_width * ((ifm_channels - 1) // 16 + 1) - - serial_binary_elementwise = spec.SerialBinaryElementwise( - ifm=spec.SerialFeatureMap( - data_type=dtype, - height=ifm_shape[1], - width=ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3], - channels=ifm_channels, - tile_height_0=ifm_shape[1], - tile_height_1=0, - tile_width_0=ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3], - tile_address_0=0, - tile_address_1=0, - tile_address_2=0, - tile_address_3=0, - scale=1.0, - zero_point=0, - layout=ifm_layout, - stride_h=ifm_stride_h, - stride_w=ifm_stride_w, - stride_c=ifm_stride_c, - ), - ifm2=spec.SerialFeatureMap( - data_type=dtype, - height=ifm2_shape[1], - width=ifm2_shape[2] if ifm_layout == "NHWC" else ifm2_shape[3], - channels=ifm2_channels, - tile_height_0=ifm2_shape[1], - tile_height_1=0, - tile_width_0=ifm2_shape[2] if ifm_layout == "NHWC" else ifm2_shape[3], - tile_address_0=0, - tile_address_1=0, - tile_address_2=0, - tile_address_3=0, - scale=1.0, - zero_point=0, - layout=ifm_layout, - stride_h=ifm2_stride_h, - stride_w=ifm2_stride_w, - stride_c=ifm2_stride_c, - ), - ofm=spec.SerialFeatureMap( - data_type=dtype, - height=ofm_height, - width=ofm_width, - channels=ifm_channels, - tile_height_0=ofm_height, - tile_height_1=0, - tile_width_0=ofm_width, - tile_address_0=0, - tile_address_1=0, - tile_address_2=0, - tile_address_3=0, - scale=1.0, - zero_point=0, - layout=ofm_layout, - stride_h=ofm_stride_h, - stride_w=ofm_stride_w, - stride_c=ofm_stride_c, - ), - operator_type=operator_type, - reversed_operands=False, - activation=spec.SerialActivation( - op=activation, - clip_min=0, - clip_max=0, - ), - rounding_mode=rounding_mode, - block_config=spec.SerialBlockConfig(0, 0, 0), - rescale_config=spec.SerialRescaleConfig(False, 0, 0), - ) - - assert data[0] == ["ethosu_binary_elementwise"] + list(serial_binary_elementwise) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_replace_conv2d.py b/tests/python/contrib/test_ethosu/test_replace_conv2d.py deleted file mode 100644 index a8aa4043293f..000000000000 --- a/tests/python/contrib/test_ethosu/test_replace_conv2d.py +++ /dev/null @@ -1,826 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu.tir.compiler import _lower_to_tir -from tvm.relay.backend.contrib.ethosu.tir.scheduler import total_cascader -from tvm.relay.testing import run_opt_pass -from tvm.script import tir as T - -from .infra import make_ethosu_conv2d - - -def _create_serial_conv2d_params( - ifm_shape, - ifm_channels, - ofm_channels, - kernel_shape, - padding, - strides, - dilation, - activation="NONE", - ifm_layout="NHWC", - ofm_layout="NHWC", - rounding_mode="TFL", - upscale="NONE", -): - dtype = "int8" - dilated_kernel_h = (kernel_shape[0] - 1) * dilation[0] + 1 - dilated_kernel_w = (kernel_shape[1] - 1) * dilation[1] + 1 - upscale_factor = 2 if upscale != "NONE" else 1 - - if ifm_layout == "NHWC": - ifm_stride_c = 1 - ifm_stride_w = ifm_shape[3] - ifm_stride_h = ifm_shape[2] * ifm_shape[3] - ofm_height = ( - ifm_shape[1] * upscale_factor - dilated_kernel_h + padding[0] + padding[2] - ) // strides[0] + 1 - ofm_width = ( - ifm_shape[2] * upscale_factor - dilated_kernel_w + padding[1] + padding[3] - ) // strides[1] + 1 - else: - ifm_stride_w = 16 - ifm_stride_c = 16 * ifm_shape[3] - ifm_stride_h = 16 * ifm_shape[2] * ifm_shape[3] - ofm_height = ( - ifm_shape[1] * upscale_factor - dilated_kernel_h + padding[0] + padding[2] - ) // strides[0] + 1 - ofm_width = ( - ifm_shape[3] * upscale_factor - dilated_kernel_w + padding[1] + padding[3] - ) // strides[1] + 1 - - if ofm_layout == "NHWC": - ofm_stride_c = 1 - ofm_stride_w = ofm_channels if ofm_width > 1 else 1 - ofm_stride_h = ofm_channels * ofm_width if ofm_height > 1 else 1 - else: - ofm_stride_w = 16 - ofm_stride_c = 16 * ofm_width - ofm_stride_h = 16 * ofm_width * ((ofm_channels - 1) // 16 + 1) - - return [ - dtype, - ifm_shape[1], - ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3], - ifm_channels, - ifm_shape[1], - 0, - ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3], - 0, - 0, - 0, - 0, - 0.5, - 10, - ifm_layout, - ifm_stride_h, - ifm_stride_w, - ifm_stride_c, - dtype, - ofm_height, - ofm_width, - ofm_channels, - ofm_height, - 0, - ofm_width, - 0, - 0, - 0, - 0, - 0.25, - 14, - ofm_layout, - ofm_stride_h, - ofm_stride_w, - ofm_stride_c, - kernel_shape[1], - kernel_shape[0], - strides[1], - strides[0], - dilation[1], - dilation[0], - 12, - padding[0], - padding[1], - padding[2], - padding[3], - activation, - 10 if activation == "CLIP" else 0, - 100 if activation == "CLIP" else 0, - rounding_mode, - upscale, - 0, - 0, - 0, - ] - - -def get_conv2d_args(call, include_buffers=False, remove_constants=False): - """A method to extract the arguments from conv2d extern call.""" - args = call.args - conv_args = [] - remove_indices = [0] - - # call.args[41]: BufferLoad for the first half of the weights - # call.args[42]: length of the load of the first half of the weights - # call.args[43]: BufferLoad for the second half of the weights - # call.args[44]: length of the load of the second half of the weights - # call.args[46]: BufferLoad for the first half of the bias - # call.args[47]: length of the load of the first half of the bias - # call.args[48]: BufferLoad for the second half of the bias - # call.args[49]: length of the load of the second half of the bias - if remove_constants: - remove_indices += [41, 42, 43, 44, 46, 47, 48, 49] - - for i, arg in enumerate(args): - if i in remove_indices: - continue - elif isinstance(arg, tvm.tir.expr.IntImm) or isinstance(arg, tvm.tir.expr.FloatImm): - conv_args.append(arg.value) - elif 
isinstance(arg, tvm.tir.expr.BufferLoad) and not include_buffers: - conv_args.append(arg.indices[0]) - else: - conv_args.append(arg) - - return conv_args - - -@pytest.mark.parametrize( - "trial", - [ - [ - (1, 8, 8, 3), - 3, - 16, - (1, 1), - (2, 1, 2, 1), - (1, 1), - (1, 1), - "CLIP", - "NHWC", - "NHWC", - "TFL", - "NONE", - ], - [ - (1, 8, 8, 3), - 3, - 16, - (1, 1), - (0, 0, 0, 0), - (1, 1), - (1, 1), - "NONE", - "NHWC", - "NHWC", - "NATURAL", - "NONE", - ], - [ - (1, 1, 1, 1), - 1, - 16, - (1, 1), - (0, 0, 0, 0), - (1, 1), - (1, 1), - "CLIP", - "NHWC", - "NHWC", - "TRUNCATE", - "NONE", - ], - [ - (1, 7, 9, 4), - 4, - 13, - (3, 2), - (1, 2, 1, 2), - (2, 1), - (1, 2), - "NONE", - "NHWC", - "NHWC", - "TFL", - "NONE", - ], - [ - (1, 8, 2, 8, 16), - 18, - 12, - (1, 1), - (2, 1, 2, 1), - (1, 1), - (1, 1), - "CLIP", - "NHCWB16", - "NHWC", - "NATURAL", - "ZEROS", - ], - [ - (1, 7, 9, 4), - 4, - 71, - (3, 2), - (1, 2, 0, 2), - (2, 1), - (1, 2), - "CLIP", - "NHWC", - "NHCWB16", - "TRUNCATE", - "ZEROS", - ], - [ - (1, 4, 12, 9, 16), - 182, - 67, - (2, 3), - (6, 3, 6, 2), - (2, 2), - (1, 1), - "CLIP", - "NHCWB16", - "NHCWB16", - "TFL", - "ZEROS", - ], - [ - (1, 7, 9, 4), - 4, - 13, - (3, 2), - (1, 2, 0, 3), - (2, 1), - (2, 2), - "CLIP", - "NHWC", - "NHWC", - "NATURAL", - "NEAREST", - ], - [ - (1, 7, 9, 4), - 4, - 71, - (3, 2), - (1, 2, 0, 2), - (2, 1), - (2, 2), - "CLIP", - "NHWC", - "NHCWB16", - "TRUNCATE", - "NEAREST", - ], - [ - (1, 13, 12, 19, 16), - 182, - 67, - (1, 3), - (5, 3, 2, 3), - (2, 1), - (2, 1), - "CLIP", - "NHCWB16", - "NHCWB16", - "TFL", - "NEAREST", - ], - ], -) -def test_conv2d_single(trial): - def _get_func( - ifm_shape, - ifm_channels, - ofm_channels, - kernel_shape, - padding, - strides, - dilation, - activation, - ifm_layout, - ofm_layout, - rounding_mode, - upscale, - ): - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - conv = make_ethosu_conv2d( - ifm, - ifm_channels, - ofm_channels, - kernel_shape, - padding, - strides, - dilation, - activation=activation, - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - rounding_mode=rounding_mode, - upscale=upscale, - ) - func = relay.Function(relay.analysis.free_vars(conv), conv) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - # TODO(@mbaret) Fix the tests for these known failures - # These are anticipated to actually be correct, just a testing issue to do with - # equivalent convolutions. 
- known_failures = [ - [(1, 3, 12, 9, 16), 182, 67, (2, 3), (1, 3), (2, 2), (1, 1), "CLIP", "NHCWB16", "NHCWB16"], - [(1, 2, 12, 9, 16), 182, 67, (1, 3), (6, 3), (2, 2), (1, 1), "CLIP", "NHCWB16", "NHCWB16"], - ] - func = _get_func(*trial) - mod, _ = _lower_to_tir(func) - data = [] - - def _visit(stmt): - if isinstance(stmt, tvm.tir.Call): - data.append(get_conv2d_args(stmt, remove_constants=True)) - - tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit) - - answer = _create_serial_conv2d_params(*trial) - assert data[0] == answer, data[0] - - -# Undefined variables used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class Conv2dDoubleCascade1: - @T.prim_func - def main(input_placeholder_5: T.Buffer((1, 8, 8, 3), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 8, 8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([304], "uint8") - buffer_1 = T.Buffer([80], "uint8") - buffer_2 = T.Buffer([320], "uint8") - buffer_3 = T.Buffer([160], "uint8") - placeholder_5 = T.Buffer([192], 'int8', data=input_placeholder_5.data) - ethosu_write_1 = T.Buffer([512], 'int8', data=input_ethosu_write_1.data) - # body - ethosu_write_2_data = T.allocate([1024], "int8", "global", annotations={"disable_lower_builtin": True}) - ethosu_write_2 = T.Buffer([1024], "int8", data=ethosu_write_2_data) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 4, 3, 8, 0, 4, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 4, 32, 8, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 32, 1, 1, 1, 1, 1, 1, 1, buffer_3[0], 160, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 4, 32, 8, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 128, 32, 1, "int8", 8, 4, 8, 8, 0, 4, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 1, 1, 1, 1, 1, 1, buffer[0], 304, T.int8(-1), T.int8(-1), 12, buffer_1[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 4, 3, 8, 0, 4, placeholder_5[12], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 4, 32, 8, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 32, 1, 1, 1, 1, 1, 1, 1, buffer_3[0], 160, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 4, 32, 8, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 128, 32, 1, "int8", 8, 4, 8, 8, 0, 4, ethosu_write_1[32], 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 1, 1, 1, 1, 1, 1, buffer[0], 304, T.int8(-1), T.int8(-1), 12, buffer_1[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - -# undefined variables used -@tvm.script.ir_module(check_well_formed=False) -class Conv2dDoubleCascade2: - @T.prim_func - def main(input_placeholder_5: T.Buffer((1, 8, 8, 3), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 8, 8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([80], "uint8") - buffer_1 = T.Buffer([320], "uint8") - buffer_2 = T.Buffer([1312], "uint8") - buffer_3 = T.Buffer([2608], "uint8") - 
placeholder_5 = T.Buffer([192], 'int8', data=input_placeholder_5.data) - ethosu_write_1 = T.Buffer([512], 'int8', data=input_ethosu_write_1.data) - # body - ethosu_write_2_data = T.allocate([1536], "int8", "global", annotations={"disable_lower_builtin": True}) - ethosu_write_2 = T.Buffer([1536], "int8", data=ethosu_write_2_data) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 5, 8, 32, 5, 0, 8, ethosu_write_2[256], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 3, 3, 1, 1, 1, 1, buffer_2[0], 1312, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 8, 32, 5, 0, 8, ethosu_write_2[256], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "int8", 4, 8, 8, 4, 0, 8, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 3, 3, 1, 1, 1, 1, buffer_3[0], 2608, T.int8(-1), T.int8(-1), 12, buffer[0], 80, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[48], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 5, 8, 32, 5, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 3, 3, 1, 1, 1, 1, buffer_2[0], 1312, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 8, 32, 5, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "int8", 4, 8, 8, 4, 0, 8, ethosu_write_1[256], 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 3, 3, 1, 1, 1, 1, buffer_3[0], 2608, T.int8(-1), T.int8(-1), 12, buffer[0], 80, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - -# undefined variables used -@tvm.script.ir_module(check_well_formed=False) -class Conv2dDoubleCascade3: - @T.prim_func - def main(input_placeholder_5: T.Buffer((1, 16, 16, 3), "int8"), input_ethosu_write_1: T.Buffer((1, 20, 4, 8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([1744], "uint8") - buffer_1 = T.Buffer([80], "uint8") - buffer_2 = T.Buffer([320], "uint8") - buffer_3 = T.Buffer([880], "uint8") - placeholder_5 = T.Buffer([768], 'int8', data=input_placeholder_5.data) - ethosu_write_1 = T.Buffer([640], 'int8', data=input_ethosu_write_1.data) - - # body - ethosu_write_2_data = T.allocate([2560], "int8", "global", annotations={"disable_lower_builtin": True}) - ethosu_write_2 = T.Buffer([2560], "int8", data=ethosu_write_2_data) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 16, 3, 8, 0, 16, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 48, 3, 1, "int8", 8, 8, 32, 8, 0, 8, ethosu_write_2[512], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 2, 3, 2, 1, 2, 1, buffer_3[0], 880, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 2, 1, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 8, 32, 8, 0, 8, ethosu_write_2[512], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "int8", 8, 4, 8, 8, 0, 4, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 32, 8, 1, 2, 3, 2, 1, 2, 1, buffer[0], 1744, T.int8(-1), T.int8(-1), 12, buffer_1[0], 80, T.int8(-1), 
T.int8(-1), 2, 1, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 12, 16, 3, 12, 0, 16, placeholder_5[192], 0, 0, 0, T.float32(0.5), 10, "NHWC", 48, 3, 1, "int8", 10, 8, 32, 10, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 2, 3, 2, 1, 2, 1, buffer_3[0], 880, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 0, 1, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 10, 8, 32, 10, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "int8", 8, 4, 8, 8, 0, 4, ethosu_write_1[256], 0, 0, 0, T.float32(0.25), 14, "NHWC", 32, 8, 1, 2, 3, 2, 1, 2, 1, buffer[0], 1744, T.int8(-1), T.int8(-1), 12, buffer_1[0], 80, T.int8(-1), T.int8(-1), 0, 1, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 16, 3, 4, 0, 16, placeholder_5[576], 0, 0, 0, T.float32(0.5), 10, "NHWC", 48, 3, 1, "int8", 4, 8, 32, 4, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 2, 3, 2, 1, 2, 1, buffer_3[0], 880, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 0, 1, 2, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 8, 32, 4, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "int8", 4, 4, 8, 4, 0, 4, ethosu_write_1[512], 0, 0, 0, T.float32(0.25), 14, "NHWC", 32, 8, 1, 2, 3, 2, 1, 2, 1, buffer[0], 1744, T.int8(-1), T.int8(-1), 12, buffer_1[0], 80, T.int8(-1), T.int8(-1), 0, 1, 2, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - -# undefined variables used -@tvm.script.ir_module(check_well_formed=False) -class Conv2dDoubleCascade4: - @T.prim_func - def main(input_placeholder_5: T.Buffer((1, 8, 1, 8, 16), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 2, 8, 16), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([1456], "uint8") - buffer_1 = T.Buffer([352], "uint8") - buffer_2 = T.Buffer([272], "uint8") - buffer_3 = T.Buffer([11040], "uint8") - placeholder_5 = T.Buffer([1024], 'int8', data=input_placeholder_5.data) - ethosu_write_1 = T.Buffer([2048], 'int8', data=input_ethosu_write_1.data) - # body - ethosu_write_2_data = T.allocate([2304], "int8", "global", annotations={"disable_lower_builtin": True}) - ethosu_write_2 = T.Buffer((2304,), "int8", data=ethosu_write_2_data) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 128, 16, 1, "int8", 5, 8, 35, 5, 0, 8, ethosu_write_2[384], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 384, 16, 128, 3, 3, 1, 1, 1, 1, buffer[0], 1456, T.int8(-1), T.int8(-1), 12, buffer_1[0], 352, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 8, 35, 5, 0, 8, ethosu_write_2[384], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 384, 16, 128, "int8", 4, 8, 26, 4, 0, 8, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 256, 16, 128, 3, 3, 1, 1, 1, 1, buffer_3[0], 11040, T.int8(-1), T.int8(-1), 12, buffer_2[0], 272, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[256], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 128, 16, 
1, "int8", 5, 8, 35, 5, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 384, 16, 128, 3, 3, 1, 1, 1, 1, buffer[0], 1456, T.int8(-1), T.int8(-1), 12, buffer_1[0], 352, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 8, 35, 5, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 384, 16, 128, "int8", 4, 8, 26, 4, 0, 8, ethosu_write_1[1024], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 256, 16, 128, 3, 3, 1, 1, 1, 1, buffer_3[0], 11040, T.int8(-1), T.int8(-1), 12, buffer_2[0], 272, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - -# undefined variables used -@tvm.script.ir_module(check_well_formed=False) -class Conv2dDoubleCascade5: - @T.prim_func - def main(input_placeholder: T.Buffer((1, 8, 8, 3), "int8"), input_ethosu_write: T.Buffer((1, 32, 32, 8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([160], "uint8") - buffer_1 = T.Buffer([320], "uint8") - buffer_2 = T.Buffer([304], "uint8") - buffer_3 = T.Buffer([80], "uint8") - placeholder = T.Buffer([192], 'int8', data=input_placeholder.data) - ethosu_write = T.Buffer([8192], 'int8', data=input_ethosu_write.data) - # body - ethosu_write_1_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_1 = T.Buffer([4096], "int8", data=ethosu_write_1_data) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 8, 3, 4, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 16, 32, 8, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 32, 1, 1, 1, 1, 1, 1, 1, buffer[0], 160, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 16, 32, 8, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 32, 8, 16, 0, 32, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 8, 1, 1, 1, 1, 1, 1, 1, buffer_2[0], 304, T.int8(-1), T.int8(-1), 12, buffer_3[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 8, 3, 4, 0, 8, placeholder[96], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 16, 32, 8, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 32, 1, 1, 1, 1, 1, 1, 1, buffer[0], 160, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 16, 32, 8, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 32, 8, 16, 0, 32, ethosu_write[4096], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 8, 1, 1, 1, 1, 1, 1, 1, buffer_2[0], 304, T.int8(-1), T.int8(-1), 12, buffer_3[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", 0, 0, 0, dtype="handle")) - - -# undefined variables used -@tvm.script.ir_module(check_well_formed=False) -class Conv2dDoubleCascade6: - @T.prim_func - def main(input_placeholder: T.Buffer((1, 8, 1, 8, 16), "int8"), input_ethosu_write: T.Buffer((1, 32, 2, 32, 16), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer 
= T.Buffer([1456], "uint8") - buffer_1 = T.Buffer([352], "uint8") - buffer_2 = T.Buffer([11040], "uint8") - buffer_3 = T.Buffer([272], "uint8") - placeholder = T.Buffer([1024], 'int8', data=input_placeholder.data) - ethosu_write = T.Buffer([32768], 'int8', data=input_ethosu_write.data) - # body - ethosu_write_1_data = T.allocate([12288], "int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_1 = T.Buffer([12288], "int8", data=ethosu_write_1_data) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 8, 3, 8, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 128, 16, 1, "int8", 16, 16, 35, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 768, 16, 256, 3, 3, 1, 1, 1, 1, buffer[0], 1456, T.int8(-1), T.int8(-1), 12, buffer_1[0], 352, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NEAREST", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 35, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 768, 16, 256, "int8", 32, 32, 26, 32, 0, 32, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 1024, 16, 512, 3, 3, 1, 1, 1, 1, buffer_2[0], 11040, T.int8(-1), T.int8(-1), 12, buffer_3[0], 272, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NEAREST", 0, 0, 0, dtype="handle")) -# fmt: on - - -@pytest.mark.parametrize( - "trial", - [ - [ - Conv2dDoubleCascade1, - (1, 8, 8, 3), - 3, - 32, - 8, - (1, 1), - (0, 0, 0, 0), - (1, 1), - (1, 1), - "NHWC", - "NONE", - (1, 8, 4, 8), - ], - [ - Conv2dDoubleCascade2, - (1, 8, 8, 3), - 3, - 32, - 8, - (3, 3), - (1, 1, 1, 1), - (1, 1), - (1, 1), - "NHWC", - "NONE", - (1, 4, 8, 8), - ], - [ - Conv2dDoubleCascade3, - (1, 16, 16, 3), - 3, - 32, - 8, - (3, 2), - (2, 1, 2, 1), - (1, 2), - (1, 2), - "NHWC", - "NONE", - (1, 8, 4, 8), - ], - [ - Conv2dDoubleCascade4, - (1, 8, 1, 8, 16), - 3, - 35, - 26, - (3, 3), - (1, 1, 1, 1), - (1, 1), - (1, 1), - "NHCWB16", - "NONE", - (1, 4, 2, 8, 16), - ], - [ - Conv2dDoubleCascade5, - (1, 8, 8, 3), - 3, - 32, - 8, - (1, 1), - (0, 0, 0, 0), - (1, 1), - (1, 1), - "NHWC", - "ZEROS", - (1, 16, 32, 8), - ], - [ - Conv2dDoubleCascade6, - (1, 8, 1, 8, 16), - 3, - 35, - 26, - (3, 3), - (1, 1, 1, 1), - (1, 1), - (1, 1), - "NHCWB16", - "NEAREST", - (1, 32, 2, 32, 16), - ], - ], -) -def test_conv2d_double_cascade(trial): - def _get_func( - ifm_shape, - ifm_channels, - mid_channels, - ofm_channels, - kernel_shape, - padding, - strides, - dilation, - layout, - upscale, - ): - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - conv1 = make_ethosu_conv2d( - ifm, - ifm_channels, - mid_channels, - kernel_shape, - padding, - strides, - dilation, - activation="NONE", - ifm_layout=layout, - ofm_layout=layout, - upscale=upscale, - ) - conv2 = make_ethosu_conv2d( - conv1, - mid_channels, - ofm_channels, - kernel_shape, - padding, - strides, - dilation, - activation="NONE", - ifm_layout=layout, - ofm_layout=layout, - upscale=upscale, - ) - func = relay.Function(relay.analysis.free_vars(conv2), conv2) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - reference_mod = trial[0] - params = trial[1:] - config = { - "enable_cascader": True, - } - with tvm.transform.PassContext(opt_level=3, config={"relay.ext.ethos-u.options": config}): - func = _get_func(*params[:-1]) - mod, _ = _lower_to_tir(func, cascader=total_cascader(params[-1])) - script = mod.script() - mod = tvm.script.from_source(script, check_well_formed=False) - tvm.ir.assert_structural_equal(mod["main"], reference_mod["main"], True) - - 
-# Undefined vars used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class Conv2dInlineCopy1: - @T.prim_func - def main(input_placeholder_3: T.Buffer((1, 10, 12, 8), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 8, 16), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([848], "uint8") - buffer_1 = T.Buffer([160], "uint8") - placeholder_3 = T.Buffer([960], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.Buffer([1024], 'int8', data=input_ethosu_write_1.data) - # body - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 8, 4, 8, 0, 8, placeholder_3[120], 0, 0, 0, T.float32(0.5), 10, "NHWC", 96, 8, 1, "int8", 8, 8, 16, 8, 0, 8, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 16, 1, 3, 3, 1, 1, 1, 1, buffer[0], 848, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - -# Undefined vars used -@tvm.script.ir_module(check_well_formed=False) -class Conv2dInlineCopy2: - @T.prim_func - def main(input_placeholder_3: T.Buffer((1, 7, 9, 5), "int8"), input_ethosu_write_1: T.Buffer((1, 3, 5, 16), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([160], "uint8") - buffer_1 = T.Buffer([656], "uint8") - placeholder_3 = T.Buffer([315], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.Buffer([240], 'int8', data=input_ethosu_write_1.data) - # body - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 3, 5, 3, 3, 0, 5, placeholder_3[146], 0, 0, 0, T.float32(0.5), 10, "NHWC", 45, 5, 1, "int8", 3, 5, 16, 3, 0, 5, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 80, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 656, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -@pytest.mark.parametrize( - "trial", - [ - [Conv2dInlineCopy1, (1, 10, 12, 8), (0, 1, 3, 0), (1, 9, 11, 4)], - [Conv2dInlineCopy2, (1, 7, 9, 5), (0, 3, 2, 1), (1, 6, 7, 4)], - ], -) -def test_conv2d_inline_copy(trial): - def _get_func(ifm_shape, lower, upper, ofm_channels=16): - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - sliced = relay.strided_slice(ifm, lower, upper) - conv = make_ethosu_conv2d( - sliced, upper[3] - lower[3], ofm_channels, (3, 3), (1, 1), (1, 1), (1, 1) - ) - func = relay.Function(relay.analysis.free_vars(conv), conv) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - reference_mod = trial[0] - params = trial[1:] - func = _get_func(*params) - mod, _ = _lower_to_tir(func) - script = mod.script() - mod = tvm.script.from_source(script, check_well_formed=False) - tvm.ir.assert_structural_equal(mod["main"], reference_mod["main"], True) - - -# Undefined vars used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class Conv2dInlineReshape1: - @T.prim_func - def main(input_placeholder_3: T.Buffer((4, 6, 8, 1), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 6, 16), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([160], "uint8") - buffer_1 = T.Buffer([848], "uint8") - placeholder_3 = T.Buffer([192], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data) - # body - 
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - -# undefined vars used -@tvm.script.ir_module(check_well_formed=False) -class Conv2dInlineReshape2: - @T.prim_func - def main(input_placeholder_3: T.Buffer((1, 24, 8), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 6, 16), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([160], "uint8") - buffer_1 = T.Buffer([848], "uint8") - placeholder_3 = T.Buffer([192], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data) - # body - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - -# undefined vars used -@tvm.script.ir_module(check_well_formed=False) -class Conv2dInlineReshape3: - @T.prim_func - def main(input_placeholder_3: T.Buffer((192, 1), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 6, 16), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([160], "uint8") - buffer_1 = T.Buffer([848], "uint8") - placeholder_3 = T.Buffer([192], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data) - # body - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 
0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - - -# undefined vars used -@tvm.script.ir_module(check_well_formed=False) -class Conv2dInlineReshape4: - @T.prim_func - def main(placeholder_3: T.Buffer((192,), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 6, 16), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([160], "uint8") - buffer_1 = T.Buffer([848], "uint8") - ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data) - # body - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -@pytest.mark.parametrize( - "trial", - [ - [Conv2dInlineReshape1, (4, 6, 8, 1), (1, 8, 6, 4), "NHWC"], - [Conv2dInlineReshape2, (1, 4 * 6, 8), (1, 8, 6, 4), "NHWC"], - [Conv2dInlineReshape3, (4 * 6 * 8, 1), (1, 8, 6, 4), "NHWC"], - [Conv2dInlineReshape4, (4 * 6 * 8,), (1, 8, 6, 4), "NHWC"], - ], -) -def test_conv2d_inline_reshape(trial): - def _get_func(ifm_shape, reshaped, ifm_layout): - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - ifm_reshaped = relay.reshape(ifm, reshaped) - conv = make_ethosu_conv2d( - ifm_reshaped, - reshaped[3], - 16, - (3, 3), - (1, 1), - (1, 1), - (1, 1), - activation="NONE", - ifm_layout=ifm_layout, - ) - func = relay.Function(relay.analysis.free_vars(conv), conv) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - reference_mod = trial[0] - params = trial[1:] - func = _get_func(*params) - mod, _ = _lower_to_tir(func, cascader=total_cascader((1, 4, 6, 16))) - script = mod.script() - mod = tvm.script.from_source(script, check_well_formed=False) - tvm.ir.assert_structural_equal(mod["main"], reference_mod["main"], True) - - -# TODO(@mbaret) Fix this case -@pytest.mark.xfail(raises=Exception, strict=True) -def test_conv2d_big_pad(): - def _get_func(): - ifm_shape = (1, 2, 2, 8) - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - conv = make_ethosu_conv2d( - ifm, ifm_shape[3], 16, (1, 1), (7, 7), (1, 1), (1, 1), ifm_layout="NHWC" - ) - func = relay.Function(relay.analysis.free_vars(conv), conv) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - func = _get_func() - mod, _ = _lower_to_tir(func, cascader=total_cascader((1, 4, 4, 16))) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_replace_copy.py b/tests/python/contrib/test_ethosu/test_replace_copy.py deleted file mode 100644 index ff343517352d..000000000000 --- a/tests/python/contrib/test_ethosu/test_replace_copy.py +++ /dev/null @@ -1,138 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest - -pytest.importorskip("ethosu.vela") -import tvm -from tvm import relay -from tvm.relay.backend.contrib.ethosu.tir.compiler import _lower_to_tir -from tvm.relay.backend.contrib.ethosu.tir.scheduler import ( - OperatorCompute, - copy_constants, -) -from tvm.relay.testing import run_opt_pass -from tvm.script import tir as T - -from .infra import make_ethosu_conv2d - - -# uninitialized varaibles used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class ReferenceModule: - @T.prim_func - def main(input_placeholder_3: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write_1: T.Buffer((1, 16, 16, 8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer_1 = T.Buffer([384], "uint8") - placeholder_3 = T.Buffer([8192], dtype="int8", data=input_placeholder_3.data) - ethosu_write_1 = T.Buffer([2048], dtype="int8", data=input_ethosu_write_1.data) - # body - placeholder_global_data = T.allocate([384], "uint8", "global", annotations={"disable_lower_builtin": True}) - placeholder_global = T.Buffer([384], "uint8", data=placeholder_global_data) - T.evaluate(T.call_extern("ethosu_copy", buffer_1[0], 384, placeholder_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 8, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 304, T.int8(-1), T.int8(-1), 12, placeholder_global[304], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -def test_copy(): - def _get_func(): - data = relay.var("data", shape=(1, 16, 16, 32), dtype="int8") - conv = make_ethosu_conv2d( - data, - 32, - 8, - (1, 1), - (0, 0), - (1, 1), - (1, 1), - ) - func = relay.Function(relay.analysis.free_vars(conv), conv) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - func = _get_func() - mod, _ = _lower_to_tir(func, cascader=copy_constants()) - - script = mod.script() - test_mod = tvm.script.from_source(script, check_well_formed=False) - reference_mod = ReferenceModule - tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True) - - -# Uninitialized variables used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class WeightStream: - @T.prim_func - def main(input_placeholder_5: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write_1: T.Buffer((1, 16, 16, 16), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([528], "uint8") - buffer_2 = T.Buffer([336], "uint8") - placeholder_5 = T.Buffer([8192], dtype="int8", data=input_placeholder_5.data) 
- ethosu_write_1 = T.Buffer([4096], dtype="int8", data=input_ethosu_write_1.data) - # body - placeholder_d_global_data = T.allocate([528], "uint8", "global", annotations={"disable_lower_builtin": True}) - placeholder_d_global = T.Buffer([528], "uint8", data=placeholder_d_global_data) - placeholder_d_global_1_data = T.allocate([336], "uint8", "global", annotations={"disable_lower_builtin": True}) - placeholder_d_global_1 = T.Buffer([336], "uint8", data=placeholder_d_global_1_data) - T.evaluate(T.call_extern("ethosu_copy", buffer[0], 528, placeholder_d_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_2[0], 336, placeholder_d_global_1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 10, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, placeholder_d_global[0], 416, T.int8(-1), T.int8(-1), 12, placeholder_d_global[416], 112, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 6, 16, 0, 16, ethosu_write_1[10], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, placeholder_d_global_1[0], 272, T.int8(-1), T.int8(-1), 12, placeholder_d_global_1[272], 64, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -def test_weight_stream(): - def _cascader(cached_func, const_dict, sch): - weight = cached_func.inputs[1] - scale_bias = cached_func.inputs[2] - out = cached_func.outputs[0] - conv_compute = OperatorCompute.from_output(out) - co = conv_compute.split(sch, 3, 10) - cache_weight = sch.cache_read(weight, "global", [conv_compute.op]) - cache_scale_bias = sch.cache_read(scale_bias, "global", [conv_compute.op]) - sch[cache_weight].compute_at(sch[out], co) - sch[cache_scale_bias].compute_at(sch[out], co) - - def _get_func(): - ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype="int8") - conv = make_ethosu_conv2d( - ifm, - 32, - 16, - (1, 1), - (0, 0), - (1, 1), - (1, 1), - ) - func = relay.Function(relay.analysis.free_vars(conv), conv) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - func = _get_func() - mod, _ = _lower_to_tir(func, cascader=_cascader) - - script = mod.script() - test_mod = tvm.script.from_source(script, check_well_formed=False) - reference_mod = WeightStream - tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_replace_depthwise_conv2d.py b/tests/python/contrib/test_ethosu/test_replace_depthwise_conv2d.py deleted file mode 100644 index 32f75621fde0..000000000000 --- a/tests/python/contrib/test_ethosu/test_replace_depthwise_conv2d.py +++ /dev/null @@ -1,213 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pytest - -pytest.importorskip("ethosu.vela") - -import tvm -from tvm import relay -from tvm.relay.testing import run_opt_pass -from tvm.relay.backend.contrib.ethosu.tir.compiler import _lower_to_tir -from .infra import make_ethosu_depthwise_conv2d, get_convolutional_args - - -@pytest.mark.parametrize( - "trial", - [ - [(1, 8, 8, 3), 3, (3, 2), (0, 0), (1, 1), (1, 1), "CLIP", "NHWC", "NHWC", "TFL"], - [(1, 8, 8, 3), 3, (1, 1), (2, 1), (1, 1), (1, 1), "NONE", "NHWC", "NHWC", "NATURAL"], - [(1, 8, 8, 3), 3, (1, 1), (0, 0), (1, 1), (1, 1), "NONE", "NHWC", "NHWC", "TRUNCATE"], - [(1, 1, 1, 1), 1, (1, 1), (0, 0), (1, 1), (1, 1), "CLIP", "NHWC", "NHWC", "TFL"], - [(1, 7, 9, 4), 4, (3, 2), (1, 2), (2, 1), (1, 2), "NONE", "NHWC", "NHWC", "NATURAL"], - [ - (1, 8, 2, 8, 16), - 18, - (1, 1), - (2, 1), - (1, 1), - (1, 1), - "CLIP", - "NHCWB16", - "NHWC", - "TRUNCATE", - ], - [(1, 7, 9, 40), 40, (3, 2), (1, 2), (2, 1), (1, 2), "CLIP", "NHWC", "NHCWB16", "TFL"], - [ - (1, 4, 12, 9, 16), - 182, - (2, 3), - (6, 3), - (2, 2), - (1, 1), - "CLIP", - "NHCWB16", - "NHCWB16", - "NATURAL", - ], - [(1, 7, 9, 4), 4, (3, 2), (1, 2), (2, 1), (2, 2), "CLIP", "NHWC", "NHWC", "TRUNCATE"], - [(1, 7, 9, 41), 41, (3, 2), (1, 2), (2, 1), (2, 2), "CLIP", "NHWC", "NHCWB16", "TFL"], - [ - (1, 13, 12, 19, 16), - 182, - (1, 3), - (5, 3), - (2, 1), - (2, 1), - "CLIP", - "NHCWB16", - "NHCWB16", - "NATURAL", - ], - ], -) -@tvm.testing.skip_parameterizations( - "trial3", reason="See https://github.com/apache/tvm/issues/12841" -) -def test_depthwise_conv2d_single(request, trial): - def _get_func( - ifm_shape, - channels, - kernel_shape, - padding, - strides, - dilation, - activation, - ifm_layout, - ofm_layout, - rounding_mode, - ): - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - depthwise = make_ethosu_depthwise_conv2d( - ifm, - channels, - kernel_shape, - padding, - strides, - dilation, - activation, - ifm_layout, - ofm_layout, - "int8", - "uint8", - rounding_mode, - ) - func = relay.Function(relay.analysis.free_vars(depthwise), depthwise) - func = run_opt_pass(func, relay.transform.InferType()) - return func - - func = _get_func(*trial) - mod, _ = _lower_to_tir(func) - data = [] - - def _visit(stmt): - if isinstance(stmt, tvm.tir.Call): - data.append(get_convolutional_args(stmt, remove_constants=True)) - - tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit) - ( - ifm_shape, - channels, - kernel_shape, - padding, - strides, - dilation, - activation, - ifm_layout, - ofm_layout, - rounding_mode, - ) = trial - dilated_kernel_h = (kernel_shape[0] - 1) * dilation[0] + 1 - dilated_kernel_w = (kernel_shape[1] - 1) * dilation[1] + 1 - if ifm_layout == "NHWC": - ifm_stride_c = 1 - ifm_stride_w = ifm_shape[3] - ifm_stride_h = ifm_shape[2] * ifm_shape[3] - ofm_height = (ifm_shape[1] - dilated_kernel_h + padding[0] + padding[0]) // strides[0] + 1 - ofm_width = (ifm_shape[2] - dilated_kernel_w + padding[1] + padding[1]) // strides[1] + 1 - else: - ifm_stride_w = 16 - ifm_stride_c = 16 * ifm_shape[3] - ifm_stride_h = 16 * ifm_shape[2] * ifm_shape[3] - ofm_height = (ifm_shape[1] - dilated_kernel_h 
+ padding[0] + padding[0]) // strides[0] + 1 - ofm_width = (ifm_shape[3] - dilated_kernel_w + padding[1] + padding[1]) // strides[1] + 1 - - if ofm_layout == "NHWC": - ofm_stride_c = 1 - ofm_stride_w = channels if ofm_width > 1 else 1 - ofm_stride_h = channels * ofm_width if ofm_height > 1 else 1 - else: - ofm_stride_w = 16 - ofm_stride_c = 16 * ofm_width - ofm_stride_h = 16 * ofm_width * ((channels - 1) // 16 + 1) - - answer = [ - "int8", - ifm_shape[1], - ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3], - channels, - ifm_shape[1], - 0, - ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3], - 0, - 0, - 0, - 0, - 0.6, - 11, - ifm_layout, - ifm_stride_h, - ifm_stride_w, - ifm_stride_c, - "int8", - ofm_height, - ofm_width, - channels, - ofm_height, - 0, - ofm_width, - 0, - 0, - 0, - 0, - 0.26, - 15, - ofm_layout, - ofm_stride_h, - ofm_stride_w, - ofm_stride_c, - kernel_shape[1], - kernel_shape[0], - strides[1], - strides[0], - dilation[1], - dilation[0], - 13, - padding[0], - padding[1], - padding[0], - padding[1], - activation, - 15 if activation == "CLIP" else 0, - 105 if activation == "CLIP" else 0, - rounding_mode, - "NONE", - 0, - 0, - 0, - ] - assert data[0] == answer, data[0] diff --git a/tests/python/contrib/test_ethosu/test_replace_identity.py b/tests/python/contrib/test_ethosu/test_replace_identity.py deleted file mode 100644 index 775ef1260665..000000000000 --- a/tests/python/contrib/test_ethosu/test_replace_identity.py +++ /dev/null @@ -1,116 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") - -import tvm -from tvm import relay -from tvm.relay.testing import run_opt_pass -from tvm.relay.backend.contrib.ethosu.tir import spec -from tvm.relay.backend.contrib.ethosu.tir.compiler import _lower_to_tir -from .infra import make_ethosu_identity, get_pooling_args - - -@pytest.mark.parametrize("ifm_shape", [[1, 5, 9, 3], [20, 14, 7], [31, 40], [101]]) -def test_identity(ifm_shape): - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - identity = make_ethosu_identity(ifm) - - func = relay.Function(relay.analysis.free_vars(identity), identity) - func = run_opt_pass(func, relay.transform.InferType()) - mod, _ = _lower_to_tir(func) - data = [] - - def _visit(stmt): - if isinstance(stmt, tvm.tir.Call): - data.append(get_pooling_args(stmt)) - - # Construct the ifm shape that the initial ifm shape gets legalized into - ref_ifm_shape = ifm_shape - - if len(ref_ifm_shape) < 4: - ref_ifm_shape = [1] + ref_ifm_shape - - while len(ref_ifm_shape) < 4: - ref_ifm_shape.append(1) - - tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit) - ifm_stride_c = 1 - ifm_stride_w = ref_ifm_shape[3] - ifm_stride_h = ref_ifm_shape[2] * ref_ifm_shape[3] - ofm_height = ref_ifm_shape[1] - ofm_width = ref_ifm_shape[2] - ofm_channels = ref_ifm_shape[3] - ofm_stride_c = 1 - ofm_stride_w = ofm_channels if ofm_width > 1 else 1 - ofm_stride_h = ofm_channels * ofm_width if ofm_height > 1 else 1 - - # The identity operator TIR gets converted into serial pooling - serial_pooling = spec.SerialPooling( - ifm=spec.SerialFeatureMap( - data_type="int8", - height=ref_ifm_shape[1], - width=ref_ifm_shape[2], - channels=ofm_channels, - tile_height_0=ref_ifm_shape[1], - tile_height_1=0, - tile_width_0=ref_ifm_shape[2], - tile_address_0=0, - tile_address_1=0, - tile_address_2=0, - tile_address_3=0, - scale=1.0, - zero_point=0, - layout="NHWC", - stride_h=ifm_stride_h, - stride_w=ifm_stride_w, - stride_c=ifm_stride_c, - ), - ofm=spec.SerialFeatureMap( - data_type="int8", - height=ofm_height, - width=ofm_width, - channels=ofm_channels, - tile_height_0=ofm_height, - tile_height_1=0, - tile_width_0=ofm_width, - tile_address_0=0, - tile_address_1=0, - tile_address_2=0, - tile_address_3=0, - scale=1.0, - zero_point=0, - layout="NHWC", - stride_h=ofm_stride_h, - stride_w=ofm_stride_w, - stride_c=ofm_stride_c, - ), - pooling_type="AVG", - pool_shape=spec.SerialKernel(1, 1, 1, 1, 1, 1), - padding=spec.SerialPadding(0, 0, 0, 0), - activation=spec.SerialActivation(op="NONE", clip_min=0, clip_max=0), - upscale="NONE", - rounding_mode="TFL", - block_config=spec.SerialBlockConfig(0, 0, 0), - ) - - assert data[0] == ["ethosu_identity"] + list(serial_pooling) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_replace_pooling.py b/tests/python/contrib/test_ethosu/test_replace_pooling.py deleted file mode 100644 index e4438eb62abd..000000000000 --- a/tests/python/contrib/test_ethosu/test_replace_pooling.py +++ /dev/null @@ -1,346 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest - -pytest.importorskip("ethosu.vela") - -import tvm -from tvm import relay -from tvm.relay.testing import run_opt_pass -from tvm.relay.backend.contrib.ethosu.tir import spec -from tvm.relay.backend.contrib.ethosu.tir.compiler import _lower_to_tir -from .infra import make_ethosu_pooling, get_pooling_args - - -def _create_serial_pooling( - ifm_shape, - ofm_channels, - ifm_layout, - ofm_layout, - pool_shape, - pooling_type, - strides, - padding, - activation="NONE", - rounding_mode="TFL", - upscale="NONE", - ofm_dtype="int8", -): - upscale_factor = 2 if upscale != "NONE" else 1 - if ifm_layout == "NHWC": - ifm_stride_c = 1 - ifm_stride_w = ifm_shape[3] - ifm_stride_h = ifm_shape[2] * ifm_shape[3] - ofm_height = ( - ifm_shape[1] * upscale_factor - pool_shape[0] + padding[0] + padding[2] - ) // strides[0] + 1 - ofm_width = ( - ifm_shape[2] * upscale_factor - pool_shape[1] + padding[1] + padding[3] - ) // strides[1] + 1 - else: - ifm_stride_w = 16 - ifm_stride_c = 16 * ifm_shape[3] if ofm_channels >= 16 else 1 - ifm_stride_h = 16 * ifm_shape[2] * ifm_shape[3] - ofm_height = ( - ifm_shape[1] * upscale_factor - pool_shape[0] + padding[0] + padding[2] - ) // strides[0] + 1 - ofm_width = ( - ifm_shape[3] * upscale_factor - pool_shape[1] + padding[1] + padding[3] - ) // strides[1] + 1 - - if ofm_layout == "NHWC": - ofm_stride_c = 1 - ofm_stride_w = ofm_channels if ofm_width > 1 else 1 - ofm_stride_h = ofm_channels * ofm_width if ofm_height > 1 else 1 - else: - ofm_stride_w = 16 - ofm_stride_c = 16 * ofm_width if ofm_channels >= 16 else 1 - ofm_stride_h = 16 * ofm_width * ((ofm_channels - 1) // 16 + 1) - - ifm_channels = ofm_channels if pooling_type != "SUM" else ifm_shape[-1] - - return spec.SerialPooling( - ifm=spec.SerialFeatureMap( - data_type="int8", - height=ifm_shape[1], - width=ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3], - channels=ifm_channels, - tile_height_0=ifm_shape[1], - tile_height_1=0, - tile_width_0=ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3], - tile_address_0=0, - tile_address_1=0, - tile_address_2=0, - tile_address_3=0, - scale=1.0, - zero_point=0, - layout=ifm_layout, - stride_h=ifm_stride_h, - stride_w=ifm_stride_w, - stride_c=ifm_stride_c, - ), - ofm=spec.SerialFeatureMap( - data_type=ofm_dtype, - height=ofm_height, - width=ofm_width, - channels=ofm_channels, - tile_height_0=ofm_height, - tile_height_1=0, - tile_width_0=ofm_width, - tile_address_0=0, - tile_address_1=0, - tile_address_2=0, - tile_address_3=0, - scale=1.0, - zero_point=0, - layout=ofm_layout, - stride_h=ofm_stride_h, - stride_w=ofm_stride_w, - stride_c=ofm_stride_c, - ), - pooling_type=pooling_type, - pool_shape=spec.SerialKernel( - width=pool_shape[1], - height=pool_shape[0], - stride_w=strides[1], - stride_h=strides[0], - dilation_w=1, - dilation_h=1, - ), - padding=spec.SerialPadding( - top=padding[0], left=padding[1], bottom=padding[2], right=padding[3] - ), - activation=spec.SerialActivation( - op=activation, - clip_min=10 if activation == "CLIP" else 0, - clip_max=100 if activation == "CLIP" else 0, - ), - rounding_mode=rounding_mode, - upscale=upscale, - 
block_config=spec.SerialBlockConfig(0, 0, 0), - ) - - -@pytest.mark.parametrize( - "ifm_shape, ofm_channels, ifm_layout, ofm_layout, rounding_mode, upscale", - [ - ((1, 5, 9, 3), 3, "NHWC", "NHWC", "TFL", "NONE"), - ((1, 8, 3, 9, 16), 40, "NHCWB16", "NHCWB16", "NATURAL", "NONE"), - ((1, 8, 3, 9, 16), 40, "NHCWB16", "NHWC", "TRUNCATE", "ZEROS"), - ((1, 8, 9, 40), 40, "NHWC", "NHCWB16", "TFL", "ZEROS"), - ((1, 8, 9, 8), 8, "NHWC", "NHCWB16", "TFL", "NEAREST"), - ((1, 5, 9, 3), 3, "NHWC", "NHWC", "TFL", "NEAREST"), - ], -) -@pytest.mark.parametrize("pooling_type", ["AVG", "MAX"]) -@pytest.mark.parametrize("activation", ["NONE", "CLIP"]) -def test_avg_max_pooling_single( - ifm_shape, - ofm_channels, - ifm_layout, - ofm_layout, - pooling_type, - activation, - rounding_mode, - upscale, -): - pool_shape = (3, 2) - strides = (1, 2) - - # When strides are not (1, 1) it is possible to create invalid - # padding configurations. It is possible to construct a pooling - # operation with invalid padding, but the compiler will account - # for this and adjust the padding accordingly, leading to a - # mismatch between the expected and actual result. Therefore, - # hardcoded padding values are used for each case. - padding = (1, 1, 1, 0) if upscale == "NONE" else (0, 0, 0, 0) - - dtype = "int8" - - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - pooling = make_ethosu_pooling( - ifm, - pooling_type, - pool_shape, - ofm_channels, - dtype, - strides, - padding, - activation, - ifm_layout, - ofm_layout, - rounding_mode, - upscale, - ) - func = relay.Function(relay.analysis.free_vars(pooling), pooling) - func = run_opt_pass(func, relay.transform.InferType()) - mod, _ = _lower_to_tir(func) - data = [] - - def _visit(stmt): - if isinstance(stmt, tvm.tir.Call): - data.append(get_pooling_args(stmt)) - - tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit) - - serial_pooling = _create_serial_pooling( - ifm_shape, - ofm_channels, - ifm_layout, - ofm_layout, - pool_shape, - pooling_type, - strides, - padding, - activation, - rounding_mode, - upscale, - ) - assert data[0] == ["ethosu_pooling"] + list(serial_pooling) - - -@pytest.mark.parametrize( - "ifm_shape, ofm_layout, rounding_mode", - [ - ((1, 5, 9, 3), "NHWC", "TFL"), - ((1, 8, 9, 40), "NHCWB16", "TFL"), - ((1, 8, 9, 8), "NHCWB16", "TRUNCATE"), - ((1, 5, 9, 3), "NHWC", "NATURAL"), - ], -) -@pytest.mark.parametrize("activation", ["NONE", "CLIP"]) -def test_sum_pooling_single( - ifm_shape, - ofm_layout, - activation, - rounding_mode, -): - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - pooling = make_ethosu_pooling( - ifm=ifm, - pooling_type="SUM", - pool_shape=(1, 1), - ofm_channels=1, - ofm_dtype="int32", - strides=(1, 1), - padding=(0, 0, 0, 0), - activation=activation, - ofm_layout=ofm_layout, - rounding_mode=rounding_mode, - ) - func = relay.Function(relay.analysis.free_vars(pooling), pooling) - func = run_opt_pass(func, relay.transform.InferType()) - mod, _ = _lower_to_tir(func) - data = [] - - def _visit(stmt): - if isinstance(stmt, tvm.tir.Call): - data.append(get_pooling_args(stmt)) - - tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit) - - serial_pooling = _create_serial_pooling( - ifm_shape=ifm_shape, - ofm_channels=1, - ifm_layout="NHWC", - ofm_layout=ofm_layout, - pool_shape=(1, 1), - pooling_type="SUM", - strides=(1, 1), - padding=(0, 0, 0, 0), - activation=activation, - rounding_mode=rounding_mode, - ofm_dtype="int32", - ) - assert data[0] == ["ethosu_pooling"] + list(serial_pooling) - - -def 
test_correct_stride_with_multiple_pooling(): - """Testing a specific case of two pooling operations with NHWC inputs/outputs - but a NHCWB16 intermediate tensor. This lead to elements being accessed in the - wrong order by the NPU, due to incorrect stride values being calculated.""" - - ifm_shape = (1, 4, 4, 8) - ofm_channels = 8 - pooling_type = "MAX" - pool_shape = (1, 1) - strides = (1, 1) - padding = (0, 0, 0, 0) - dtype = "int8" - - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - op = make_ethosu_pooling( - ifm, - pooling_type, - pool_shape, - ofm_channels, - dtype, - strides, - padding, - ifm_layout="NHWC", - ofm_layout="NHCWB16", - ) - op = make_ethosu_pooling( - op, - pooling_type, - pool_shape, - ofm_channels, - dtype, - strides, - padding, - ifm_layout="NHCWB16", - ofm_layout="NHWC", - ) - func = relay.Function(relay.analysis.free_vars(op), op) - func = run_opt_pass(func, relay.transform.InferType()) - mod, _ = _lower_to_tir(func) - - data = [] - - def _visit(stmt): - if isinstance(stmt, tvm.tir.Call): - data.append(get_pooling_args(stmt)) - - tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit) - - serial_pooling_1 = _create_serial_pooling( - [1, 4, 4, 8], - 8, - "NHWC", - "NHCWB16", - pool_shape, - pooling_type, - strides, - padding, - ) - serial_pooling_2 = _create_serial_pooling( - [1, 4, 1, 4, 16], - 8, - "NHCWB16", - "NHWC", - pool_shape, - pooling_type, - strides, - padding, - ) - - assert data[0] == ["ethosu_pooling"] + list(serial_pooling_1) - assert data[1] == ["ethosu_pooling"] + list(serial_pooling_2) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_replace_unary_elementwise.py b/tests/python/contrib/test_ethosu/test_replace_unary_elementwise.py deleted file mode 100644 index f61ace0d51ec..000000000000 --- a/tests/python/contrib/test_ethosu/test_replace_unary_elementwise.py +++ /dev/null @@ -1,157 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") -import tvm -import tvm.script -from tvm import relay -from tvm.relay.testing import run_opt_pass -from tvm.relay.backend.contrib.ethosu.tir import spec -from tvm.relay.backend.contrib.ethosu.tir.compiler import _lower_to_tir -from .infra import make_ethosu_unary_elementwise - - -def _get_unary_elementwise_args(call, include_buffers=False, remove_constants=False): - args = call.args - unary_elementwise_args = [] - - for i, arg in enumerate(args): - if isinstance(arg, tvm.tir.expr.IntImm) or isinstance(arg, tvm.tir.expr.FloatImm): - unary_elementwise_args.append(arg.value) - elif isinstance(arg, tvm.tir.expr.BufferLoad) and not include_buffers: - unary_elementwise_args.append(arg.indices[0]) - else: - unary_elementwise_args.append(arg) - - return unary_elementwise_args - - -@pytest.mark.parametrize( - "ifm_shape, ifm_channels, ifm_layout, ofm_layout, rounding_mode", - [ - ((1, 5, 9, 3), 3, "NHWC", "NHWC", "TFL"), - ((1, 8, 3, 9, 16), 40, "NHCWB16", "NHCWB16", "NATURAL"), - ((1, 8, 3, 9, 16), 40, "NHCWB16", "NHWC", "TRUNCATE"), - ((1, 8, 9, 40), 40, "NHWC", "NHCWB16", "TFL"), - ], -) -@pytest.mark.parametrize("operator_type, data_type", [("ABS", "int8"), ("CLZ", "int32")]) -@pytest.mark.parametrize("activation", ["NONE"]) -def test_unary_elementwise_single( - ifm_shape, - ifm_channels, - ifm_layout, - ofm_layout, - rounding_mode, - operator_type, - activation, - data_type, -): - ifm = relay.var("ifm", shape=ifm_shape, dtype=data_type) - - unary_elementwise = make_ethosu_unary_elementwise( - ifm, ifm_channels, operator_type, activation, ifm_layout, ofm_layout, rounding_mode - ) - func = relay.Function(relay.analysis.free_vars(unary_elementwise), unary_elementwise) - func = run_opt_pass(func, relay.transform.InferType()) - mod, _ = _lower_to_tir(func) - data = [] - - def _visit(stmt): - if isinstance(stmt, tvm.tir.Call): - data.append(_get_unary_elementwise_args(stmt, remove_constants=True)) - - tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit) - if ifm_layout == "NHWC": - ifm_stride_c = 1 - ifm_stride_w = ifm_shape[3] if ifm_shape[2] != 1 else 1 - ifm_stride_h = ifm_shape[2] * ifm_shape[3] if ifm_shape[1] != 1 else 1 - - ofm_height = ifm_shape[1] - ofm_width = ifm_shape[2] - else: - ifm_stride_w = 16 - ifm_stride_c = 16 * ifm_shape[3] - ifm_stride_h = 16 * ifm_shape[2] * ifm_shape[3] - - ofm_height = ifm_shape[1] - ofm_width = ifm_shape[3] - - if ofm_layout == "NHWC": - ofm_stride_c = 1 - ofm_stride_w = ifm_channels if ofm_width > 1 else 1 - ofm_stride_h = ifm_channels * ofm_width if ofm_height > 1 else 1 - else: - ofm_stride_w = 16 - ofm_stride_c = 16 * ofm_width - ofm_stride_h = 16 * ofm_width * ((ifm_channels - 1) // 16 + 1) - - serial_unary_elementwise = spec.SerialUnaryElementwise( - ifm=spec.SerialFeatureMap( - data_type=data_type, - height=ifm_shape[1], - width=ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3], - channels=ifm_channels, - tile_height_0=ifm_shape[1], - tile_height_1=0, - tile_width_0=ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3], - tile_address_0=0, - tile_address_1=0, - tile_address_2=0, - tile_address_3=0, - scale=1.0, - zero_point=0, - layout=ifm_layout, - stride_h=ifm_stride_h, - stride_w=ifm_stride_w, - stride_c=ifm_stride_c, - ), - ofm=spec.SerialFeatureMap( - data_type=data_type, - height=ofm_height, - width=ofm_width, - channels=ifm_channels, - tile_height_0=ofm_height, - tile_height_1=0, - tile_width_0=ofm_width, - tile_address_0=0, - tile_address_1=0, - tile_address_2=0, - 
tile_address_3=0, - scale=1.0, - zero_point=0, - layout=ofm_layout, - stride_h=ofm_stride_h, - stride_w=ofm_stride_w, - stride_c=ofm_stride_c, - ), - operator_type=operator_type, - activation=spec.SerialActivation( - op=activation, - clip_min=10 if activation == "CLIP" else 0, - clip_max=100 if activation == "CLIP" else 0, - ), - rounding_mode=rounding_mode, - block_config=spec.SerialBlockConfig(0, 0, 0), - ) - - assert data[0] == ["ethosu_unary_elementwise"] + list(serial_unary_elementwise) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_rolling_buffer.py b/tests/python/contrib/test_ethosu/test_rolling_buffer.py deleted file mode 100644 index 58376d8b614c..000000000000 --- a/tests/python/contrib/test_ethosu/test_rolling_buffer.py +++ /dev/null @@ -1,103 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import pytest - -pytest.importorskip("ethosu.vela") -import tvm -from tvm.relay.backend.contrib.ethosu.tir.scheduler import OperatorCompute -import tvm.relay.backend.contrib.ethosu.codegen as codegen -import tensorflow as tf -from . 
import infra - - -@pytest.mark.parametrize( - "axis, ifm_shape, pool_shape", - [ - (1, (1, 12, 1, 2), (3, 1)), - (1, (1, 12, 12, 2), (3, 3)), - (2, (1, 1, 12, 2), (1, 3)), - (2, (1, 12, 12, 2), (3, 3)), - ], -) -def test_rolling_buffer_2_layers(axis, ifm_shape, pool_shape): - accel_type = "ethos-u55-256" - strides = (1, 1) - - @tf.function - def tf_model(x): - padding = "VALID" - pool_0 = tf.nn.max_pool(x, pool_shape, strides, padding) - pool_1 = tf.nn.max_pool(pool_0, pool_shape, strides, padding) - return pool_1 - - def _cascader(cached_func, const_dict, sch): - pool_b_out = cached_func.outputs[0] - pool_b_compute = OperatorCompute.from_output(pool_b_out) - - pool_a_out = pool_b_compute.read.op.input_tensors[0] - pool_a_compute = OperatorCompute.from_output(pool_a_out) - - outer = pool_b_compute.split(sch, axis=axis, val=4) - pool_a_compute.compute_at(sch, stage=sch[pool_b_out], axis=outer) - pool_a_compute.rolling_buffer(sch) - - codegen.SCHEDULER = lambda: _cascader - infra.compare_tvm_with_tflite(tf_model, [ifm_shape], accel_type) - - -@pytest.mark.parametrize( - "axis, ifm_shape, pool_shape", - [ - (1, (1, 12, 1, 2), (3, 1)), - (1, (1, 12, 1, 17), (3, 1)), - (1, (1, 12, 12, 2), (3, 3)), - (1, (1, 12, 12, 17), (3, 3)), - (2, (1, 1, 12, 2), (1, 3)), - (2, (1, 1, 12, 17), (1, 3)), - (2, (1, 12, 12, 2), (3, 3)), - (2, (1, 12, 12, 17), (3, 3)), - ], -) -def test_rolling_buffer_3_layers(axis, ifm_shape, pool_shape): - accel_type = "ethos-u55-256" - strides = (1, 1) - - @tf.function - def tf_model(x): - padding = "VALID" - pool_0 = tf.nn.max_pool(x, pool_shape, strides, padding) - pool_1 = tf.nn.max_pool(pool_0, pool_shape, strides, padding) - pool_2 = tf.nn.max_pool(pool_1, pool_shape, strides, padding) - return pool_2 - - def _cascader(cached_func, const_dict, sch): - pool_b_out = cached_func.outputs[0] - pool_b_compute = OperatorCompute.from_output(pool_b_out) - - pool_a_out = pool_b_compute.read.op.input_tensors[0] - pool_a_compute = OperatorCompute.from_output(pool_a_out) - - outer = pool_b_compute.split(sch, axis=axis, val=4) - pool_a_compute.compute_at(sch, stage=sch[pool_b_out], axis=outer) - pool_a_compute.rolling_buffer(sch) - - codegen.SCHEDULER = lambda: _cascader - infra.compare_tvm_with_tflite(tf_model, [ifm_shape], accel_type) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_scheduler.py b/tests/python/contrib/test_ethosu/test_scheduler.py deleted file mode 100644 index 0b6f4a2629b7..000000000000 --- a/tests/python/contrib/test_ethosu/test_scheduler.py +++ /dev/null @@ -1,272 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") -import tvm -from tvm.script import tir as T -from tvm import relay -from tvm.relay.testing import run_opt_pass -from tvm import te, topi -from tvm.relay.backend.contrib.ethosu.tir.scheduler import ( - tile_nd, - schedule_pragmas, - inline_no_ops, - total_cascader, - copy_constants, - schedule_cache_reads, - copy_luts, -) -from tvm.relay.backend.contrib.ethosu.tir.compiler import ( - lower_to_te, - extract_constants, - _lower_to_tir, -) -from .infra import ( - AttachType, - make_ethosu_conv2d, - make_ethosu_identity, - make_ethosu_binary_elementwise, -) - - -class TestTEGraph: - def __init__(self, inputs, outputs): - self.inputs = inputs - self.outputs = outputs - - -def test_tile_nd(): - input = te.placeholder((12, 12), dtype="uint8", name="input") - out = topi.nn.relu(input) - sch = te.create_schedule([out.op]) - outer_iters, inner_iters = tile_nd(sch, out, (3, 4)) - assert tuple(sch[out].leaf_iter_vars) == (*outer_iters, *inner_iters) - - -def test_schedule_pragmas(): - input = te.placeholder((12, 12), dtype="uint8", name="input") - out = te.compute( - (12, 12), - lambda i, j: input[i, j], - attrs={ - "op": "unity", - "info": 1, - }, - ) - sch = te.create_schedule([out.op]) - sch[out].split(out.op.axis[0], 3) - schedule_pragmas(sch) - iter_var = sch[out].leaf_iter_vars[1] - assert list(sch[out].iter_var_attrs[iter_var].pragma_keys) == ["op", "info"] - assert list(sch[out].iter_var_attrs[iter_var].pragma_values) == ["unity", 1] - - -def test_schedule_pragmas_for_const(): - input = te.placeholder((12, 12), dtype="uint8", name="input") - const = te.compute((), lambda: 2) - add = topi.add(input, const) - sch = te.create_schedule([add.op]) - schedule_pragmas(sch) - - -def test_inline_no_ops(): - input = relay.var("input", shape=(12, 12), dtype="uint8") - slice = relay.strided_slice(input, [0, 0], [6, 6]) - relu1 = relay.nn.relu(slice) - reshape = relay.reshape(relu1, (36,)) - relu2 = relay.nn.relu(reshape) - func = relay.Function(relay.analysis.free_vars(relu2), relu2) - func = run_opt_pass(func, relay.transform.InferType()) - - cached_func = lower_to_te(func) - sch = te.create_schedule([cached_func.outputs[0].op]) - inline_no_ops(cached_func, sch) - reshape_tensor = cached_func.outputs[0].op.input_tensors[0] - slice_tensor = reshape_tensor.op.input_tensors[0].op.input_tensors[0] - assert sch[reshape_tensor].attach_type == AttachType.kInline - assert sch[slice_tensor].attach_type == AttachType.kInline - - -def test_total_cascader(): - input = te.placeholder((12, 12), dtype="uint8", name="input") - relu1 = topi.nn.relu(input) - relu2 = topi.nn.relu(relu1) - relu3 = topi.nn.relu(relu2) - sch = te.create_schedule([relu3.op]) - cascader = total_cascader((4, 4)) - cascader(TestTEGraph([input], [relu3]), {}, sch) - assert sch[relu1].attach_type == AttachType.kScope - assert sch[relu2].attach_type == AttachType.kScope - assert sch[relu3].attach_type == AttachType.kGroupRoot - # Check that the attaches are at the correct iter var - assert sch[relu1].attach_ivar == sch[relu3].leaf_iter_vars[1] - assert sch[relu2].attach_ivar == sch[relu3].leaf_iter_vars[1] - - -def test_copy_constants(): - ifm_a = relay.var("IFM_A", shape=(1, 26, 26, 32), dtype="int8") - conv_a = make_ethosu_conv2d(ifm_a, 32, 8, (3, 3), (0, 0), (1, 1), (1, 1)) - conv_b = make_ethosu_conv2d(conv_a, 8, 4, (1, 1), (0, 0), (1, 1), (1, 1)) - func = relay.Function(relay.analysis.free_vars(conv_b), conv_b) - func = run_opt_pass(func, relay.transform.InferType()) - - func, const_dict = 
extract_constants(func) - cached_func = lower_to_te(func) - - sch = te.create_schedule([cached_func.outputs[0].op]) - planner = copy_constants() - planner(cached_func, const_dict, sch) - assert len(sch.stages) == 23 - assert ".global" in sch.stages[6].op.name - assert ".global" in sch.stages[8].op.name - assert ".global" in sch.stages[17].op.name - assert ".global" in sch.stages[19].op.name - - -def test_no_copy_constants(): - ifm_a = relay.var("IFM_A", shape=(1, 26, 26, 32), dtype="int8") - conv_a = make_ethosu_conv2d(ifm_a, 32, 8, (3, 3), (0, 0), (1, 1), (1, 1)) - conv_b = make_ethosu_conv2d(conv_a, 8, 4, (1, 1), (0, 0), (1, 1), (1, 1)) - func = relay.Function(relay.analysis.free_vars(conv_b), conv_b) - func = run_opt_pass(func, relay.transform.InferType()) - - func, _ = extract_constants(func) - cached_func = lower_to_te(func) - - sch = te.create_schedule([cached_func.outputs[0].op]) - assert len(sch.stages) == 19 - ops_names = [x.op.name for x in sch.stages] - assert all(".global" not in x for x in ops_names) - - -# This test makes sure that constants and LUTs have a correct storage scope -def test_copy_luts(): - ifm_shape = (1, 33, 33, 11) - ifm = relay.var("IFM", shape=ifm_shape, dtype="int8") - lut = relay.const([i for i in range(256)], dtype="int8") - conv = make_ethosu_conv2d( - ifm, ifm_shape[3], 8, (3, 3), (0, 0), (1, 1), (1, 1), lut=lut, activation="TANH" - ) - identity = make_ethosu_identity(conv, lut=lut, activation="TANH") - func = relay.Function(relay.analysis.free_vars(identity), identity) - func = run_opt_pass(func, relay.transform.InferType()) - - func, const_dict = extract_constants(func) - te_graph = lower_to_te(func) - - sch = te.create_schedule([te_graph.outputs[0].op]) - copy_constants()(te_graph, const_dict, sch) - copy_luts()(te_graph, const_dict, sch) - assert len(sch.stages) == 17 - assert ".global" in sch.stages[6].op.name - assert ".global" in sch.stages[8].op.name - assert ".local" in sch.stages[10].op.name - - -# This test makes sure that LUT have a correct size -@pytest.mark.parametrize("dtype,lut_size", [["int8", 256], ["int16", 512]]) -def test_lut_size(dtype, lut_size): - ifm_shape = (1, 2, 4, 8) - ifm = relay.var("IFM", shape=ifm_shape, dtype=dtype) - lut = relay.const([i for i in range(lut_size)], dtype=dtype) - identity = make_ethosu_identity(ifm, lut=lut, activation="TANH") - func = relay.Function(relay.analysis.free_vars(identity), identity) - func = run_opt_pass(func, relay.transform.InferType()) - - func, const_dict = extract_constants(func) - te_graph = lower_to_te(func) - - sch = te.create_schedule([te_graph.outputs[0].op]) - copy_luts()(te_graph, const_dict, sch) - - assert sch.stages[3].all_iter_vars[0].dom == tvm.ir.expr.Range(0, lut_size) - - -def test_schedule_cache_reads(): - a = te.placeholder((12, 12), dtype="uint8", name="a") - b = te.placeholder((12, 12), dtype="uint8", name="b") - add = topi.add(a, b) - sch = te.create_schedule([add.op]) - cr = sch.cache_read(b, "global", [add]) - schedule_cache_reads(sch) - assert len(sch.stages) == 4 - assert len(sch[cr].leaf_iter_vars) == 1 - iv = sch[cr].leaf_iter_vars[0] - assert list(sch[cr].iter_var_attrs[iv].pragma_keys) == ["op"] - assert list(sch[cr].iter_var_attrs[iv].pragma_values) == ["ethosu_copy"] - - -# uninitialized variables used -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class DiamondGraphTir: - @T.prim_func - def main(input_placeholder: T.Buffer((1, 56, 56, 96), "int8"), input_ethosu_write: T.Buffer((1, 56, 56, 24), "int8")) -> None: - 
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.Buffer([301056], dtype='int8', data=input_placeholder.data) - ethosu_write = T.Buffer([75264], dtype='int8', data=input_ethosu_write.data) - buffer1 = T.Buffer([2848], "uint8") - buffer3 = T.Buffer([976], "uint8") - p1_data = T.allocate([2848], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.Buffer([2848], "uint8", data=p1_data) - p2_data = T.allocate([976], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.Buffer([976], "uint8", data=p2_data) - p5_data = T.allocate([75264], "int8", "global", annotations={"disable_lower_builtin":True}) - p5 = T.Buffer([75264], "int8", data=p5_data) - p6_data = T.allocate([75264], "int8", "global", annotations={"disable_lower_builtin":True}) - p6 = T.Buffer([75264], "int8", data=p6_data) - T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 2848, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 976, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 56, 56, 96, 56, 0, 56, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 5376, 96, 1, "int8", 56, 56, 24, 56, 0, 56, p5[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 1344, 24, 1, 1, 1, 1, 1, 1, 1, p1[0], 2608, T.int8(-1), T.int8(-1), 12, p1[2608], 240, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 56, 56, 24, 56, 0, 56, p5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 1344, 24, 1, "int8", 56, 56, 24, 56, 0, 56, p6[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 1344, 24, 1, 1, 1, 1, 1, 1, 1, p2[0], 736, T.int8(-1), T.int8(-1), 12, p2[736], 240, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 56, 56, 24, 56, 0, 56, p5[0], 0, 0, 0,T.float32(1), 0, "NHWC", 1344, 24, 1, "int8", 56, 56, 24, 56, 0, 56, p6[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1344, 24, 1, "int8", 56, 56, 24, 56, 0, 56, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1344, 24, 1, "ADD", 0, "NONE", 0, 0, "TFL", 0, 0, 0, 0, 0, 0, dtype="handle")) -# fmt: on - - -def test_schedule_diamond_graph(): - ifm_a = relay.var("IFM_A", shape=(1, 56, 56, 96), dtype="int8") - conv_a = make_ethosu_conv2d(ifm_a, 96, 24, (1, 1), (0, 0), (1, 1), (1, 1)) - conv_b = make_ethosu_conv2d(conv_a, 24, 24, (1, 1), (0, 0), (1, 1), (1, 1)) - add = make_ethosu_binary_elementwise(conv_a, conv_b, 24, 24, "ADD", "int8") - - func = relay.Function(relay.analysis.free_vars(add), add) - func = run_opt_pass(func, relay.transform.InferType()) - - test_mod, _ = _lower_to_tir(func, copy_constants()) - reference_mod = DiamondGraphTir - tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True) - - -def test_copy_constants_fully_connected_weights(): - """Check that MatMul-like conv2d ops do not copy weights to SRAM.""" - ifm = relay.var("IFM", shape=(1, 1, 1, 32), dtype="int8") - conv = make_ethosu_conv2d(ifm, 32, 8, (1, 1), (0, 0), (1, 1), (1, 1)) - func = relay.Function(relay.analysis.free_vars(conv), conv) - func = run_opt_pass(func, relay.transform.InferType()) - - func, const_dict = extract_constants(func) - cached_func = lower_to_te(func) - - sch = te.create_schedule([cached_func.outputs[0].op]) - planner = copy_constants() - planner(cached_func, const_dict, sch) - assert True not in [".global" in s.op.name for s in sch.stages] - - -if __name__ == "__main__": - 
tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py b/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py deleted file mode 100644 index 69076f5337c8..000000000000 --- a/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py +++ /dev/null @@ -1,1529 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -import pytest - -pytest.importorskip("ethosu.vela") -import numpy as np - -import tvm -from tvm.tir import stmt_functor -from tvm.script import tir as T -from tvm.relay.backend.contrib.ethosu import tir_to_cs_translator -from tvm.relay.backend.contrib.ethosu import util -import ethosu.vela.api as vapi - - -# fmt: off -# Undefined vars used -"""A sample tir test case for translator""" -@tvm.script.ir_module(check_well_formed=False) -class SingleEthosUConv2D: - @T.prim_func - def main(placeholder_3: T.Buffer((8192,), "int8"), ethosu_conv2d_1: T.Buffer((1024,), "int8")) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_4 = T.Buffer([1], "uint8") - placeholder_5 = T.Buffer([1], "uint8") - # body - T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 8, 8, 3, 8, 0, 8, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "uint8", 8, 8, 16, 8, 0, 8, ethosu_conv2d_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 16, 1, 1, 1, 1, 1, 1, 1, placeholder_4[0], 0, T.int8(-1), T.int8(-1), 12, placeholder_5[0], 0, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", 0, 0, 0, dtype="uint8")) -# fmt: on - - -# fmt: off -# undefined vars used -"""A sample tir test case with multiple convolutions for translator""" -@tvm.script.ir_module(check_well_formed=False) -class MultiEthosUConv2D: - @T.prim_func - def main(placeholder_6: T.Buffer((192,), "int8"), ethosu_conv2d_1: T.Buffer((512,), "int8")) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_9 = T.Buffer([1], "uint8") - placeholder_7 = T.Buffer([1], "uint8") - placeholder_8 = T.Buffer([1], "uint8") - placeholder_5 = T.Buffer([1], "uint8") - # body - ethosu_conv2d_2 = T.decl_buffer([1024], "uint8") - ethosu_conv2d_3 = T.decl_buffer([2048], "uint8") - T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 4, 8, 3, 4, 0, 8, placeholder_6[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "uint8", 4, 8, 32, 4, 0, 8, ethosu_conv2d_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 1, 1, 1, 1, 1, 1, placeholder_7[0], 0, T.int8(-1), T.int8(-1), 12, placeholder_8[0], 0, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="uint8")) - T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 4, 8, 32, 4, 0, 8, ethosu_conv2d_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 
256, 32, 1, "uint8", 4, 8, 8, 4, 0, 8, ethosu_conv2d_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_9[0], 0, T.int8(-1), T.int8(-1), 12, placeholder_5[0], 0, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", 0, 0, 0, dtype="uint8")) - T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 4, 8, 3, 4, 0, 8, placeholder_6[96], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "uint8", 4, 8, 32, 4, 0, 8, ethosu_conv2d_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 1, 1, 1, 1, 1, 1, placeholder_7[0], 0, T.int8(-1), T.int8(-1), 12, placeholder_8[0], 0, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", 0, 0, 0, dtype="uint8")) - T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 4, 8, 32, 4, 0, 8, ethosu_conv2d_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "uint8", 4, 8, 8, 4, 0, 8, ethosu_conv2d_1[256], 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_9[0], 0, T.int8(-1), T.int8(-1), 12, placeholder_5[0], 0, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", 0, 0, 0, dtype="uint8")) -# fmt: on - - -# fmt: off -# undefined vars used -"""A sample tir test case with copy operations for translator""" -@tvm.script.ir_module(check_well_formed=False) -class MultiEthosUCopy: - @T.prim_func - def main(placeholder_3: T.Buffer((8192,), "int8"), ethosu_conv2d_1: T.Buffer((2048,), "int8")) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_5 = T.Buffer([1], "int32") - placeholder_4 = T.Buffer([1], "uint8") - # body - placeholder_global = T.decl_buffer([256], "uint8") - placeholder_d_global = T.decl_buffer([8], "int32") - T.evaluate(T.call_extern("ethosu_copy", placeholder_4[0], 256, placeholder_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", placeholder_5[0], 8, placeholder_d_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 16, 16, 32, 16, 0, 16, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "uint8", 16, 16, 8, 16, 0, 16, ethosu_conv2d_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 0, T.int8(-1), T.int8(-1), 12, placeholder_d_global[0], 0, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -# fmt: off -# undefined vars used -"""A tir test case with copy operation having a buffer size less than the minimum for a DMA operation""" -@tvm.script.ir_module(check_well_formed=False) -class CopyLessMinimal: - @T.prim_func - def main(ethos_u_0_i0: T.Buffer((1, 4), "int8"), ethosu_write: T.Buffer((1, 4), "int8")): - T.func_attr({"from_legacy_te_schedule": T.bool(True), "global_symbol": "main", "tir.noalias": T.bool(True)}) - p1_global = T.allocate([4], "int8", "global", annotations={"disable_lower_builtin": T.bool(True)}) - ethosu_write_1 = T.allocate([4], "int8", "global", annotations={"disable_lower_builtin": T.bool(True)}) - p1 = T.Buffer((4,), "int8") - p1_global_1 = T.Buffer((4,), "int8", data=p1_global) - T.call_extern("handle", "ethosu_copy", p1[0], 4, p1_global_1[0]) - ethos_u_0_i0_1 = T.Buffer((4,), "int8", data=ethos_u_0_i0.data) - ethosu_write_2 = T.Buffer((4,), "int8", data=ethosu_write_1, align=4) - T.call_extern("handle", "ethosu_binary_elementwise", "int8", 1, 1, 4, 1, 0, 1, ethos_u_0_i0_1[0], 0, 0, 0, T.float32(0.0039170472882688046), -128, "NHWC", 1, 1, 1, "int8", 1, 1, 4, 1, 0, 1, p1_global_1[0], 0, 0, 0, T.float32(0.0028046639636158943), 
-128, "NHWC", 1, 1, 1, "int8", 1, 1, 4, 1, 0, 1, ethosu_write_2[0], 0, 0, 0, T.float32(0.0067217112518846989), -128, "NHWC", 1, 1, 1, "ADD", 0, "NONE", 0, 0, "TFL", 0, 0, 0, 0, 0, 0) - ethosu_write_3 = T.Buffer((4,), "int8", data=ethosu_write.data) - T.call_extern("handle", "ethosu_identity", "int8", 1, 4, 1, 1, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "int8", 1, 4, 1, 1, 0, 4, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) -# fmt: on - - -# fmt: off -# undefined vars used -"""A TIR test module of weight streaming""" -@tvm.script.ir_module(check_well_formed=False) -class WeightStreamOnly: - @T.prim_func - def main(placeholder: T.Buffer((8192,), "int8"), ethosu_write: T.Buffer((2048,), "int8")) -> None: - buffer = T.Buffer([1], "uint8") - buffer_1 = T.Buffer([1], "uint8") - buffer_2 = T.Buffer([1], "uint8") - buffer_3 = T.Buffer([1], "uint8") - buffer_4 = T.Buffer([1], "uint8") - buffer_5 = T.Buffer([1], "uint8") - buffer_6 = T.Buffer([1], "uint8") - buffer_7 = T.Buffer([1], "uint8") - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, - "global_symbol": "main", "tir.noalias": True, - "constants": {buffer.name: buffer, - buffer_1.name: buffer_1, - buffer_2.name: buffer_2, - buffer_3.name: buffer_3, - buffer_4.name: buffer_4, - buffer_5.name: buffer_5, - buffer_6.name: buffer_6, - buffer_7.name: buffer_7}}) - # body - placeholder_global_data = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin":True}) - placeholder_global = T.decl_buffer([128], "uint8", data=placeholder_global_data) - placeholder_d_global_data = T.allocate([32], "uint8", "global", annotations={"disable_lower_builtin":True}) - placeholder_d_global = T.decl_buffer([32], "uint8", data=placeholder_d_global_data) - T.evaluate(T.call_extern("ethosu_copy", buffer[0], 128, placeholder_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_1[0], 32, placeholder_d_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 128, T.int8(-1), T.int8(-1), 12, placeholder_d_global[0], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_2[0], 112, placeholder_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_3[0], 32, placeholder_d_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 112, T.int8(-1), T.int8(-1), 12, placeholder_d_global[0], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_4[0], 112, placeholder_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_5[0], 32, placeholder_d_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 
1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 112, T.int8(-1), T.int8(-1), 12, placeholder_d_global[0], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_6[0], 112, placeholder_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_7[0], 32, placeholder_d_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 112, T.int8(-1), T.int8(-1), 12, placeholder_d_global[0], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -# fmt: off -# undefined vars used -"""A TIR test module of weight streaming and direct reading""" -@tvm.script.ir_module(check_well_formed=False) -class MixedRead: - @T.prim_func - def main(placeholder: T.Buffer((8192,), "int8"), ethosu_write: T.Buffer((2048,), "int8")) -> None: - buffer = T.Buffer([1], "uint8") - buffer_1 = T.Buffer([1], "uint8") - buffer_2 = T.Buffer([1], "uint8") - buffer_3 = T.Buffer([1], "uint8") - buffer_4 = T.Buffer([1], "uint8") - buffer_5 = T.Buffer([1], "uint8") - buffer_6 = T.Buffer([1], "uint8") - buffer_7 = T.Buffer([1], "uint8") - buffer_8 = T.Buffer([1], "uint8") - buffer_9 = T.Buffer([1], "uint8") - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, - "global_symbol": "main", "tir.noalias": True, - "constants": {buffer.name: buffer, - buffer_1.name: buffer_1, - buffer_2.name: buffer_2, - buffer_3.name: buffer_3, - buffer_4.name: buffer_4, - buffer_5.name: buffer_5, - buffer_6.name: buffer_6, - buffer_7.name: buffer_7, - buffer_8.name: buffer_8, - buffer_9.name: buffer_9}}) - # body - ethosu_write_1_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_1 = T.Buffer([4096], "int8", data=ethosu_write_1_data) - placeholder_global_data = T.allocate([80], "uint8", "global", annotations={"disable_lower_builtin":True}) - placeholder_global = T.Buffer([80], "uint8", data=placeholder_global_data) - placeholder_d_global_data = T.allocate([32], "uint8", "global", annotations={"disable_lower_builtin":True}) - placeholder_d_global = T.Buffer([32], "uint8", data=placeholder_d_global_data) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer[0], 592, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_2[0], 80, placeholder_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_3[0], 32, placeholder_d_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 80, T.int8(-1), T.int8(-1), 12, placeholder_d_global[0], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_4[0], 80, 
placeholder_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_5[0], 32, placeholder_d_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 80, T.int8(-1), T.int8(-1), 12, placeholder_d_global[0], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_6[0], 80, placeholder_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_7[0], 32, placeholder_d_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 80, T.int8(-1), T.int8(-1), 12, placeholder_d_global[0], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_8[0], 80, placeholder_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_9[0], 32, placeholder_d_global[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 80, T.int8(-1), T.int8(-1), 12, placeholder_d_global[0], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) -# fmt: on - - -def test_buffer_info_extraction(): - test_cases = [ - { - # Stimulus - "tir_module": SingleEthosUConv2D, - "param_dict": { - tvm.tir.Var("placeholder_4", "uint8"): np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 16], "uint8" - ), - tvm.tir.Var("placeholder_5", "uint8"): np.random.randint( - np.iinfo("int32").min, np.iinfo("int32").max, [16], "int32" - ), - }, - # Reference Outputs - "data_buffers": { - "placeholder_3": ( - [1, 8, 8, 3], - "uint8", - tir_to_cs_translator.BufferType.input_or_output, - ), - "ethosu_conv2d_1": ( - [1, 8, 8, 16], - "uint8", - tir_to_cs_translator.BufferType.input_or_output, - ), - }, - }, - { - "tir_module": MultiEthosUConv2D, - "param_dict": { - tvm.tir.Var("placeholder_7", "uint8"): np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 32], "uint8" - ), - tvm.tir.Var("placeholder_8", "uint8"): np.random.randint( - np.iinfo("int32").min, np.iinfo("int32").max, [32], "int32" - ), - tvm.tir.Var("placeholder_8", "uint8"): np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 32, 8], "uint8" - ), - tvm.tir.Var("placeholder_5", "uint8"): np.random.randint( - np.iinfo("int32").min, np.iinfo("int32").max, [8], "int32" - ), - }, - # Reference Outputs - "data_buffers": { - "placeholder_6": ( - [1, 8, 8, 3], - "uint8", - tir_to_cs_translator.BufferType.input_or_output, - ), - "ethosu_conv2d_1": ( - [1, 8, 8, 8], - "uint8", - tir_to_cs_translator.BufferType.input_or_output, - ), - "ethosu_conv2d_2": ( - [1024], - "uint8", - tir_to_cs_translator.BufferType.scratch, - ), - "ethosu_conv2d_3": ( - [2048], - "uint8", - tir_to_cs_translator.BufferType.scratch, - ), - }, - }, - ] - for 
test_case in test_cases: - # With Target Hooks the TIR module needs a target attached - # and lowered via make unpacked API. - tir_mod = test_case["tir_module"] - tir_mod["main"] = tir_mod["main"].with_attr( - "target", tvm.target.Target("ethos-u", host="ethos-u") - ) - tir_mod = tvm.tir.transform.MakeUnpackedAPI()(tir_mod) - buffer_info = tir_to_cs_translator.extract_buffer_info(tir_mod, test_case["param_dict"]) - for buffer_var, info in buffer_info.items(): - if buffer_var in test_case["param_dict"].keys(): - assert ( - info.values.flatten() == test_case["param_dict"][buffer_var].flatten() - ).all() - assert info.dtype == test_case["param_dict"][buffer_var].dtype - info.btype == tir_to_cs_translator.BufferType.constant - else: - buffer_name = buffer_var.name - assert info.btype == test_case["data_buffers"][buffer_name][2] - - -def test_translate_ethosu_conv2d(): - test_cases = [ - { - # Stimulus - "tir_module": SingleEthosUConv2D, - "param_dict": { - 1: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 16], "uint8" - ), - 2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [16], "int32"), - }, - # Reference outputs - "ref": [ - { - "ifm": { - "data_type": vapi.NpuDataType.UINT8, - "shape": vapi.NpuShape3D(8, 8, 3), - "tiles": vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]), - "quantization": vapi.NpuQuantization(0.5, 10), - "layout": vapi.NpuLayout.NHWC, - "strides": vapi.NpuShape3D(24, 3, 1), - }, - "ofm": { - "data_type": vapi.NpuDataType.UINT8, - "shape": vapi.NpuShape3D(8, 8, 16), - "tiles": vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]), - "quantization": vapi.NpuQuantization(0.25, 14), - "layout": vapi.NpuLayout.NHWC, - "strides": vapi.NpuShape3D(128, 16, 1), - }, - "kernel": vapi.NpuKernel( - w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 - ), - "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), - "activation": { - "op": vapi.NpuActivationOp.NONE_OR_RELU, - "min": -3.5, - "max": 60.25, - }, - "rounding_mode": vapi.NpuRoundingMode.TFL, - "ifm_upscale": vapi.NpuResamplingMode.NONE, - "w_zero_point": 12, - } - ], - }, - { - "tir_module": MultiEthosUConv2D, - "param_dict": { - 1: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 32], "uint8" - ), - 2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [32], "int32"), - 3: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 32, 8], "uint8" - ), - 4: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [8], "int32"), - }, - # Reference Outputs - "ref": [ - { - "ifm": { - "data_type": vapi.NpuDataType.UINT8, - "shape": vapi.NpuShape3D(4, 8, 3), - "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), - "quantization": vapi.NpuQuantization(0.5, 10), - "layout": vapi.NpuLayout.NHWC, - "strides": vapi.NpuShape3D(24, 3, 1), - }, - "ofm": { - "data_type": vapi.NpuDataType.UINT8, - "shape": vapi.NpuShape3D(4, 8, 32), - "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), - "quantization": vapi.NpuQuantization(0.25, 14), - "layout": vapi.NpuLayout.NHWC, - "strides": vapi.NpuShape3D(256, 32, 1), - }, - "kernel": vapi.NpuKernel( - w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 - ), - "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), - "activation": {"op": None}, - "rounding_mode": vapi.NpuRoundingMode.TFL, - "ifm_upscale": vapi.NpuResamplingMode.NONE, - "w_zero_point": 12, - }, - { - "ifm": { - "data_type": vapi.NpuDataType.UINT8, - "shape": vapi.NpuShape3D(4, 8, 32), - "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 
0, 0]), - "quantization": vapi.NpuQuantization(0.5, 10), - "layout": vapi.NpuLayout.NHWC, - "strides": vapi.NpuShape3D(256, 32, 1), - }, - "ofm": { - "data_type": vapi.NpuDataType.UINT8, - "shape": vapi.NpuShape3D(4, 8, 8), - "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), - "quantization": vapi.NpuQuantization(0.25, 14), - "layout": vapi.NpuLayout.NHWC, - "strides": vapi.NpuShape3D(64, 8, 1), - }, - "kernel": vapi.NpuKernel( - w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 - ), - "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), - "activation": { - "op": vapi.NpuActivationOp.NONE_OR_RELU, - "min": -3.5, - "max": 60.25, - }, - "rounding_mode": vapi.NpuRoundingMode.TFL, - "ifm_upscale": vapi.NpuResamplingMode.NONE, - "w_zero_point": 12, - }, - { - "ifm": { - "data_type": vapi.NpuDataType.UINT8, - "shape": vapi.NpuShape3D(4, 8, 3), - "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), - "quantization": vapi.NpuQuantization(0.5, 10), - "layout": vapi.NpuLayout.NHWC, - "strides": vapi.NpuShape3D(24, 3, 1), - }, - "ofm": { - "data_type": vapi.NpuDataType.UINT8, - "shape": vapi.NpuShape3D(4, 8, 32), - "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), - "quantization": vapi.NpuQuantization(0.25, 14), - "layout": vapi.NpuLayout.NHWC, - "strides": vapi.NpuShape3D(256, 32, 1), - }, - "kernel": vapi.NpuKernel( - w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 - ), - "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), - "activation": { - "op": vapi.NpuActivationOp.NONE_OR_RELU, - "min": -3.5, - "max": 60.25, - }, - "rounding_mode": vapi.NpuRoundingMode.TFL, - "ifm_upscale": vapi.NpuResamplingMode.NONE, - "w_zero_point": 12, - }, - { - "ifm": { - "data_type": vapi.NpuDataType.UINT8, - "shape": vapi.NpuShape3D(4, 8, 32), - "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), - "quantization": vapi.NpuQuantization(0.5, 10), - "layout": vapi.NpuLayout.NHWC, - "strides": vapi.NpuShape3D(256, 32, 1), - }, - "ofm": { - "data_type": vapi.NpuDataType.UINT8, - "shape": vapi.NpuShape3D(4, 8, 8), - "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), - "quantization": vapi.NpuQuantization(0.25, 14), - "layout": vapi.NpuLayout.NHWC, - "strides": vapi.NpuShape3D(64, 8, 1), - }, - "kernel": vapi.NpuKernel( - w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 - ), - "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), - "activation": { - "op": vapi.NpuActivationOp.NONE_OR_RELU, - "min": -3.5, - "max": 60.25, - }, - "rounding_mode": vapi.NpuRoundingMode.TFL, - "ifm_upscale": vapi.NpuResamplingMode.NONE, - "w_zero_point": 12, - }, - ], - }, - ] - - def extract_ethosu_conv2d_extern_calls(mod): - """This function will obtain all ethosu_conv2d - calls from a NPU TIR module - Parameters - ---------- - mod : tvm.IRModule - This is a NPU TIR Module - - Returns - ------- - list - of tvm.tir.Call objects - that are tir extern calls - for ethosu_conv2d - """ - # There should only be a single function - assert len(mod.functions.items()) == 1 - primfunc = mod.functions.items()[0][1] - - ethosu_conv2d_calls = list() - - def populate_ethosu_conv2d_calls(stmt): - if ( - isinstance(stmt, tvm.tir.Call) - and stmt.op.name == "tir.call_extern" - and stmt.args[0] == "ethosu_conv2d" - ): - ethosu_conv2d_calls.append(stmt) - - stmt_functor.post_order_visit(primfunc.body, populate_ethosu_conv2d_calls) - return ethosu_conv2d_calls - - for test_case in test_cases: - ethosu_conv2d_calls = extract_ethosu_conv2d_extern_calls(test_case["tir_module"]) - for idx, ethosu_conv2d_call in 
enumerate(ethosu_conv2d_calls): - ref = test_case["ref"][idx] - npu_op, w_zero_point = tir_to_cs_translator.translate_ethosu_conv2d(ethosu_conv2d_call) - # Compare IFM - assert npu_op.ifm.data_type == ref["ifm"]["data_type"] - assert npu_op.ifm.shape == ref["ifm"]["shape"] - assert npu_op.ifm.tiles.height_0 == ref["ifm"]["tiles"].height_0 - assert npu_op.ifm.tiles.height_1 == ref["ifm"]["tiles"].height_1 - assert npu_op.ifm.tiles.width_0 == ref["ifm"]["tiles"].width_0 - assert npu_op.ifm.quantization == ref["ifm"]["quantization"] - assert npu_op.ifm.layout == ref["ifm"]["layout"] - assert npu_op.ifm.strides == ref["ifm"]["strides"] - # Compare OFM - assert npu_op.ofm.data_type == ref["ofm"]["data_type"] - assert npu_op.ofm.shape == ref["ofm"]["shape"] - assert npu_op.ofm.tiles.height_0 == ref["ofm"]["tiles"].height_0 - assert npu_op.ofm.tiles.height_1 == ref["ofm"]["tiles"].height_1 - assert npu_op.ofm.tiles.width_0 == ref["ofm"]["tiles"].width_0 - assert npu_op.ofm.quantization == ref["ofm"]["quantization"] - assert npu_op.ofm.layout == ref["ofm"]["layout"] - assert npu_op.ofm.strides == ref["ofm"]["strides"] - # Compare kernel and padding - assert npu_op.kernel.__dict__ == ref["kernel"].__dict__ - assert npu_op.padding == ref["padding"] - # Compare activation - if ref["activation"]["op"] is None: - assert npu_op.activation is None - else: - assert npu_op.activation.op_type == ref["activation"]["op"] - assert npu_op.activation.min == ref["activation"]["min"] - assert npu_op.activation.max == ref["activation"]["max"] - # Compare rounding mode - assert npu_op.rounding_mode == ref["rounding_mode"] - # Compare ifm upscaling - assert npu_op.ifm_upscale == ref["ifm_upscale"] - # Compare weight quantization parameters - assert w_zero_point == ref["w_zero_point"] - - -# fmt: off -"""A ethosu_depthwise_conv2d tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuDepthwiseConv2D: - @T.prim_func - def main(placeholder: T.handle, placeholder_1: T.handle, placeholder_2: T.handle, ethosu_depthwise_conv2d: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_4 = T.match_buffer(placeholder_1, [18], dtype="int8", elem_offset=0, align=64, offset_factor=1) - placeholder_5 = T.match_buffer(placeholder_2, [30], dtype="uint8", elem_offset=0, align=64, offset_factor=1) - placeholder_3 = T.match_buffer(placeholder, [192], dtype="int8", elem_offset=0, align=64, offset_factor=1) - ethosu_depthwise_conv2d_1 = T.match_buffer(ethosu_depthwise_conv2d, [126], dtype="int8", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 8, 8, 3, 8, 0, 8, placeholder_3[0], 0, 0, 0, T.float32(0.6), 11, "NHWC", 24, 3, 1, "int8", 6, 7, 3, 6, 0, 7, ethosu_depthwise_conv2d_1[0], 0, 0, 0, T.float32(0.26), 15, "NHWC", 21, 3, 1, 2, 3, 1, 1, 1, 1, placeholder_4[0], 18, 13, placeholder_5[0], 30, 0, 0, 0, 0, "CLIP", 15, 105, "TFL", "NONE", 0, 0, 0, dtype="int8")) -# fmt: on - - -def test_translate_ethosu_depthwise_conv2d(): - def extract_ethosu_depthwise_conv2d_extern_call(mod): - # There should only be a single function - assert len(mod.functions.items()) == 1 - primfunc = mod.functions.items()[0][1] - - ethosu_depthwise_conv2d_calls = list() - - def populate_ethosu_depthwise_conv2d_calls(stmt): - if ( - isinstance(stmt, tvm.tir.Call) - and stmt.op.name == "tir.call_extern" - and stmt.args[0] == "ethosu_depthwise_conv2d" - ): - ethosu_depthwise_conv2d_calls.append(stmt) - - 
stmt_functor.post_order_visit(primfunc.body, populate_ethosu_depthwise_conv2d_calls) - return ethosu_depthwise_conv2d_calls[0] - - depthwise_conv2d_call = extract_ethosu_depthwise_conv2d_extern_call(SingleEthosuDepthwiseConv2D) - npu_op, w_zero_point = tir_to_cs_translator.translate_ethosu_depthwise_conv2d( - depthwise_conv2d_call - ) - - assert npu_op.ifm.data_type == vapi.NpuDataType.INT8 - assert npu_op.ifm.shape == vapi.NpuShape3D(8, 8, 3) - assert npu_op.ifm.tiles.height_0 == vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]).height_0 - assert npu_op.ifm.tiles.height_1 == vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]).height_1 - assert npu_op.ifm.tiles.width_0 == vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]).width_0 - assert npu_op.ifm.quantization == pytest.approx(vapi.NpuQuantization(0.6, 11)) - assert npu_op.ifm.layout == vapi.NpuLayout.NHWC - assert npu_op.ifm.strides == vapi.NpuShape3D(24, 3, 1) - # Compare OFM - assert npu_op.ofm.data_type == vapi.NpuDataType.INT8 - assert npu_op.ofm.shape == vapi.NpuShape3D(6, 7, 3) - assert npu_op.ofm.tiles.height_0 == vapi.NpuTileBox(6, 0, 8, [0, 0, 0, 0]).height_0 - assert npu_op.ofm.tiles.height_1 == vapi.NpuTileBox(6, 0, 7, [0, 0, 0, 0]).height_1 - assert npu_op.ofm.tiles.width_0 == vapi.NpuTileBox(6, 0, 7, [0, 0, 0, 0]).width_0 - assert npu_op.ofm.quantization == pytest.approx(vapi.NpuQuantization(0.26, 15)) - assert npu_op.ofm.layout == vapi.NpuLayout.NHWC - assert npu_op.ofm.strides == vapi.NpuShape3D(21, 3, 1) - # Compare kernel and padding - assert ( - npu_op.kernel.__dict__ - == vapi.NpuKernel(w=2, h=3, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1).__dict__ - ) - assert npu_op.padding == vapi.NpuPadding(top=0, left=0, bottom=0, right=0) - # Compare activation - assert npu_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU - assert npu_op.activation.min == 0 - assert npu_op.activation.max == pytest.approx(23.4) - # Compare rounding mode - assert npu_op.rounding_mode == vapi.NpuRoundingMode.TFL - # Compare ifm upscaling - assert npu_op.ifm_upscale == vapi.NpuResamplingMode.NONE - # Compare weight quantization parameters - assert w_zero_point == 13 - - -def test_translate_ethosu_copy(): - def extract_ethosu_copy_extern_calls(mod): - """This function will obtain all ethosu_conv2d - calls from a NPU TIR module - Parameters - ---------- - mod : tvm.IRModule - This is a NPU TIR Module - - Returns - ------- - list - of tvm.tir.Call objects - that are tir extern calls - for ethosu_conv2d - """ - # There should only be a single function - assert len(mod.functions.items()) == 1 - primfunc = mod.functions.items()[0][1] - - ethosu_copy_calls = list() - - def populate_ethosu_copy_calls(stmt): - if ( - isinstance(stmt, tvm.tir.Call) - and stmt.op.name == "tir.call_extern" - and stmt.args[0] == "ethosu_copy" - ): - ethosu_copy_calls.append(stmt) - - stmt_functor.post_order_visit(primfunc.body, populate_ethosu_copy_calls) - return ethosu_copy_calls - - test_cases = [ - { - # Stimulus - "tir_module": MultiEthosUCopy, - "param_dict": { - 1: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [8, 1, 1, 32], "uint8" - ), - 2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [8], "int32"), - }, - # Reference outputs - "ref": [ - { - "src": "placeholder_4", - "dest": "placeholder_global", - "length": 256, - }, - { - "src": "placeholder_5", - "dest": "placeholder_d_global", - "length": 32, - }, - ], - }, - { - # Mod contains a copy operation with a buffer size of 4 bytes and it should be replaced by 16 - "tir_module": CopyLessMinimal, - 
"param_dict": { - 1: np.random.randint(np.iinfo("int8").min, np.iinfo("int8").max, [1, 4], "int8"), - }, - # Reference outputs - "ref": [ - { - "src": "p1", - "dest": "p1_global_1", - "length": 16, - }, - ], - }, - ] - - for test_case in test_cases: - ethosu_copy_calls = extract_ethosu_copy_extern_calls(test_case["tir_module"]) - for idx, ethosu_copy_call in enumerate(ethosu_copy_calls): - npu_dma_op = tir_to_cs_translator.translate_ethosu_tir_call_extern(ethosu_copy_call) - assert npu_dma_op.src.address.buffer.name == test_case["ref"][idx]["src"] - assert npu_dma_op.dest.address.buffer.name == test_case["ref"][idx]["dest"] - assert npu_dma_op.src.length == test_case["ref"][idx]["length"] - assert npu_dma_op.dest.length == test_case["ref"][idx]["length"] - - -# fmt: off -# undefined vars used -@tvm.script.ir_module(check_well_formed=False) -class MixedConstantDatatypes: - @T.prim_func - def main(placeholder_4: T.Buffer((2048,), "int8"), ethosu_write_1: T.Buffer((16,), "int8")) -> None: - buffer = T.Buffer([1], "uint8") - buffer_1 = T.Buffer([1], "uint8") - buffer_2 = T.Buffer([1], "int16") - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, - "global_symbol": "main", "tir.noalias": True, - "constants": {buffer.name: buffer, - buffer_1.name: buffer_1, - buffer_2.name: buffer_2}}) - # body - placeholder_global = T.decl_buffer([272], "uint8") - placeholder_d_global = T.decl_buffer([160], "uint8") - ethosu_write_2 = T.decl_buffer([16], "int16") - placeholder_d_global_1 = T.decl_buffer([1], "int16") - T.evaluate(T.call_extern("ethosu_copy", buffer_1[0], 272, placeholder_global[0], dtype="uint8")) - T.evaluate(T.call_extern("ethosu_copy", buffer[0], 160, placeholder_d_global[0], dtype="uint8")) - T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 8, 16, 16, 8, 0, 16, placeholder_4[0], 0, 0, 0, T.float32(0.0039215548895299435), -128, "NHWC", 256, 16, 1, "int16", 1, 1, 16, 1, 0, 1, ethosu_write_2[0], 0, 0, 0, T.float32(0.0023205536417663097), -128, "NHWC", 1, 1, 1, 16, 8, 1, 1, 1, 1, placeholder_global[0], 272, 0, placeholder_d_global[0], 160, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="int16")) - T.evaluate(T.call_extern("ethosu_copy", buffer_2[0], 1, placeholder_d_global_1[0], dtype="int16")) - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int16", 1, 1, 16, 1, 0, 1, ethosu_write_2[0], 0, 0, 0, T.float32(0.0023205536417663097), -128, "NHWC", 1, 1, 1, "int16", 1, 1, 1, 1, 0, 1, placeholder_d_global_1[0], 0, 0, 0, T.float32(0.0078125018482064768), 0, "NHWC", 1, 1, 1, "int8", 1, 1, 16, 1, 0, 1, ethosu_write_1[0], 0, 0, 0, T.float32(0.0023205536417663097), -128, "NHWC", 1, 1, 1, "MUL", 0, "NONE", 0, 0, "NATURAL", 0, 0, 0, 0, 0, 0, dtype="int8")) -# fmt: on - - -def test_assign_addresses(): - test_cases = [ - { - # Stimulus - "tir_module": WeightStreamOnly, - "param_dict": { - WeightStreamOnly["main"].attrs["constants"]["buffer"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [128], "uint8" - ), - WeightStreamOnly["main"].attrs["constants"]["buffer_1"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [32], "uint8" - ), - WeightStreamOnly["main"].attrs["constants"]["buffer_2"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [112], "uint8" - ), - WeightStreamOnly["main"].attrs["constants"]["buffer_3"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [32], "uint8" - ), - WeightStreamOnly["main"].attrs["constants"]["buffer_4"]: np.random.randint( - np.iinfo("uint8").min, 
np.iinfo("uint8").max, [112], "uint8" - ), - WeightStreamOnly["main"].attrs["constants"]["buffer_5"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [32], "uint8" - ), - WeightStreamOnly["main"].attrs["constants"]["buffer_6"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [112], "uint8" - ), - WeightStreamOnly["main"].attrs["constants"]["buffer_7"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [32], "uint8" - ), - }, - }, - { - # Stimulus - "tir_module": MixedRead, - "param_dict": { - MixedRead["main"].attrs["constants"]["buffer"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [592], "uint8" - ), - MixedRead["main"].attrs["constants"]["buffer_1"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [160], "uint8" - ), - MixedRead["main"].attrs["constants"]["buffer_2"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [80], "uint8" - ), - MixedRead["main"].attrs["constants"]["buffer_3"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [32], "uint8" - ), - MixedRead["main"].attrs["constants"]["buffer_4"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [80], "uint8" - ), - MixedRead["main"].attrs["constants"]["buffer_5"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [32], "uint8" - ), - MixedRead["main"].attrs["constants"]["buffer_6"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [80], "uint8" - ), - MixedRead["main"].attrs["constants"]["buffer_7"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [32], "uint8" - ), - MixedRead["main"].attrs["constants"]["buffer_8"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [80], "uint8" - ), - MixedRead["main"].attrs["constants"]["buffer_9"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [32], "uint8" - ), - }, - }, - { - # Stimulus - "tir_module": MixedConstantDatatypes, - "param_dict": { - MixedConstantDatatypes["main"].attrs["constants"]["buffer"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [160], "uint8" - ), - MixedConstantDatatypes["main"].attrs["constants"]["buffer_2"]: np.random.randint( - np.iinfo("int16").min, np.iinfo("int16").max, [1], "int16" - ), - MixedConstantDatatypes["main"].attrs["constants"]["buffer_1"]: np.random.randint( - np.iinfo("uint8").min, np.iinfo("uint8").max, [272], "uint8" - ), - }, - }, - ] - - def extract_call_extern_list(mod): - """This function will obtain all ethosu_conv2d - calls from a NPU TIR module - Parameters - ---------- - mod : tvm.IRModule - This is a NPU TIR Module - - Returns - ------- - list - of tvm.tir.Call objects - that are tir extern calls - for ethosu_conv2d - """ - # There should only be a single function - assert len(mod.functions.items()) == 1 - primfunc = mod.functions.items()[0][1] - - extern_calls = list() - - def populate_extern_calls(stmt): - if isinstance(stmt, tvm.tir.Call) and stmt.op.name == "tir.call_extern": - extern_calls.append(stmt) - - stmt_functor.post_order_visit(primfunc.body, populate_extern_calls) - return extern_calls - - def collect_tir_buffer_info(npu_ops): - """This is run prior to address assigning to collect tir buffer information - for verification later on""" - _npu_op_tir_buffers = dict() - for npu_op in npu_ops: - if isinstance(npu_op, vapi.NpuDmaOperation): - _npu_op_tir_buffers[npu_op] = (npu_op.src.address, npu_op.dest.address) - elif issubclass(type(npu_op), vapi.NpuBlockOperation): - 
_npu_op_tir_buffers[npu_op] = ( - npu_op.ifm.tiles.addresses[0], - npu_op.ofm.tiles.addresses[0], - npu_op.weights, - npu_op.biases, - ) - return _npu_op_tir_buffers - - def _check_buffer(address, region, length, buffer_var): - """Checks whether the buffer information is consistent with - the original tir buffers. - - If it's a constant, this will check that - the slice in the constant tensor has the expected values. - - If it's scratch, this will check that - the slice is within scratch and does not have conflicts - with other scratch tensors. - - If it's an input/output, this will check that the - address is zero - """ - inverse_region_map = { - 0: tir_to_cs_translator.BufferType.constant, - 1: tir_to_cs_translator.BufferType.scratch, - 3: tir_to_cs_translator.BufferType.input, - 4: tir_to_cs_translator.BufferType.output, - } - buffer_type = inverse_region_map[region] - buffer_dtype = buffer_var.type_annotation.element_type.dtype - dtype_bytes = np.iinfo(np.dtype(buffer_dtype)).bits // 8 - if buffer_type == tir_to_cs_translator.BufferType.constant: - ref = buffer_info[buffer_var].values - hex_from = address * dtype_bytes * 2 - hex_to = hex_from + length * dtype_bytes * 2 - constant_hex = constant_hex_string[hex_from:hex_to] - constant_tensor = np.frombuffer(bytearray.fromhex(constant_hex), dtype=buffer_dtype) - assert np.array_equal(constant_tensor, ref) - # Every buffer is adjusted to align to 16 bytes - length = util.round_up(length, 16) - # Mark these constants as read at least once - constant_tensor_read_mask[address : address + length] = np.ones( - length, dtype=buffer_dtype - ) - elif buffer_type == tir_to_cs_translator.BufferType.scratch: - assert address < tvmbaw_workspace_size - - size_in_bytes = allocate_node_sizes[buffer_var] - # Every buffer is adjusted to align to 16 bytes - size_in_bytes = util.round_up(size_in_bytes, 16) - assert address + size_in_bytes <= tvmbaw_workspace_size - # The scratch area should not be used by any other buffer - assert not tvmbaw_workspace_mask[address : address + size_in_bytes].any() - # The scratch area is marked as used - tvmbaw_workspace_mask[address : address + size_in_bytes] = np.ones( - size_in_bytes, dtype="uint8" - ) - elif buffer_type == tir_to_cs_translator.BufferType.input: - assert address == 0 - else: - assert buffer_type == tir_to_cs_translator.BufferType.output - assert address == 0 - - def _get_allocate_node_sizes(mod): - # There should only be a single function - assert len(mod.functions.items()) == 1 - primfunc = mod.functions.items()[0][1] - _allocate_node_sizes = dict() - - def analyze_remaining_allocates(stmt): - if isinstance(stmt, tvm.tir.stmt.Allocate): - allocate = stmt - pointer_type = allocate.buffer_var.type_annotation - storage_scope = pointer_type.storage_scope - if storage_scope == "global": - dtype_bytes = np.iinfo(np.dtype(allocate.dtype)).bits // 8 - size_in_bytes = int(dtype_bytes * np.prod(list(allocate.extents))) - # Every memory address the NPU accesses has to be 16-byte aligned - size_in_bytes = util.round_up(size_in_bytes, 16) - _allocate_node_sizes[allocate.buffer_var] = size_in_bytes - - tvm.tir.stmt_functor.post_order_visit(primfunc.body, analyze_remaining_allocates) - return _allocate_node_sizes - - def verify(npu_ops): - """This wrapper verifies that the allocated addresses match the original tir buffers""" - checked_buffers = set() - - def check_buffer(address, region, length, buffer_var): - if buffer_var not in checked_buffers: - _check_buffer(address, region, length, buffer_var) - checked_buffers.add(buffer_var) - - for npu_op in npu_ops: - if 
isinstance(npu_op, vapi.NpuDmaOperation): - src_tir_buffer_var = npu_op_tir_buffers[npu_op][0].buffer.data - check_buffer( - npu_op.src.address, npu_op.src.region, npu_op.src.length, src_tir_buffer_var - ) - dest_tir_load = npu_op_tir_buffers[npu_op][1].buffer.data - check_buffer( - npu_op.dest.address, - npu_op.dest.region, - npu_op.dest.length, - dest_tir_load, - ) - elif issubclass(type(npu_op), vapi.NpuBlockOperation): - ifm_tir_buffer_var = npu_op_tir_buffers[npu_op][0].buffer.data - ifm_length = ( - npu_op.ifm.shape.height * npu_op.ifm.shape.width * npu_op.ifm.shape.depth - ) - check_buffer( - npu_op.ifm.tiles.addresses[0], - npu_op.ifm.region, - ifm_length, - ifm_tir_buffer_var, - ) - ofm_tir_buffer_var = npu_op_tir_buffers[npu_op][1].buffer.data - ofm_length = ( - npu_op.ofm.shape.height * npu_op.ofm.shape.width * npu_op.ofm.shape.depth - ) - check_buffer( - npu_op.ofm.tiles.addresses[0], - npu_op.ofm.region, - ofm_length, - ofm_tir_buffer_var, - ) - for idx, weight in enumerate(npu_op_tir_buffers[npu_op][2]): - assert isinstance(weight, vapi.NpuAddressRange) - check_buffer( - npu_op.weights[idx].address, - npu_op.weights[idx].region, - npu_op.weights[idx].length, - weight.address.buffer.data, - ) - for idx, bias in enumerate(npu_op_tir_buffers[npu_op][3]): - assert isinstance(bias, vapi.NpuAddressRange) - check_buffer( - npu_op.biases[idx].address, - npu_op.biases[idx].region, - npu_op.biases[idx].length, - bias.address.buffer.data, - ) - - for test_case in test_cases: - tir_mod = test_case["tir_module"] - tir_mod["main"] = tir_mod["main"].with_attr( - "target", tvm.target.Target("ethos-u", host="ethos-u") - ) - tir_mod = tvm.tir.transform.MakeUnpackedAPI()(tir_mod) - candidate_regions_for_scratch = [5, 2, 1] - ( - scratch_region_map, - tvmbaw_workspace_size, - _, - ) = tir_to_cs_translator.analyze_scratch_memory_acesses( - tir_mod, candidate_regions_for_scratch - ) - allocate_node_sizes = _get_allocate_node_sizes(tir_mod) - buffer_info = tir_to_cs_translator.extract_buffer_info(tir_mod, test_case["param_dict"]) - extern_calls = extract_call_extern_list(tir_mod) - _npu_ops = list() - for extern_call in extern_calls: - _npu_ops.append(tir_to_cs_translator.translate_ethosu_tir_call_extern(extern_call)) - npu_op_tir_buffers = collect_tir_buffer_info(_npu_ops) - (_npu_ops, constant_hex_string) = tir_to_cs_translator.assign_addresses( - buffer_info, _npu_ops, scratch_region_map - ) - tvmbaw_workspace_mask = np.zeros(tvmbaw_workspace_size, dtype="uint8") - constant_tensor_read_mask = np.zeros(len(constant_hex_string) // 2, dtype="uint8") - verify(_npu_ops) - # This will be only 1 if all allocated scratch is used. - assert np.prod(tvmbaw_workspace_mask) == 1 - # This will be only 1 if all constant tensors is read at least once. 
- assert np.prod(constant_tensor_read_mask) == 1 - - -# fmt: off -"""A ethosu_pooling tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuPooling: - @T.prim_func - def main(placeholder: T.handle, placeholder_3: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_4 = T.match_buffer(placeholder, [135], dtype="int8", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [75], dtype="int8", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_pooling", "int8", 5, 9, 3, 5, 0, 9, placeholder_4[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 5, 3, 5, 0, 5, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 15, 3, 1, "AVG", 2, 3, 2, 1, 1, 1, 1, 1, 1, 0, "CLIP", 10, 100, "TFL", "NONE", 0, 0, 0, dtype="int8")) -# fmt: on - - -def test_translate_ethosu_pooling(): - def extract_ethosu_pooling_extern_call(mod): - # There should only be a single function - assert len(mod.functions.items()) == 1 - primfunc = mod.functions.items()[0][1] - - ethosu_pooling_calls = list() - - def populate_ethosu_pooling_calls(stmt): - if ( - isinstance(stmt, tvm.tir.Call) - and stmt.op.name == "tir.call_extern" - and stmt.args[0] == "ethosu_pooling" - ): - ethosu_pooling_calls.append(stmt) - - stmt_functor.post_order_visit(primfunc.body, populate_ethosu_pooling_calls) - return ethosu_pooling_calls[0] - - pooling_call = extract_ethosu_pooling_extern_call(SingleEthosuPooling) - npu_op = tir_to_cs_translator.translate_ethosu_pooling(pooling_call) - - assert npu_op.ifm.data_type == vapi.NpuDataType.INT8 - assert npu_op.ifm.shape == vapi.NpuShape3D(5, 9, 3) - assert npu_op.ifm.tiles.height_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_0 - assert npu_op.ifm.tiles.height_1 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_1 - assert npu_op.ifm.tiles.width_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).width_0 - assert npu_op.ifm.quantization == vapi.NpuQuantization(1.0, 0) - assert npu_op.ifm.layout == vapi.NpuLayout.NHWC - assert npu_op.ifm.strides == vapi.NpuShape3D(27, 3, 1) - # Compare OFM - assert npu_op.ofm.data_type == vapi.NpuDataType.INT8 - assert npu_op.ofm.shape == vapi.NpuShape3D(5, 5, 3) - assert npu_op.ofm.tiles.height_0 == vapi.NpuTileBox(5, 0, 5, [0, 0, 0, 0]).height_0 - assert npu_op.ofm.tiles.height_1 == vapi.NpuTileBox(5, 0, 5, [0, 0, 0, 0]).height_1 - assert npu_op.ofm.tiles.width_0 == vapi.NpuTileBox(5, 0, 5, [0, 0, 0, 0]).width_0 - assert npu_op.ofm.quantization == vapi.NpuQuantization(1.0, 0) - assert npu_op.ofm.layout == vapi.NpuLayout.NHWC - assert npu_op.ofm.strides == vapi.NpuShape3D(15, 3, 1) - # Compare pooling_type - assert npu_op.sub_op_type == vapi.NpuPoolingOp.AVERAGE - # Compare kernel and padding - assert ( - npu_op.kernel.__dict__ - == vapi.NpuKernel(w=2, h=3, stride_x=2, stride_y=1, dilation_x=1, dilation_y=1).__dict__ - ) - assert npu_op.padding == vapi.NpuPadding(top=1, left=1, bottom=1, right=0) - # Compare activation - assert npu_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU - assert npu_op.activation.min == 10 - assert npu_op.activation.max == 100 - # Compare rounding mode - assert npu_op.rounding_mode == vapi.NpuRoundingMode.TFL - # Compare ifm upscaling - assert npu_op.ifm_upscale == vapi.NpuResamplingMode.NONE - - -# fmt: off -"""A ethosu_binary_elementwise ADD tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuBinaryElementwiseAdd: - @T.prim_func - 
def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer( - placeholder, [270], dtype="int8", elem_offset=0, align=64, offset_factor=1 - ) - ethosu_write_2 = T.match_buffer( - ethosu_write, [135], dtype="int8", elem_offset=0, align=64, offset_factor=1 - ) - # body - T.evaluate(T.call_extern( "ethosu_binary_elementwise", "int8", 5, 9, 3, 5, 0, 9, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, placeholder_2[135], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "ADD", 0, "CLIP", 10, 100, "TFL", 0, 0, 0, 0, 0, 0, dtype="int8")) -# fmt: on - -# fmt: off -"""A ethosu_binary_elementwise SUB tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuBinaryElementwiseSub: - @T.prim_func - def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer(placeholder, [270], dtype="int8", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [135], dtype="int8", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 5, 9, 3, 5, 0, 9, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, placeholder_2[135], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "SUB", 0, "CLIP", 10, 100, "TFL", 0, 0, 0, 0, 0, 0, dtype="int8")) -# fmt: on - -# fmt: off -"""A ethosu_binary_elementwise MUL tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuBinaryElementwiseMul: - @T.prim_func - def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer(placeholder, [270], dtype="int8", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [135], dtype="int8", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 5, 9, 3, 5, 0, 9, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, placeholder_2[135], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "MUL", 0, "CLIP", 10, 100, "TFL", 0, 0, 0, 0, 0, 0, dtype="int8")) -# fmt: on - - -# fmt: off -"""A ethosu_binary_elementwise MIN tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuBinaryElementwiseMin: - @T.prim_func - def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer(placeholder, [270], dtype="int8", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [135], dtype="int8", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 5, 9, 3, 5, 0, 9, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, placeholder_2[135], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, ethosu_write_2[0], 0, 0, 0, 
T.float32(1.0), 0, "NHWC", 27, 3, 1, "MIN", 0, "CLIP", 10, 100, "TFL", 0, 0, 0, 0, 0, 0, dtype="int8")) -# fmt: on - - -# fmt: off -"""A ethosu_binary_elementwise Max tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuBinaryElementwiseMax: - @T.prim_func - def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer(placeholder, [270], dtype="int8", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [135], dtype="int8", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 5, 9, 3, 5, 0, 9, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, placeholder_2[135], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "MAX", 0, "CLIP", 10, 100, "TFL", 0, 0, 0, 0, 0, 0, dtype="int8")) -# fmt: on - - -# fmt: off -"""A ethosu_binary_elementwise SHR tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuBinaryElementwiseShr: - @T.prim_func - def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer(placeholder, [270], dtype="int32", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [135], dtype="int32", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int32", 5, 9, 3, 5, 0, 9, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int32", 5, 9, 3, 5, 0, 9, placeholder_2[135], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int32", 5, 9, 3, 5, 0, 9, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "SHR", 0, "NONE", 0, 0, "TFL", 0, 0, 0, 0, 0, 0, dtype="int32")) -# fmt: on - - -# fmt: off -"""A ethosu_binary_elementwise SHL tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuBinaryElementwiseShl: - @T.prim_func - def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer(placeholder, [270], dtype="int32", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [135], dtype="int32", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int32", 5, 9, 3, 5, 0, 9, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int32", 5, 9, 3, 5, 0, 9, placeholder_2[135], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int32", 5, 9, 3, 5, 0, 9, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "SHL", 0, "CLIP", 10, 100, "TFL", 0, 0, 0, 0, 0, 0, dtype="int32")) -# fmt: on - - -@pytest.mark.parametrize("operator_type", ["ADD", "SUB", "MUL", "MIN", "MAX", "SHR", "SHL"]) -def test_translate_ethosu_binary_elementwise(operator_type): - if operator_type == "SHR" or operator_type == "SHL": - data_type = vapi.NpuDataType.INT32 - data_type_bytes = 4 - else: - data_type = vapi.NpuDataType.INT8 - data_type_bytes = 1 - - def extract_ethosu_binary_elementwise_call_extern(mod): - # There should only be a single function - assert len(mod.functions.items()) == 1 - primfunc = mod.functions.items()[0][1] - - ethosu_binary_elementwise_calls = 
list() - - def populate_ethosu_binary_elementwise_calls(stmt): - if ( - isinstance(stmt, tvm.tir.Call) - and stmt.op.name == "tir.call_extern" - and stmt.args[0] == "ethosu_binary_elementwise" - ): - ethosu_binary_elementwise_calls.append(stmt) - - stmt_functor.post_order_visit(primfunc.body, populate_ethosu_binary_elementwise_calls) - return ethosu_binary_elementwise_calls[0] - - if operator_type == "ADD": - binary_elementwise = SingleEthosuBinaryElementwiseAdd - elif operator_type == "SUB": - binary_elementwise = SingleEthosuBinaryElementwiseSub - elif operator_type == "MUL": - binary_elementwise = SingleEthosuBinaryElementwiseMul - elif operator_type == "MIN": - binary_elementwise = SingleEthosuBinaryElementwiseMin - elif operator_type == "MAX": - binary_elementwise = SingleEthosuBinaryElementwiseMax - elif operator_type == "SHR": - binary_elementwise = SingleEthosuBinaryElementwiseShr - elif operator_type == "SHL": - binary_elementwise = SingleEthosuBinaryElementwiseShl - binary_elementwise_call = extract_ethosu_binary_elementwise_call_extern(binary_elementwise) - npu_op = tir_to_cs_translator.translate_ethosu_binary_elementwise(binary_elementwise_call) - - # Compare IFM - assert npu_op.ifm.data_type == data_type - assert npu_op.ifm.shape == vapi.NpuShape3D(5, 9, 3) - assert npu_op.ifm.tiles.height_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_0 - assert npu_op.ifm.tiles.height_1 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_1 - assert npu_op.ifm.tiles.width_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).width_0 - assert npu_op.ifm.quantization == vapi.NpuQuantization(1.0, 0) - assert npu_op.ifm.layout == vapi.NpuLayout.NHWC - assert npu_op.ifm.strides == vapi.NpuShape3D( - 27 * data_type_bytes, 3 * data_type_bytes, 1 * data_type_bytes - ) - # Compare IFM2 - assert npu_op.ifm2.data_type == data_type - assert npu_op.ifm2.shape == vapi.NpuShape3D(5, 9, 3) - assert npu_op.ifm2.tiles.height_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_0 - assert npu_op.ifm2.tiles.height_1 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_1 - assert npu_op.ifm2.tiles.width_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).width_0 - assert npu_op.ifm2.quantization == vapi.NpuQuantization(1.0, 0) - assert npu_op.ifm2.layout == vapi.NpuLayout.NHWC - assert npu_op.ifm2.strides == vapi.NpuShape3D( - 27 * data_type_bytes, 3 * data_type_bytes, 1 * data_type_bytes - ) - # Compare OFM - assert npu_op.ofm.data_type == data_type - assert npu_op.ofm.shape == vapi.NpuShape3D(5, 9, 3) - assert npu_op.ofm.tiles.height_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_0 - assert npu_op.ofm.tiles.height_1 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_1 - assert npu_op.ofm.tiles.width_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).width_0 - assert npu_op.ofm.quantization == vapi.NpuQuantization(1.0, 0) - assert npu_op.ofm.layout == vapi.NpuLayout.NHWC - assert npu_op.ofm.strides == vapi.NpuShape3D( - 27 * data_type_bytes, 3 * data_type_bytes, 1 * data_type_bytes - ) - # Compare op type - if operator_type == "ADD": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.ADD - elif operator_type == "SUB": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.SUB - elif operator_type == "MUL": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.MUL - elif operator_type == "MIN": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.MIN - elif operator_type == "MAX": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.MAX - elif operator_type == "SHR": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.SHR - elif 
operator_type == "SHL": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.SHL - # Compare reversed_operands - assert npu_op.reversed_operands == False - # Compare activation - if operator_type == "SHR": - assert npu_op.activation is None - else: - assert npu_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU - assert npu_op.activation.min == 10 - assert npu_op.activation.max == 100 - # Compare rounding mode - assert npu_op.rounding_mode == vapi.NpuRoundingMode.TFL - - -# fmt: off -"""A ethosu_binary_elementwise ADD with broadcasting tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuBinaryElementwiseAddBroadcasting: - @T.prim_func - def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer(placeholder, [27], dtype="int8", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [24], dtype="int8", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 2, 3, 4, 2, 0, 3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int8", 1, 3, 1, 1, 0, 3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int8", 2, 3, 4, 2, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "ADD", 1, "CLIP", 10, 100, "TFL", 0, 0, 0, 0, 0, 0, dtype="int8")) -# fmt: on - -# fmt: off -"""A ethosu_binary_elementwise SUB with broadcasting tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuBinaryElementwiseSubBroadcasting: - @T.prim_func - def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer(placeholder, [27], dtype="int8", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [24], dtype="int8", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 2, 3, 4, 2, 0, 3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int8", 1, 3, 1, 1, 0, 3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int8", 2, 3, 4, 2, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "SUB", 1, "CLIP", 10, 100, "TFL", 0, 0, 0, 0, 0, 0, dtype="int8")) -# fmt: on - -# fmt: off -"""A ethosu_binary_elementwise MUL with broadcasting tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuBinaryElementwiseMulBroadcasting: - @T.prim_func - def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer(placeholder, [27], dtype="int8", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [24], dtype="int8", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 2, 3, 4, 2, 0, 3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int8", 1, 3, 1, 1, 0, 3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int8", 2, 3, 4, 2, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "MUL", 1, "CLIP", 10, 100, "TFL", 0, 0, 0, 0, 0, 0, dtype="int8")) -# fmt: on - - -# fmt: off -"""A ethosu_binary_elementwise MIN with broadcasting tir testcase for the translator""" -@tvm.script.ir_module -class 
SingleEthosuBinaryElementwiseMinBroadcasting: - @T.prim_func - def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer(placeholder, [27], dtype="int8", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [24], dtype="int8", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 2, 3, 4, 2, 0, 3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int8", 1, 3, 1, 1, 0, 3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int8", 2, 3, 4, 2, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "MIN", 1, "CLIP", 10, 100, "TFL", 0, 0, 0, 0, 0, 0, dtype="int8")) -# fmt: on - - -# fmt: off -"""A ethosu_binary_elementwise MAX with broadcasting tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuBinaryElementwiseMaxBroadcasting: - @T.prim_func - def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer(placeholder, [27], dtype="int8", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [24], dtype="int8", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 2, 3, 4, 2, 0, 3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int8", 1, 3, 1, 1, 0, 3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int8", 2, 3, 4, 2, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "MAX", 1, "CLIP", 10, 100, "TFL", 0, 0, 0, 0, 0, 0, dtype="int8")) -# fmt: on - - -# fmt: off -"""A ethosu_binary_elementwise SHR with broadcasting tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuBinaryElementwiseShrBroadcasting: - @T.prim_func - def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer(placeholder, [27], dtype="int32", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [24], dtype="int32", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int32", 2, 3, 4, 2, 0, 3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int32", 1, 3, 1, 1, 0, 3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int32", 2, 3, 4, 2, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "SHR", 1, "NONE", 0, 0, "TFL", 0, 0, 0, 0, 0, 0, dtype="int32")) -# fmt: on - - -# fmt: off -"""A ethosu_binary_elementwise SHL with broadcasting tir testcase for the translator""" -@tvm.script.ir_module -class SingleEthosuBinaryElementwiseShlBroadcasting: - @T.prim_func - def main(placeholder: T.handle, ethosu_write: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_2 = T.match_buffer(placeholder, [27], dtype="int32", elem_offset=0, align=64, offset_factor=1) - ethosu_write_2 = T.match_buffer(ethosu_write, [24], dtype="int32", elem_offset=0, align=64, offset_factor=1) - # body - T.evaluate(T.call_extern("ethosu_binary_elementwise", "int32", 2, 3, 4, 2, 0, 3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int32", 1, 3, 1, 1, 0, 
3, placeholder_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int32", 2, 3, 4, 2, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "SHL", 1, "CLIP", 10, 100, "TFL", 0, 0, 0, 0, 0, 0, dtype="int32")) -# fmt: on - - -@pytest.mark.parametrize("operator_type", ["ADD", "SUB", "MUL", "MIN", "MAX", "SHR", "SHL"]) -def test_translate_ethosu_binary_elementwise_broadcasting(operator_type): - if operator_type == "SHR" or operator_type == "SHL": - data_type = vapi.NpuDataType.INT32 - data_type_bytes = 4 - else: - data_type = vapi.NpuDataType.INT8 - data_type_bytes = 1 - - def extract_ethosu_binary_elementwise_broadcasting_call_extern(mod): - # There should only be a single function - assert len(mod.functions.items()) == 1 - primfunc = mod.functions.items()[0][1] - - ethosu_binary_elementwise_calls = list() - - def populate_ethosu_binary_elementwise_calls(stmt): - if ( - isinstance(stmt, tvm.tir.Call) - and stmt.op.name == "tir.call_extern" - and stmt.args[0] == "ethosu_binary_elementwise" - ): - ethosu_binary_elementwise_calls.append(stmt) - - stmt_functor.post_order_visit(primfunc.body, populate_ethosu_binary_elementwise_calls) - return ethosu_binary_elementwise_calls[0] - - if operator_type == "ADD": - binary_elementwise = SingleEthosuBinaryElementwiseAddBroadcasting - elif operator_type == "SUB": - binary_elementwise = SingleEthosuBinaryElementwiseSubBroadcasting - elif operator_type == "MUL": - binary_elementwise = SingleEthosuBinaryElementwiseMulBroadcasting - elif operator_type == "MIN": - binary_elementwise = SingleEthosuBinaryElementwiseMinBroadcasting - elif operator_type == "MAX": - binary_elementwise = SingleEthosuBinaryElementwiseMaxBroadcasting - elif operator_type == "SHR": - binary_elementwise = SingleEthosuBinaryElementwiseShrBroadcasting - elif operator_type == "SHL": - binary_elementwise = SingleEthosuBinaryElementwiseShlBroadcasting - binary_elementwise_call = extract_ethosu_binary_elementwise_broadcasting_call_extern( - binary_elementwise - ) - npu_op = tir_to_cs_translator.translate_ethosu_binary_elementwise(binary_elementwise_call) - - # Compare IFM - assert npu_op.ifm.data_type == data_type - assert npu_op.ifm.shape == vapi.NpuShape3D(2, 3, 4) - assert npu_op.ifm.tiles.height_0 == vapi.NpuTileBox(2, 0, 3, [0, 0, 0, 0]).height_0 - assert npu_op.ifm.tiles.height_1 == vapi.NpuTileBox(2, 0, 3, [0, 0, 0, 0]).height_1 - assert npu_op.ifm.tiles.width_0 == vapi.NpuTileBox(2, 0, 3, [0, 0, 0, 0]).width_0 - assert npu_op.ifm.quantization == vapi.NpuQuantization(1.0, 0) - assert npu_op.ifm.layout == vapi.NpuLayout.NHWC - assert npu_op.ifm.strides == vapi.NpuShape3D( - 12 * data_type_bytes, 4 * data_type_bytes, 1 * data_type_bytes - ) - # Compare IFM2 - assert npu_op.ifm2.data_type == data_type - assert npu_op.ifm2.shape == vapi.NpuShape3D(1, 3, 1) - assert npu_op.ifm2.tiles.height_0 == vapi.NpuTileBox(1, 0, 3, [0, 0, 0, 0]).height_0 - assert npu_op.ifm2.tiles.height_1 == vapi.NpuTileBox(1, 0, 3, [0, 0, 0, 0]).height_1 - assert npu_op.ifm2.tiles.width_0 == vapi.NpuTileBox(1, 0, 3, [0, 0, 0, 0]).width_0 - assert npu_op.ifm2.quantization == vapi.NpuQuantization(1.0, 0) - assert npu_op.ifm2.layout == vapi.NpuLayout.NHWC - assert npu_op.ifm2.strides == vapi.NpuShape3D( - 1 * data_type_bytes, 1 * data_type_bytes, 1 * data_type_bytes - ) - # Compare OFM - assert npu_op.ofm.data_type == data_type - assert npu_op.ofm.shape == vapi.NpuShape3D(2, 3, 4) - assert npu_op.ofm.tiles.height_0 == vapi.NpuTileBox(2, 0, 3, [0, 0, 0, 0]).height_0 - assert npu_op.ofm.tiles.height_1 == 
vapi.NpuTileBox(2, 0, 3, [0, 0, 0, 0]).height_1 - assert npu_op.ofm.tiles.width_0 == vapi.NpuTileBox(2, 0, 3, [0, 0, 0, 0]).width_0 - assert npu_op.ofm.quantization == vapi.NpuQuantization(1.0, 0) - assert npu_op.ofm.layout == vapi.NpuLayout.NHWC - assert npu_op.ofm.strides == vapi.NpuShape3D( - 12 * data_type_bytes, 4 * data_type_bytes, 1 * data_type_bytes - ) - # Compare op type - if operator_type == "ADD": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.ADD - elif operator_type == "SUB": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.SUB - elif operator_type == "MUL": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.MUL - elif operator_type == "MIN": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.MIN - elif operator_type == "MAX": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.MAX - elif operator_type == "SHR": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.SHR - elif operator_type == "SHL": - assert npu_op.sub_op_type == vapi.NpuElementWiseOp.SHL - # Compare reversed_operands - assert npu_op.reversed_operands == True - # Compare activation - if operator_type == "SHR": - assert npu_op.activation is None - else: - assert npu_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU - assert npu_op.activation.min == 10 - assert npu_op.activation.max == 100 - # Compare rounding mode - assert npu_op.rounding_mode == vapi.NpuRoundingMode.TFL - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_type_inference.py b/tests/python/contrib/test_ethosu/test_type_inference.py deleted file mode 100644 index 48a4dbde81c3..000000000000 --- a/tests/python/contrib/test_ethosu/test_type_inference.py +++ /dev/null @@ -1,485 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") - -from tvm import relay, TVMError -from tvm.relay.testing import run_opt_pass -from .infra import make_ethosu_conv2d -from .infra import make_ethosu_depthwise_conv2d -from .infra import make_ethosu_pooling -from .infra import make_ethosu_binary_elementwise -from .infra import make_ethosu_identity -from .infra import make_ethosu_unary_elementwise - - -@pytest.mark.parametrize( - ["ifm_shape", "ifm_layout"], [((1, 56, 72, 55), "NHWC"), ((1, 56, 4, 72, 16), "NHCWB16")] -) -@pytest.mark.parametrize( - "ofm_shape,ofm_layout", [((1, 54, 38, 122), "NHWC"), ((1, 54, 8, 38, 16), "NHCWB16")] -) -def test_ethosu_conv2d_type_inference( - ifm_shape, - ifm_layout, - ofm_shape, - ofm_layout, -): - ifm_channels = 55 - ofm_channels = 122 - kernel_shape = (3, 2) - padding = (0, 1, 2, 3) - strides = (1, 2) - dilation = (2, 1) - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - conv2d = make_ethosu_conv2d( - ifm, - ifm_channels, - ofm_channels, - kernel_shape, - padding, - strides, - dilation, - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - func = relay.Function([ifm], conv2d) - func = run_opt_pass(func, relay.transform.InferType()) - assert tuple(func.body.checked_type.shape) == ofm_shape - - -@pytest.mark.parametrize( - "ifm_dtype,weight_dtype,scale_bias_dtype", - [("float32", "int8", "uint8"), ("int8", "float32", "uint8"), ("int8", "int8", "float32")], -) -def test_ethosu_conv2d_invalid_dtypes(ifm_dtype, weight_dtype, scale_bias_dtype): - ifm_channels = 55 - ofm_channels = 122 - kernel_shape = (3, 2) - padding = (0, 1, 2, 3) - strides = (1, 2) - dilation = (2, 1) - ifm = relay.var("ifm", shape=(1, 56, 72, 55), dtype=ifm_dtype) - conv2d = make_ethosu_conv2d( - ifm, - ifm_channels, - ofm_channels, - kernel_shape, - padding, - strides, - dilation, - weight_dtype=weight_dtype, - scale_bias_dtype=scale_bias_dtype, - ) - func = relay.Function([ifm], conv2d) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -def test_ethosu_conv2d_invalid_upscale_method(): - invalid_upscale_method = "FOO" - ifm_channels = 55 - ofm_channels = 122 - kernel_shape = (3, 2) - padding = (0, 1, 2, 3) - strides = (1, 2) - dilation = (2, 1) - ifm = relay.var("ifm", shape=(1, 56, 72, 55), dtype="int8") - conv2d = make_ethosu_conv2d( - ifm, - ifm_channels, - ofm_channels, - kernel_shape, - padding, - strides, - dilation, - weight_dtype="int8", - scale_bias_dtype="uint8", - upscale=invalid_upscale_method, - ) - func = relay.Function([ifm], conv2d) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -@pytest.mark.parametrize( - "ifm_shape, ifm_layout", [((1, 46, 71, 55), "NHWC"), ((1, 46, 4, 71, 16), "NHCWB16")] -) -@pytest.mark.parametrize( - "ofm_shape, ofm_layout", [((1, 44, 37, 55), "NHWC"), ((1, 44, 4, 37, 16), "NHCWB16")] -) -def test_ethosu_depthwise_conv2d_type_inference( - ifm_shape, - ifm_layout, - ofm_shape, - ofm_layout, -): - channels = 55 - kernel_shape = (3, 2) - padding = (0, 1, 2, 3) - strides = (1, 2) - dilation = (2, 1) - ifm = relay.var("ifm", shape=ifm_shape, dtype="int8") - depthwise_conv2d = make_ethosu_depthwise_conv2d( - ifm, - channels, - kernel_shape, - padding, - strides, - dilation, - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - func = relay.Function([ifm], depthwise_conv2d) - func = run_opt_pass(func, relay.transform.InferType()) - assert tuple(func.body.checked_type.shape) == ofm_shape - - -@pytest.mark.parametrize( - "ifm_dtype,weight_dtype,scale_bias_dtype", - 
[("float32", "int8", "uint8"), ("int8", "float32", "uint8"), ("int8", "int8", "float32")], -) -def test_ethosu_depthwise_conv2d_invalid_dtypes(ifm_dtype, weight_dtype, scale_bias_dtype): - channels = 55 - kernel_shape = (3, 2) - padding = (0, 1, 2, 3) - strides = (1, 2) - dilation = (2, 1) - dilation = (2, 1) - ifm = relay.var("ifm", shape=(1, 56, 72, 55), dtype=ifm_dtype) - depthwise_conv2d = make_ethosu_depthwise_conv2d( - ifm, - channels, - kernel_shape, - padding, - strides, - dilation, - weight_dtype=weight_dtype, - scale_bias_dtype=scale_bias_dtype, - ) - func = relay.Function([ifm], depthwise_conv2d) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -@pytest.mark.parametrize( - "ifm_shape, ifm_layout", [((1, 56, 72, 55), "NHWC"), ((1, 56, 4, 72, 16), "NHCWB16")] -) -@pytest.mark.parametrize( - "ofm_shape, ofm_layout", [((1, 56, 38, 55), "NHWC"), ((1, 56, 4, 38, 16), "NHCWB16")] -) -def test_ethosu_pooling_type_inference( - ifm_shape, - ifm_layout, - ofm_shape, - ofm_layout, -): - dtype = "int8" - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - pooling_type = "AVG" - pool_shape = (3, 2) - ofm_channels = 55 - strides = (1, 2) - padding = (0, 1, 2, 3) - pooling = make_ethosu_pooling( - ifm, - pooling_type, - pool_shape, - ofm_channels, - dtype, - strides, - padding, - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - func = relay.Function([ifm], pooling) - func = run_opt_pass(func, relay.transform.InferType()) - assert tuple(func.body.checked_type.shape) == ofm_shape - assert func.body.checked_type.dtype == dtype - - -def test_ethosu_pooling_invalid_pooling_type(): - invalid_pooling_type = "A" - dtype = "int8" - - ifm = relay.var("ifm", shape=[1, 56, 72, 55], dtype=dtype) - pool_shape = (3, 2) - ofm_channels = 55 - strides = (1, 2) - padding = (0, 1, 2, 3) - pooling = make_ethosu_pooling( - ifm, - invalid_pooling_type, - pool_shape, - ofm_channels, - dtype, - strides, - padding, - ) - func = relay.Function([ifm], pooling) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -def test_ethosu_pooling_invalid_dtype(): - invalid_dtype = "int32" - ifm = relay.var("ifm", shape=[1, 56, 72, 55], dtype=invalid_dtype) - pooling_type = "MAX" - pool_shape = (3, 2) - ofm_channels = 55 - strides = (1, 2) - padding = (0, 1, 2, 3) - pooling = make_ethosu_pooling( - ifm, - pooling_type, - pool_shape, - ofm_channels, - "int8", - strides, - padding, - ) - func = relay.Function([ifm], pooling) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -def test_ethosu_pooling_invalid_upscale_method(): - invalid_upscale_method = "FOO" - dtype = "int8" - - ifm = relay.var("ifm", shape=[1, 56, 72, 55], dtype=dtype) - pooling = make_ethosu_pooling( - ifm, - "MAX", - (3, 2), - 55, - dtype, - (1, 2), - (0, 1, 2, 3), - upscale=invalid_upscale_method, - ) - func = relay.Function([ifm], pooling) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -@pytest.mark.parametrize( - "ifm_shape, ifm_layout", [((1, 4, 5, 33), "NHWC"), ((1, 4, 3, 5, 16), "NHCWB16")] -) -@pytest.mark.parametrize( - "ofm_shape, ofm_layout", [((1, 4, 5, 33), "NHWC"), ((1, 4, 3, 5, 16), "NHCWB16")] -) -def test_ethosu_binary_elementwise_type_inference( - ifm_shape, - ifm_layout, - ofm_shape, - ofm_layout, -): - dtype = "int8" - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - ifm2 = relay.var("ifm2", shape=ifm_shape, dtype=dtype) - operator_type = "ADD" - ifm_channels, ifm2_channels = 33, 33 - 
binary_elementwise = make_ethosu_binary_elementwise( - ifm, - ifm2, - ifm_channels, - ifm2_channels, - operator_type, - dtype, - ifm_layout=ifm_layout, - ifm2_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - func = relay.Function([ifm, ifm2], binary_elementwise) - func = run_opt_pass(func, relay.transform.InferType()) - assert tuple(func.body.checked_type.shape) == ofm_shape - assert func.body.checked_type.dtype == dtype - - -def test_ethosu_binary_elementwise_invalid_operator_type(): - invalid_operator_type = "A" - ifm_shape = [1, 4, 5, 33] - dtype = "int8" - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - ifm2 = relay.var("ifm2", shape=ifm_shape, dtype=dtype) - ifm_channels, ifm2_channels = 33, 33 - binary_elementwise = make_ethosu_binary_elementwise( - ifm, - ifm2, - ifm_channels, - ifm2_channels, - invalid_operator_type, - dtype, - ) - func = relay.Function([ifm, ifm2], binary_elementwise) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -def test_ethosu_binary_elementwise_invalid_data_types(): - dtype = "int8" - dtype2 = "int32" - operator_type = "ADD" - ifm_shape = [1, 4, 5, 33] - ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype) - ifm2 = relay.var("ifm2", shape=ifm_shape, dtype=dtype2) - ifm_channels, ifm2_channels = 33, 33 - binary_elementwise = make_ethosu_binary_elementwise( - ifm, - ifm2, - ifm_channels, - ifm2_channels, - operator_type, - dtype, - ) - func = relay.Function([ifm, ifm2], binary_elementwise) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -@pytest.mark.parametrize("operator_type", ["MIN", "MAX"]) -def test_ethosu_binary_elementwise_min_max_invalid_data_type(operator_type): - invalid_dtype = "int32" - ifm_shape = [1, 4, 5, 33] - ifm = relay.var("ifm", shape=ifm_shape, dtype=invalid_dtype) - ifm2 = relay.var("ifm2", shape=ifm_shape, dtype=invalid_dtype) - ifm_channels, ifm2_channels = 33, 33 - binary_elementwise = make_ethosu_binary_elementwise( - ifm, - ifm2, - ifm_channels, - ifm2_channels, - operator_type, - invalid_dtype, - ) - func = relay.Function([ifm, ifm2], binary_elementwise) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -@pytest.mark.parametrize("invalid_dtype", ["int8", "uint8"]) -@pytest.mark.parametrize("operator_type", ["RHS", "SHR"]) -def test_ethosu_binary_elementwise_shift_invalid_data_type(invalid_dtype, operator_type): - ifm_shape = [1, 4, 5, 33] - ifm = relay.var("ifm", shape=ifm_shape, dtype=invalid_dtype) - ifm2 = relay.var("ifm2", shape=ifm_shape, dtype=invalid_dtype) - ifm_channels, ifm2_channels = 33, 33 - binary_elementwise = make_ethosu_binary_elementwise( - ifm, - ifm2, - ifm_channels, - ifm2_channels, - operator_type, - invalid_dtype, - ) - func = relay.Function([ifm, ifm2], binary_elementwise) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -@pytest.mark.parametrize("shape", [(1, 56, 72, 55), (241, 7, 755), (28, 44), (5003,)]) -def test_ethosu_identity_type_inference(shape): - dtype = "int8" - ifm = relay.var("ifm", shape=shape, dtype=dtype) - identity = make_ethosu_identity(ifm) - func = relay.Function([ifm], identity) - func = run_opt_pass(func, relay.transform.InferType()) - assert tuple(func.body.checked_type.shape) == shape - assert func.body.checked_type.dtype == dtype - - -def test_ethosu_identity_invalid_shape(): - invalid_shape = [1, 2, 3, 4, 5] - dtype = "int8" - ifm = relay.var("ifm", shape=invalid_shape, dtype=dtype) - - identity = make_ethosu_identity(ifm) - 
func = relay.Function([ifm], identity) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -def test_ethosu_identity_invalid_dtype(): - invalid_dtype = "int32" - ifm = relay.var("ifm", shape=[6000], dtype=invalid_dtype) - - identity = make_ethosu_identity(ifm) - func = relay.Function([ifm], identity) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -@pytest.mark.parametrize( - "ifm_shape, ifm_layout", [((1, 4, 5, 33), "NHWC"), ((1, 4, 3, 5, 16), "NHCWB16")] -) -@pytest.mark.parametrize( - "ofm_shape, ofm_layout", [((1, 4, 5, 33), "NHWC"), ((1, 4, 3, 5, 16), "NHCWB16")] -) -@pytest.mark.parametrize("operator_type, data_type", [("ABS", "int8"), ("CLZ", "int32")]) -def test_ethosu_unary_elementwise_type_inference( - ifm_shape, - ifm_layout, - ofm_shape, - ofm_layout, - operator_type, - data_type, -): - ifm = relay.var("ifm", shape=ifm_shape, dtype=data_type) - ofm_channels = 33 - unary_elementwise = make_ethosu_unary_elementwise( - ifm, - ofm_channels, - operator_type, - ifm_layout=ifm_layout, - ofm_layout=ofm_layout, - ) - f = relay.Function([ifm], unary_elementwise) - f = run_opt_pass(f, relay.transform.InferType()) - assert tuple(f.body.checked_type.shape) == ofm_shape - - -def test_ethosu_unary_elementwise_invalid_operator_type(): - ifm = relay.var("ifm", shape=(1, 3, 7, 12), dtype="int8") - invalid_op_type = "ABBBS" - unary_elementwise = make_ethosu_unary_elementwise( - ifm, - 12, - invalid_op_type, - ) - func = relay.Function([ifm], unary_elementwise) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -def test_ethosu_unary_elementwise_invalid_dtype(): - invalid_dtype = "int32" - ifm = relay.var("ifm", shape=(1, 5, 15, 25), dtype=invalid_dtype) - - unary_elementwise = make_ethosu_unary_elementwise( - ifm, - 25, - "ABS", - ) - func = relay.Function([ifm], unary_elementwise) - with pytest.raises(TVMError): - run_opt_pass(func, relay.transform.InferType()) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_ethosu/test_vela_api.py b/tests/python/contrib/test_ethosu/test_vela_api.py deleted file mode 100644 index 7f4b5b8c7052..000000000000 --- a/tests/python/contrib/test_ethosu/test_vela_api.py +++ /dev/null @@ -1,608 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest - -pytest.importorskip("ethosu.vela") -import numpy as np -from ethosu.vela import api as vapi -from unittest.mock import patch - -import tvm -from tvm.script import tir as T -from tvm.tir import stmt_functor -from tvm.relay.backend.contrib.ethosu import vela_api -import tvm.relay.backend.contrib.ethosu.tir_to_cs_translator as tirtocs - -ACCEL_TYPES = [ - vapi.NpuAccelerator.Ethos_U55_256, - vapi.NpuAccelerator.Ethos_U55_128, - vapi.NpuAccelerator.Ethos_U55_64, - vapi.NpuAccelerator.Ethos_U55_32, -] - - -"""Test case 1""" - - -@tvm.script.ir_module -class Module1: - @T.prim_func - def main( - placeholder: T.handle, - placeholder_1: T.handle, - placeholder_2: T.handle, - ethosu_conv2d: T.handle, - ) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_3 = T.match_buffer( - placeholder, [192], dtype="uint8", elem_offset=0, align=64, offset_factor=1 - ) - placeholder_4 = T.match_buffer( - placeholder_1, [48], dtype="uint8", elem_offset=0, align=64, offset_factor=1 - ) - placeholder_5 = T.match_buffer( - placeholder_2, [16], dtype="int32", elem_offset=0, align=64, offset_factor=1 - ) - ethosu_conv2d_1 = T.match_buffer( - ethosu_conv2d, [1024], dtype="uint8", elem_offset=0, align=64, offset_factor=1 - ) - # body - T.evaluate( - T.call_extern( - "ethosu_conv2d", - "uint8", - 8, - 8, - 3, - 8, - 0, - 8, - placeholder_3[0], - 0, - 0, - 0, - T.float32(0.5), - 10, - "NHWC", - 24, - 3, - 1, - "uint8", - 8, - 8, - 16, - 8, - 0, - 8, - ethosu_conv2d_1[0], - 0, - 0, - 0, - T.float32(0.25), - 14, - "NHWC", - 128, - 16, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - placeholder_4[0], - 0, - 12, - placeholder_5[0], - 0, - 0, - 0, - 0, - 0, - "CLIP", - 0, - 0, - "TFL", - "NONE", - dtype="uint8", - ) - ) - - -"""Test case 2 with per-channel quantization""" - - -@tvm.script.ir_module -class Module2: - @T.prim_func - def main( - placeholder: T.handle, - placeholder_1: T.handle, - placeholder_2: T.handle, - placeholder_6: T.handle, - ethosu_conv2d: T.handle, - ) -> None: - # function attr dict - T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_3 = T.match_buffer( - placeholder, [192], dtype="uint8", elem_offset=0, align=64, offset_factor=1 - ) - placeholder_4 = T.match_buffer( - placeholder_1, [48], dtype="uint8", elem_offset=0, align=64, offset_factor=1 - ) - placeholder_5 = T.match_buffer( - placeholder_2, [16], dtype="int32", elem_offset=0, align=64, offset_factor=1 - ) - # Per-channel weight scales - placeholder_7 = T.match_buffer( - placeholder_6, [16], dtype="float32", elem_offset=0, align=64, offset_factor=1 - ) - ethosu_conv2d_1 = T.match_buffer( - ethosu_conv2d, [1024], dtype="uint8", elem_offset=0, align=64, offset_factor=1 - ) - # body - T.evaluate( - T.call_extern( - "ethosu_conv2d", - "uint8", - 8, - 8, - 3, - 8, - 0, - 8, - placeholder_3[0], - 0, - 0, - 0, - T.float32(0.5), - 10, - "NHWC", - 24, - 3, - 1, - "uint8", - 8, - 8, - 16, - 8, - 0, - 8, - ethosu_conv2d_1[0], - 0, - 0, - 0, - T.float32(0.25), - 14, - "NHWC", - 128, - 16, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - placeholder_4[0], - 0, - 12, - placeholder_5[0], - 0, - 0, - 0, - 0, - 0, - "CLIP", - 0, - 0, - "TFL", - "NONE", - dtype="uint8", - ) - ) - - -# Complains of the use of undefined vars -# fmt: off -@tvm.script.ir_module(check_well_formed=False) -class Module3: - @T.prim_func - def main(ethos_u_0_i0: T.Buffer((1, 299, 299, 2), "int8"), ethosu_write: T.Buffer((1, 299, 299, 3), "int8")): - T.func_attr({"from_legacy_te_schedule": T.bool(True), 
"global_symbol": "main", "tir.noalias": T.bool(True)}) - p2_global = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) - ax0_ax1_fused_ax2_fused_ax3_fused = T.int32() - p2_global_1 = T.Buffer((128,), "uint8", data=p2_global) - with T.attr(T.iter_var(ax0_ax1_fused_ax2_fused_ax3_fused, None, "DataPar", ""), "pragma_compute_cycles_hint", 1056): - p1_encoded = T.Buffer((128,), "uint8") - T.call_extern("handle", "ethosu_copy", p1_encoded[0], 128, p2_global_1[0]) - nn = T.int32() - T.attr(T.iter_var(nn, None, "DataPar", ""), "pragma_compute_cycles_hint", T.int64(179570)) - ethos_u_0_i0_1 = T.Buffer((178802,), "int8", data=ethos_u_0_i0.data) - ethosu_write_1 = T.Buffer((268203,), "int8", data=ethosu_write.data) - T.call_extern("handle", "ethosu_conv2d", "int8", 299, 299, 2, 299, 0, 299, ethos_u_0_i0_1[0], 0, 0, 0, T.float32(0.0039215683937072754), -128, "NHWC", 598, 2, 1, "int8", 299, 299, 3, 299, 0, 299, ethosu_write_1[0], 0, 0, 0, T.float32(0.025585981085896492), -128, "NHWC", 897, 3, 1, 2, 3, 1, 1, 1, 2, p2_global_1[0], 96, T.int8(-1), T.int8(-1), 0, p2_global_1[96], 32, T.int8(-1), T.int8(-1), 2, 0, 2, 1, "NONE", 0, 0, "TFL", "NONE", 32, 12, 8) -# fmt: on - - -def test_get_optimal_block_config(): - block_configs_cases = [ - { - "test": [ - vapi.NpuShape3D(10, 20, 8), - vapi.NpuShape3D(10, 30, 16), - vapi.NpuShape3D(10, 40, 32), - ], - "ref": vapi.NpuShape3D(10, 40, 32), - }, - { - "test": [ - vapi.NpuShape3D(10, 20, 8), - vapi.NpuShape3D(10, 50, 32), - vapi.NpuShape3D(10, 40, 32), - ], - "ref": vapi.NpuShape3D(10, 50, 32), - }, - { - "test": [ - vapi.NpuShape3D(50, 50, 8), - vapi.NpuShape3D(10, 30, 32), - vapi.NpuShape3D(8, 8, 64), - ], - "ref": vapi.NpuShape3D(8, 8, 64), - }, - ] - - for test_case in block_configs_cases: - assert vela_api._get_optimal_block_config(test_case["test"]) == test_case["ref"] - - -@pytest.mark.parametrize( - "block_config_str, expected_block_config", - [("4x4x8", vapi.NpuShape3D(4, 4, 8)), ("3x7x16", vapi.NpuShape3D(3, 7, 16))], -) -def test_force_block_config(block_config_str, expected_block_config): - config = { - "dev_force_block_config": block_config_str, - } - with tvm.transform.PassContext(config={"relay.ext.ethos-u.options": config}): - block_config = vela_api.get_optimal_block_config(None, vapi.NpuAccelerator.Ethos_U55_128) - assert block_config == expected_block_config - - -def test_compress_weights(): - test_vecs = [ - { - # Stimulus - "accel": vapi.NpuAccelerator.Ethos_U55_256, - "block_depth": 8, - "ifm_dtype": np.uint8, - "shape": (3, 3, 16, 64), - "layout": "HWIO", - "zero_point": np.int64(134), - "dilation": (1, 1), - "is_depthwise": False, - # Reference outputs - "block_traversal": vapi.NpuBlockTraversal.PART_KERNEL_FIRST, - }, - { - # Stimulus - "accel": vapi.NpuAccelerator.Ethos_U55_256, - "block_depth": 8, - "ifm_dtype": np.uint8, - "shape": (3, 3, 32, 64), - "layout": "HWIO", - "zero_point": np.int64(134), - "dilation": (1, 1), - "is_depthwise": False, - # Reference outputs - "block_traversal": vapi.NpuBlockTraversal.DEPTH_FIRST, - }, - { - # Stimulus - "accel": vapi.NpuAccelerator.Ethos_U55_256, - "block_depth": 8, - "ifm_dtype": np.int16, - "shape": (3, 3, 16, 64), - "layout": "HWIO", - "zero_point": np.int64(134), - "dilation": (1, 1), - "is_depthwise": False, - # Reference outputs - "block_traversal": vapi.NpuBlockTraversal.DEPTH_FIRST, - }, - # Pass-through value check - { - # Stimulus - "accel": vapi.NpuAccelerator.Ethos_U55_128, - "block_depth": 16, - "ifm_dtype": np.uint8, - "shape": (243, 152, 7, 
1), - "layout": "HWOI", - "zero_point": np.int64(110), - "dilation": (2, 2), - "is_depthwise": True, - # Reference outputs - "block_traversal": vapi.NpuBlockTraversal.DEPTH_FIRST, - }, - { - # Stimulus - "accel": vapi.NpuAccelerator.Ethos_U55_128, - "block_depth": 32, - "ifm_dtype": np.uint8, - "shape": (64, 67, 35, 8), - "layout": "OHWI", - "zero_point": np.int64(100), - "dilation": (1, 2), - "is_depthwise": False, - # Reference outputs - "block_traversal": vapi.NpuBlockTraversal.PART_KERNEL_FIRST, - }, - ] - - def verify(test_vec, mock_obj): - layout_transform_indices = { - "HWIO": (3, 0, 1, 2), - "HWOI": (2, 0, 1, 3), - "OHWI": (0, 1, 2, 3), - } - - assert mock_obj - mock_obj.assert_called_once() - assert mock_obj.call_args[1]["accelerator"] == test_vec["accel"] - assert mock_obj.call_args[1]["accelerator"] == test_vec["accel"] - ishape = test_vec["shape"] - shape_owhi = ( - ishape[layout_transform_indices[test_vec["layout"]][0]], - ishape[layout_transform_indices[test_vec["layout"]][1]], - ishape[layout_transform_indices[test_vec["layout"]][2]], - ishape[layout_transform_indices[test_vec["layout"]][3]], - ) - assert mock_obj.call_args[1]["weights_volume"].shape == shape_owhi - assert mock_obj.call_args[1]["dilation_xy"] == test_vec["dilation"] - assert mock_obj.call_args[1]["ifm_bitdepth"] == np.iinfo(test_vec["ifm_dtype"]).bits - assert mock_obj.call_args[1]["ofm_block_depth"] == test_vec["block_depth"] - assert mock_obj.call_args[1]["is_depthwise"] == test_vec["is_depthwise"] - assert mock_obj.call_args[1]["block_traversal"] == test_vec["block_traversal"] - - def create_mock(test_vec): - with patch("ethosu.vela.api.npu_encode_weights") as mock_npu_encode_weights: - ifm_bitdepth = np.iinfo(test_vec["ifm_dtype"]).bits - ifm_dtype = test_vec["ifm_dtype"] - max = np.iinfo(ifm_dtype).max - min = np.iinfo(ifm_dtype).min - values = np.random.randint(min, max, test_vec["shape"], ifm_dtype) - vela_api.compress_weights( - weights=values, - weights_zp=test_vec["zero_point"], - weights_layout=test_vec["layout"], - ifm_bitdepth=ifm_bitdepth, - block_depth=test_vec["block_depth"], - dilation=test_vec["dilation"], - accel_config=test_vec["accel"], - is_depthwise=test_vec["is_depthwise"], - ) - return mock_npu_encode_weights - - for tv in test_vecs: - mock_obj = create_mock(tv) - verify(tv, mock_obj) - - -def test_pack_biases(): - test_vecs = [ - { - # Stimulus - "bias_length": 3, - "ifm_scale": np.single(1.11111111), - "ifm_dtype": np.uint8, - "weight_scales": np.array( - [np.single(0.91111111), np.single(1.01111111), np.single(1.11111111)] - ), - "ofm_scale": np.single(1.2), - "is_activation_tanh_or_sigmoid": False, - # Reference outputs - "hw_scales": (1811663288, 2010504240, 1104672703), - "hw_shifts": (31, 31, 30), - }, - { - # Stimulus - "bias_length": 3, - "ifm_scale": np.single(1.11111111), - "ifm_dtype": np.int8, - "weight_scales": np.array( - [np.single(0.91111111), np.single(1.01111111), np.single(1.11111111)] - ), - "ofm_scale": np.single(1.2), - "is_activation_tanh_or_sigmoid": False, - # Reference outputs - "hw_scales": (1811663185, 2010504312, 1104672720), - "hw_shifts": (31, 31, 30), - }, - { - # Stimulus - "bias_length": 3, - "ifm_scale": np.single(1.11111111), - "ifm_dtype": np.int16, - "weight_scales": np.array( - [np.single(0.91111111), np.single(1.01111111), np.single(1.11111111)] - ), - "ofm_scale": np.single(1.2), - "is_activation_tanh_or_sigmoid": False, - # Reference outputs - "hw_scales": (27644, 30678, 16856), - "hw_shifts": (15, 15, 14), - }, - ] - - def verify(test_vec, 
mock_obj, packed_biases): - assert mock_obj - for idx, val in enumerate(test_vec["bias_values"]): - assert val == mock_obj.call_args_list[idx][0][0] - assert test_vec["hw_scales"][idx] == mock_obj.call_args_list[idx][0][1] - assert test_vec["hw_shifts"][idx] == mock_obj.call_args_list[idx][0][2] - - def create_mock(test_vec): - with patch("ethosu.vela.api.npu_encode_bias") as mock_npu_encode_bias: - mock_npu_encode_bias.return_value = bytearray(10) - ifm_dtype = test_vec["ifm_dtype"] - max = np.iinfo(ifm_dtype).max - min = np.iinfo(ifm_dtype).min - # tvm will always create biases in int32 - biases = np.random.randint(min, max, test_vec["bias_length"], np.int32) - packed_biases = vela_api.pack_biases( - biases=biases, - ifm_scale=test_vec["ifm_scale"], - ifm_dtype=test_vec["ifm_dtype"], - weight_scales=test_vec["weight_scales"], - ofm_scale=test_vec["ofm_scale"], - is_activation_tanh_or_sigmoid=test_vec["is_activation_tanh_or_sigmoid"], - ) - test_vec["bias_values"] = biases - return mock_npu_encode_bias, packed_biases - return None - - for _test_vec in test_vecs: - mock_obj, packed_biases = create_mock(_test_vec) - verify(_test_vec, mock_obj, packed_biases) - - -def extract_ethosu_conv2d_extern_calls(mod): - """This function will obtain all ethosu_conv2d - calls from a NPU TIR module - - Parameters - ---------- - mod : tvm.IRModule - This is a NPU TIR Module - - Returns - ------- - list - List of tvm.tir.Call objects - that are tir extern calls - for ethosu_conv2d - """ - # There should only be a single function - assert len(mod.functions.items()) == 1 - primfunc = mod.functions.items()[0][1] - - ethosu_conv2d_calls = list() - - def populate_ethosu_conv2d_calls(stmt): - if ( - isinstance(stmt, tvm.tir.Call) - and stmt.op.name == "T.call_extern" - and stmt.args[0] == "ethosu_conv2d" - ): - ethosu_conv2d_calls.append(stmt) - - stmt_functor.post_order_visit(primfunc.body, populate_ethosu_conv2d_calls) - return ethosu_conv2d_calls - - -@pytest.mark.parametrize( - "accel", - ACCEL_TYPES, -) -def test_encode_weights(accel): - test_vecs = [ - { - # Stimulus - "tir_module": Module1, - "param_dict": { - 1: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [48], "uint8"), - 2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [16], "int32"), - }, - "accel_type": accel, - # Reference outputs - "block_traversal": vapi.NpuBlockTraversal.PART_KERNEL_FIRST, - }, - ] - - def create_mock(test_vec): - with patch("ethosu.vela.api.npu_encode_weights") as mock_enc_w: - with patch("ethosu.vela.api.npu_find_block_configs") as mock_blk_cfg: - mock_blk_cfg.return_value = [vapi.NpuShape3D(8, 8, 8)] - ethosu_conv2d_calls = extract_ethosu_conv2d_extern_calls(test_vec["tir_module"]) - buffer_info = tirtocs.extract_buffer_info( - test_vec["tir_module"], test_vec["param_dict"] - ) - for ethosu_conv2d_call in ethosu_conv2d_calls: - npu_op, _ = tirtocs.translate_ethosu_conv2d(ethosu_conv2d_call) - weights = buffer_info[npu_op.weights[0].address.buffer_var][0] - vela_api.encode_weights(ethosu_conv2d_call, weights, accel) - return mock_enc_w - - def verify(test_vec, mock_enc_w): - ethosu_conv2d_calls = extract_ethosu_conv2d_extern_calls(test_vec["tir_module"]) - buffer_info = tirtocs.extract_buffer_info(test_vec["tir_module"], test_vec["param_dict"]) - for ethosu_conv2d_call in ethosu_conv2d_calls: - npu_op, w_zero_point = tirtocs.translate_ethosu_conv2d(ethosu_conv2d_call) - weights = buffer_info[npu_op.weights[0].address.buffer_var][0] - - assert mock_enc_w.call_args[1]["accelerator"] == accel 
- assert ( - mock_enc_w.call_args[1]["weights_volume"].flatten() - == weights.astype(np.int64) - w_zero_point - ).all() - assert mock_enc_w.call_args[1]["dilation_xy"] == ( - npu_op.kernel.dilation_x, - npu_op.kernel.dilation_y, - ) - assert mock_enc_w.call_args[1]["dilation_xy"] == ( - npu_op.kernel.dilation_x, - npu_op.kernel.dilation_y, - ) - assert mock_enc_w.call_args[1]["ifm_bitdepth"] == npu_op.ifm.data_type.size_in_bits() - assert mock_enc_w.call_args[1]["block_traversal"] == test_vec["block_traversal"] - - for _test_vec in test_vecs: - _mock_enc_w = create_mock(_test_vec) - verify(_test_vec, _mock_enc_w) - - -def test_find_block_config_with_vela(): - block_configs_cases = [ - { - "accel_type": vapi.NpuAccelerator.Ethos_U55_256, - "ref": vapi.NpuShape3D(30, 12, 8), - }, - { - "accel_type": vapi.NpuAccelerator.Ethos_U55_128, - "ref": vapi.NpuShape3D(17, 10, 8), - }, - { - "accel_type": vapi.NpuAccelerator.Ethos_U55_64, - "ref": vapi.NpuShape3D(25, 5, 8), - }, - { - "accel_type": vapi.NpuAccelerator.Ethos_U55_32, - "ref": vapi.NpuShape3D(25, 5, 4), - }, - ] - - mod = Module3 - ethosu_conv2d_call = mod["main"].body.body.seq[1].body.value - npu_op, _ = tirtocs.translate_ethosu_conv2d(ethosu_conv2d_call) - - for case in block_configs_cases: - assert vela_api._find_block_config_with_vela(npu_op, case["accel_type"]) == case["ref"] - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/contrib/test_uma/test_uma_pipeline.py b/tests/python/contrib/test_uma/test_uma_pipeline.py deleted file mode 100644 index 0d7ed3ab9587..000000000000 --- a/tests/python/contrib/test_uma/test_uma_pipeline.py +++ /dev/null @@ -1,218 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import pytest - -pytest.importorskip("tflite") -pytest.importorskip("tensorflow") - -import os -import tensorflow as tf -from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER -from tvm.relay import transform, testing -from tvm.testing.aot import ( - AOTTestModel, - AOTTestRunner, - generate_ref_data, - compile_and_run, - create_relay_module_and_inputs_from_tflite_file, -) - -import tvm -from test_uma_vanilla_accelerator import VanillaAcceleratorBackend -from tvm import relay -import numpy as np -from collections import OrderedDict - -from tvm.relay.backend.contrib.uma.api.utils import uma_available - -pytestmark = pytest.mark.skipif(not uma_available(), reason="UMA not available") - - -@pytest.mark.parametrize( - "interface_api,use_unpacked_api,test_runner,groups,weight_shape", - [("c", True, AOT_DEFAULT_RUNNER, 1, 32)], -) -def test_conv2d(interface_api, use_unpacked_api, test_runner, groups, weight_shape): - """Test a subgraph with a single conv2d operator.""" - mod, inputs, output_list, test_runner = create_conv2d(groups, test_runner, weight_shape) - - uma_backend = VanillaAcceleratorBackend() - uma_backend.register() - mod = uma_backend.partition(mod) - target = tvm.target.Target("vanilla_accelerator", host=tvm.target.Target("c")) - - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - target=target, - ) - - -def create_conv2d(groups=1, test_runner=AOT_DEFAULT_RUNNER, weight_shape=32): - dtype = "float32" - ishape = (1, 32, 14, 14) - wshape = (32, weight_shape, 3, 3) - pass_config = {"tir.usmp.enable": True} - test_runner = AOTTestRunner( - makefile=test_runner.makefile, - prologue=test_runner.prologue, - epilogue=test_runner.epilogue, - includes=test_runner.includes, - parameters=test_runner.parameters, - pass_config=pass_config, - ) - data0 = relay.var("data", shape=ishape, dtype=dtype) - weight0 = relay.var("weight", shape=wshape, dtype=dtype) - out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=groups) - main_f = relay.Function([data0, weight0], out) - mod = tvm.IRModule() - mod["main"] = main_f - mod = transform.InferType()(mod) - i_data = np.random.uniform(0, 1, ishape).astype(dtype) - w1_data = np.random.uniform(0, 1, wshape).astype(dtype) - inputs = OrderedDict([("data", i_data), ("weight", w1_data)]) - output_list = generate_ref_data(mod, inputs) - return mod, inputs, output_list, test_runner - - -def _generate_runtime_data(input_shapes: dict, output_shapes: dict) -> [OrderedDict, OrderedDict]: - assert len(input_shapes) == 1 - assert len(output_shapes) == 1 - - iname = list(input_shapes.keys())[0] - oname = list(output_shapes.keys())[0] - ishape = input_shapes[iname] - oshape = output_shapes[oname] - i_data = np.random.uniform(0, 1, ishape).astype("float32") - o_data = np.random.uniform(0, 1, oshape).astype("float32") - oname = "output" # name set by relay.build in executor_codegen_metadata.outputs - inputs = OrderedDict([(iname, i_data)]) - outputs = OrderedDict([(oname, o_data)]) - return inputs, outputs - - -def test_mobilenet(): - """Full network test with Mobilenet""" - use_unpacked_api = True - interface_api = "c" - test_runner = AOT_DEFAULT_RUNNER - - mod, params = testing.mobilenet.get_workload(batch_size=1) - - uma_backend = VanillaAcceleratorBackend() - uma_backend.register() - target = tvm.target.Target("vanilla_accelerator", host=tvm.target.Target("c")) - target_c = tvm.target.Target("c") - - data_shape = [int(x) for x in 
mod["main"].checked_type.arg_types[0].shape] - data = np.random.uniform(size=data_shape).astype("float32") - input_list = {"data": data} - output_list = generate_ref_data(mod, input_list, params) - mod = uma_backend.partition(mod) - aot_test_model = AOTTestModel(module=mod, inputs=input_list, outputs=output_list, params=params) - - compile_and_run( - aot_test_model, - test_runner, - interface_api, - use_unpacked_api, - workspace_byte_alignment=1, - debug_calculated_workspaces=False, - target=[target_c, target], - ) - - -def test_tflite_model(): - """ - End-to-end test of TF-Lite file using UMA - """ - tflite_file = "/tmp/model.tflite" - if os.path.exists(tflite_file): - os.remove(tflite_file) - generate_tflite_file(tflite_file) - - pytest.importorskip("tflite") - - interpreter = tf.lite.Interpreter(model_path=tflite_file) - tf_model_details = interpreter.get_input_details() - mod, _, params = create_relay_module_and_inputs_from_tflite_file( - tflite_file, bind_params_by_name=False - ) - - uma_backend = VanillaAcceleratorBackend() - uma_backend.register() - target = tvm.target.Target("vanilla_accelerator", host=tvm.target.Target("c")) - target_c = tvm.target.Target("c") - - # Generation of test input and output - data_shape = [int(x) for x in mod["main"].params[0].type_annotation.shape] - data = np.random.uniform(size=data_shape).astype("float32") - input_list = {str(tf_model_details[0]["name"]): data} - output_list = generate_ref_data(mod, input_list, params) - - # UMA partitioning (needs to be done after generate_ref_data) - mod = uma_backend.partition(mod) - - aot_test_model = AOTTestModel(module=mod, inputs=input_list, outputs=output_list, params=params) - test_runner = AOTTestRunner( - pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": "greedy_by_size"} - ) - - compile_and_run( - aot_test_model, - test_runner, - interface_api="c", - use_unpacked_api=True, - workspace_byte_alignment=1, - debug_calculated_workspaces=False, - target=[target_c, target], - ) - - -def generate_tflite_file(tflite_filename): - mnist = tf.keras.datasets.mnist - (x_train, y_train), (x_test, y_test) = mnist.load_data() - x_train, x_test = x_train / 255.0, x_test / 255.0 - x_train, x_test = x_train.reshape(-1, 28, 28, 1), x_test.reshape(-1, 28, 28, 1) - tf_model = tf.keras.models.Sequential( - [ - tf.keras.Input(shape=(28, 28, 1)), - tf.keras.layers.Conv2D(4, (3, 3), padding="same", activation="relu"), - tf.keras.layers.Flatten(input_shape=(28, 28)), - tf.keras.layers.Dense(32, activation="relu"), - tf.keras.layers.Dropout(0.2), - tf.keras.layers.Dense(10), - ] - ) - output = tf_model(x_train[:1]) - output = output.numpy() - loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) - loss(y_train[:1], output).numpy() - tf_model.compile(metrics=["accuracy"], optimizer="adam", loss=loss) - tf_model.fit(x_train, y_train, epochs=1) - - tflite_converter = tf.lite.TFLiteConverter.from_keras_model(tf_model) - tflite_model = tflite_converter.convert() - with open(tflite_filename, "wb") as f: - f.write(tflite_model) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/driver/tvmc/test_command_line.py b/tests/python/driver/tvmc/test_command_line.py index 2b7d00510058..af6cf8a26f73 100644 --- a/tests/python/driver/tvmc/test_command_line.py +++ b/tests/python/driver/tvmc/test_command_line.py @@ -25,7 +25,6 @@ import tvm from tvm.driver.tvmc.main import _main -from tvm.driver.tvmc.model import TVMCException from tvm.driver.tvmc import compiler from unittest.mock import MagicMock 
diff --git a/tests/python/driver/tvmc/test_compiler.py b/tests/python/driver/tvmc/test_compiler.py index 4bf6b27ccfb0..0019bb366b8d 100644 --- a/tests/python/driver/tvmc/test_compiler.py +++ b/tests/python/driver/tvmc/test_compiler.py @@ -28,7 +28,6 @@ from tvm.ir.memory_pools import WorkspacePoolInfo, WorkspaceMemoryPools from tvm.target import Target import tvm.testing -from tvm.relay.op.contrib.ethosn import ethosn_available from tvm.relay.backend import Runtime, Executor from tvm import relay @@ -51,396 +50,6 @@ def test_save_dumps(tmpdir_factory): assert path.exists("{}/{}".format(tmpdir, "fake_module.relay")) -def test_save_dump_offloads_ethosu(tmp_path_factory): - - tflite = pytest.importorskip("tflite") - tensorflow = pytest.importorskip("tensorflow") - pytest.importorskip("ethosu.vela") - - import tensorflow as tf - import tflite.Model - from tvm.driver.tvmc.model import TVMCModel - - inp = (224, 224, 9) - input_shape = (1, *inp) - kernel_shape = (3, 3) - padding = (1, 1, 1, 1) - padding_out = (1, 33, 33, 1) - - @tf.function - def simple_net(x): - weight_shape = [kernel_shape[0], kernel_shape[1], input_shape[3], 3] - weights = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - weight_shape[2] = 3 - weights1 = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - weights2 = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - op = tf.nn.conv2d( - x, - filters=weights, - strides=1, - padding="SAME", - data_format="NHWC", - dilations=1, - ) - op1 = tf.nn.conv2d( - op, - filters=weights1, - strides=1, - padding="SAME", - data_format="NHWC", - dilations=1, - ) - op2 = tf.nn.conv2d( - op, - filters=weights2, - strides=1, - padding="SAME", - data_format="NHWC", - dilations=1, - ) - op = tf.concat([op1, op2], 1) - op = tf.pad( - op, - [[0, 0], [padding[0], padding_out[1]], [padding_out[2], padding[3]], [0, 0]], - "CONSTANT", - ) - return op - - from tests.python.contrib.test_ethosu.infra import get_tflite_graph - - _, tflite_graph = get_tflite_graph(simple_net, [input_shape]) - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - mod, params = relay.frontend.from_tflite(tflite_model) - - tvmc_model = TVMCModel(mod, params) - - output_dir = tmp_path_factory.mktemp("tmp") - output_file_name = os.path.join(str(output_dir), "list.txt") - - tvmc.compiler.compile_model( - tvmc_model, - target="ethos-u,cmsis-nn,c", - runtime=Runtime("crt"), - tuning_records="", - package_path="module.tar", - executor=Executor("aot", {"unpacked-api": 1, "interface-api": "c", "link-params": True}), - cross="", - cross_options="", - output_format="mlf", - dump_offloads=output_file_name, - disabled_pass=[""], - pass_context_configs=[ - "tir.disable_vectorize=1", - "tir.usmp.enable=1", - "tir.usmp.algorithm=hill_climb", - "tir.disable_storage_rewrite=1", - "relay.frontend.fill_span=1", - ], - additional_target_options={ - "c": {"mcpu": "cortex-m55"}, - "cmsis-nn": {"mcpu": "cortex-m55"}, - "ethos-u": { - "accelerator_config": "ethos-u55-256", - }, - }, - ) - - expected = [ - r"Total number of operators and distribution by targets", - r"Total: 11", - r"ethos-u: 10", - r"generic: 1", - r"", - r"ethos-u <- ethos-u.qnn_conv2d", - r'ethos-u <- %0 = qnn.conv2d(%x, %v_param_1, -128, 0, 0.00392157f, meta[relay.Constant][0], padding=[1, 1, 1, 1], channels=3, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO", out_dtype="int32")', - r"ethos-u <- %1 = nn.bias_add(%0, %v_param_2, axis=3)", - r'ethos-u <- %2 = qnn.requantize(%1, meta[relay.Constant][1], 
0, 0.11364f, -128, axis=3, out_dtype="int8")', - r"ethos-u <- ethos-u.qnn_conv2d", - r'ethos-u <- %3 = qnn.conv2d(%2, %v_param_3, -128, 0, 0.11364f, meta[relay.Constant][2], padding=[1, 1, 1, 1], channels=3, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO", out_dtype="int32")', - r"ethos-u <- %4 = nn.bias_add(%3, %v_param_4, axis=3)", - r'ethos-u <- %7 = qnn.requantize(%4, meta[relay.Constant][3], 0, 1.56803f, -128, axis=3, out_dtype="int8")', - r"ethos-u <- ethos-u.qnn_conv2d", - r'ethos-u <- %5 = qnn.conv2d(%2, %v_param_5, -128, 0, 0.11364f, meta[relay.Constant][4], padding=[1, 1, 1, 1], channels=3, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO", out_dtype="int32")', - r"ethos-u <- %6 = nn.bias_add(%5, %v_param_6, axis=3)", - r'ethos-u <- %8 = qnn.requantize(%6, meta[relay.Constant][5], 0, 1.20538f, -128, axis=3, out_dtype="int8")', - r" %9 = (%7, %8)", - r" %10 = (1.59778f, 1.59778f)", - r" %11 = (-128, -128)", - r"ethos-u <- ethos-u.concat", - r"ethos-u <- %12 = qnn.concatenate(%9, %10, %11, 1.59778f, -128, axis=1)", - r"generic <- nn.pad(%12, -128f, pad_width=[[0, 0], [1, 33], [33, 1], [0, 0]])", - ] - - file_path = os.path.abspath(output_file_name) - # check that file file_path was created - assert os.path.exists(file_path) - with open(file_path, "r") as f: - for i, file_string in enumerate(f): - r_output = re.search(r"(.*)\(", file_string.strip(), re.DOTALL) - r_expected = re.search(r"(.*)\(", expected[i].strip(), re.DOTALL) - # check that there is the same sequence of operations and composites, - # combined with target names - if r_output and r_expected: - assert r_output.group(0) == r_expected.group(0) - else: - assert r_output == r_expected - - -def test_save_dump_offloads_cmsis(tmp_path_factory): - - tflite = pytest.importorskip("tflite") - tensorflow = pytest.importorskip("tensorflow") - pytest.importorskip("ethosu.vela") - - import tensorflow as tf - from tvm.driver.tvmc.model import TVMCModel - - inp = (224, 224, 9) - input_shape = (1, *inp) - kernel_shape = (3, 3) - padding = (1, 1, 1, 1) - padding_out = (1, 33, 33, 1) - - @tf.function - def simple_net(x): - weight_shape = [kernel_shape[0], kernel_shape[1], input_shape[3], 3] - weights = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - weight_shape[2] = 3 - weights1 = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - weights2 = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - op = tf.nn.conv2d( - x, - filters=weights, - strides=1, - padding="SAME", - data_format="NHWC", - dilations=1, - ) - op1 = tf.nn.conv2d( - op, - filters=weights1, - strides=1, - padding="SAME", - data_format="NHWC", - dilations=1, - ) - op2 = tf.nn.conv2d( - op, - filters=weights2, - strides=1, - padding="SAME", - data_format="NHWC", - dilations=1, - ) - op = tf.concat([op1, op2], 1) - op = tf.pad( - op, - [[0, 0], [padding[0], padding_out[1]], [padding_out[2], padding[3]], [0, 0]], - "CONSTANT", - ) - return op - - from tests.python.contrib.test_ethosu.infra import get_tflite_graph - - _, tflite_graph = get_tflite_graph(simple_net, [input_shape]) - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - mod, params = relay.frontend.from_tflite(tflite_model) - - tvmc_model = TVMCModel(mod, params) - - output_dir = tmp_path_factory.mktemp("tmp") - output_file_name = os.path.join(str(output_dir), "list.txt") - - tvmc.compiler.compile_model( - tvmc_model, - target="cmsis-nn,c", - runtime=Runtime("crt"), - tuning_records="", - package_path="module.tar", - 
executor=Executor("aot", {"unpacked-api": 1, "interface-api": "c", "link-params": True}), - cross="", - cross_options="", - output_format="mlf", - dump_offloads=output_file_name, - disabled_pass=[""], - pass_context_configs=[ - "tir.disable_vectorize=1", - "tir.usmp.enable=1", - "tir.usmp.algorithm=hill_climb", - "tir.disable_storage_rewrite=1", - "relay.frontend.fill_span=1", - ], - additional_target_options={ - "c": {"mcpu": "cortex-m55"}, - "cmsis-nn": {"mcpu": "cortex-m55"}, - }, - ) - - expected = [ - r"Total number of operators and distribution by targets", - r"Total: 11", - r"cmsis-nn: 9", - r"generic: 2", - r"", - r"cmsis-nn <- cmsis-nn.qnn_conv2d", - r'cmsis-nn <- %0 = qnn.conv2d(%x, %v_param_1, -128, 0, 0.00392157f, meta[relay.Constant][0], padding=[1, 1, 1, 1], channels=3, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO", out_dtype="int32")', - r"cmsis-nn <- %1 = nn.bias_add(%0, %v_param_2, axis=3)", - r'cmsis-nn <- %2 = qnn.requantize(%1, meta[relay.Constant][1], 0, 0.115114f, -128, axis=3, out_dtype="int8")', - r"cmsis-nn <- cmsis-nn.qnn_conv2d", - r'cmsis-nn <- %3 = qnn.conv2d(%2, %v_param_3, -128, 0, 0.115114f, meta[relay.Constant][2], padding=[1, 1, 1, 1], channels=3, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO", out_dtype="int32")', - r"cmsis-nn <- %4 = nn.bias_add(%3, %v_param_4, axis=3)", - r'cmsis-nn <- %7 = qnn.requantize(%4, meta[relay.Constant][3], 0, 1.59328f, -128, axis=3, out_dtype="int8")', - r"cmsis-nn <- cmsis-nn.qnn_conv2d", - r'cmsis-nn <- %5 = qnn.conv2d(%2, %v_param_5, -128, 0, 0.115114f, meta[relay.Constant][4], padding=[1, 1, 1, 1], channels=3, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO", out_dtype="int32")', - r"cmsis-nn <- %6 = nn.bias_add(%5, %v_param_6, axis=3)", - r'cmsis-nn <- %8 = qnn.requantize(%6, meta[relay.Constant][5], 0, 1.59328f, -128, axis=3, out_dtype="int8")', - r" %9 = (%7, %8)", - r" %10 = (1.59328f, 1.59328f)", - r" %11 = (-128, -128)", - r"generic <- %12 = qnn.concatenate(%9, %10, %11, 1.59328f, -128, axis=1)", - r"generic <- nn.pad(%12, -128f, pad_width=[[0, 0], [1, 33], [33, 1], [0, 0]])", - ] - - file_path = os.path.abspath(output_file_name) - # check that file file_path was created - assert os.path.exists(file_path) - with open(file_path, "r") as f: - for i, file_string in enumerate(f): - r_output = re.search(r"(.*)\(", file_string.replace("\n", ""), re.DOTALL) - r_expected = re.search(r"(.*)\(", expected[i], re.DOTALL) - # check that there is the same sequence of operations and composites, - # combined with target names - if r_output and r_expected: - assert r_output.group(0) == r_expected.group(0) - else: - assert file_string.replace("\n", "") == expected[i] - - -def test_save_dump_offloads_generic(tmp_path_factory): - - tflite = pytest.importorskip("tflite") - tensorflow = pytest.importorskip("tensorflow") - pytest.importorskip("ethosu.vela") - - import tensorflow as tf - from tvm.driver.tvmc.model import TVMCModel - - inp = (224, 224, 9) - input_shape = (1, *inp) - kernel_shape = (3, 3) - padding = (1, 1, 1, 1) - padding_out = (1, 33, 33, 1) - - @tf.function - def simple_net(x): - weight_shape = [kernel_shape[0], kernel_shape[1], input_shape[3], 3] - weights = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - weight_shape[2] = 3 - weights1 = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - weights2 = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32) - op = tf.nn.conv2d( - x, - filters=weights, - strides=1, - padding="SAME", 
- data_format="NHWC", - dilations=1, - ) - op1 = tf.nn.conv2d( - op, - filters=weights1, - strides=1, - padding="SAME", - data_format="NHWC", - dilations=1, - ) - op2 = tf.nn.conv2d( - op, - filters=weights2, - strides=1, - padding="SAME", - data_format="NHWC", - dilations=1, - ) - op = tf.concat([op1, op2], 1) - op = tf.pad( - op, - [[0, 0], [padding[0], padding_out[1]], [padding_out[2], padding[3]], [0, 0]], - "CONSTANT", - ) - return op - - from tests.python.contrib.test_ethosu.infra import get_tflite_graph - - _, tflite_graph = get_tflite_graph(simple_net, [input_shape]) - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - mod, params = relay.frontend.from_tflite(tflite_model) - - tvmc_model = TVMCModel(mod, params) - - output_dir = tmp_path_factory.mktemp("tmp") - output_file_name = os.path.join(str(output_dir), "list.txt") - - tvmc.compiler.compile_model( - tvmc_model, - target="c", - runtime=Runtime("crt"), - tuning_records="", - package_path="module.tar", - executor=Executor("aot", {"unpacked-api": 1, "interface-api": "c", "link-params": True}), - cross="", - cross_options="", - output_format="mlf", - dump_offloads=output_file_name, - disabled_pass=[""], - pass_context_configs=[ - "tir.disable_vectorize=1", - "tir.usmp.enable=1", - "tir.usmp.algorithm=hill_climb", - "tir.disable_storage_rewrite=1", - "relay.frontend.fill_span=1", - ], - additional_target_options={ - "c": {"mcpu": "cortex-m55"}, - }, - ) - - expected = [ - r"Total number of operators and distribution by targets", - r"Total: 11", - r"generic: 11", - r"", - r'generic <- %0 = qnn.conv2d(%x, %v_param_1, -128, 0, 0.00392157f, meta[relay.Constant][0], padding=[1, 1, 1, 1], channels=3, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO", out_dtype="int32")', - r"generic <- %1 = nn.bias_add(%0, %v_param_2, axis=3)", - r'generic <- %2 = qnn.requantize(%1, meta[relay.Constant][1], 0, 0.109484f, -128, axis=3, out_dtype="int8")', - r'generic <- %3 = qnn.conv2d(%2, %v_param_3, -128, 0, 0.109484f, meta[relay.Constant][2], padding=[1, 1, 1, 1], channels=3, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO", out_dtype="int32")', - r"generic <- %4 = nn.bias_add(%3, %v_param_4, axis=3)", - r'generic <- %5 = qnn.conv2d(%2, %v_param_5, -128, 0, 0.109484f, meta[relay.Constant][4], padding=[1, 1, 1, 1], channels=3, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO", out_dtype="int32")', - r"generic <- %6 = nn.bias_add(%5, %v_param_6, axis=3)", - r'generic <- %7 = qnn.requantize(%4, meta[relay.Constant][3], 0, 1.45572f, -128, axis=3, out_dtype="int8")', - r'generic <- %8 = qnn.requantize(%6, meta[relay.Constant][5], 0, 1.45572f, -128, axis=3, out_dtype="int8")', - r" %9 = (%7, %8)", - r" %10 = (1.45572f, 1.45572f)", - r" %11 = (-128, -128)", - r"generic <- %12 = qnn.concatenate(%9, %10, %11, 1.45572f, -128, axis=1)", - r"generic <- nn.pad(%12, -128f, pad_width=[[0, 0], [1, 33], [33, 1], [0, 0]])", - ] - - file_path = os.path.abspath(output_file_name) - # check that file file_path was created - assert os.path.exists(file_path) - with open(file_path, "r") as f: - for i, file_string in enumerate(f): - r_output = re.search(r"(.*)\(", file_string.replace("\n", ""), re.DOTALL) - r_expected = re.search(r"(.*)\(", expected[i], re.DOTALL) - # check that there is the same sequence of operations and composites, - # combined with target names - if r_output and r_expected: - assert r_output.group(0) == r_expected.group(0) - else: - assert file_string.replace("\n", "") == expected[i] - - # End to end tests 
for compilation @@ -801,55 +410,6 @@ def test_compile_opencl(tflite_mobilenet_v1_0_25_128): assert path.exists("{}.{}".format(tvmc_package.package_path, "opencl")) -@tvm.testing.requires_cmsisnn -def test_compile_tflite_module_with_external_codegen_cmsisnn( - tmpdir_factory, tflite_cnn_s_quantized -): - pytest.importorskip("tflite") - - output_dir = tmpdir_factory.mktemp("mlf") - tvmc_model = tvmc.load(tflite_cnn_s_quantized) - - output_file_name = f"{output_dir}/file.tar" - - tvmc.compiler.compile_model( - tvmc_model, - target=f"cmsis-nn, c -mcpu=cortex-m55", - runtime=Runtime("crt", {"system-lib": True}), - executor=Executor("aot"), - output_format="mlf", - package_path=output_file_name, - pass_context_configs=["tir.disable_vectorize=true"], - ) - - # check whether an MLF package was created - assert os.path.exists(output_file_name) - - # check whether the expected number of C sources are in the tarfile - with tarfile.open(output_file_name) as mlf_package: - c_source_files = [ - name - for name in mlf_package.getnames() - if re.match(r"\./codegen/host/src/\D+\d+\.c", name) - ] - assert len(c_source_files) == 4 - - -@tvm.testing.requires_ethosn -def test_compile_tflite_module_with_external_codegen_ethos_n78(tflite_mobilenet_v1_1_quant): - pytest.importorskip("tflite") - tvmc_model = tvmc.load(tflite_mobilenet_v1_1_quant) - tvmc_package = tvmc.compile(tvmc_model, target="ethos-n -variant=n78, llvm", dump_code="relay") - dumps_path = tvmc_package.package_path + ".relay" - - # check for output types - assert type(tvmc_package) is TVMCPackage - assert type(tvmc_package.graph) is str - assert type(tvmc_package.lib_path) is str - assert type(tvmc_package.params) is bytearray - assert os.path.exists(dumps_path) - - @tvm.testing.requires_vitis_ai def test_compile_tflite_module_with_external_codegen_vitis_ai(tflite_mobilenet_v1_1_quant): pytest.importorskip("tflite") @@ -888,47 +448,6 @@ def test_compile_pytorch_module_with_external_codegen_mrvl(pytorch_resnet18): assert os.path.exists(dumps_path) -def test_compile_tflite_module_with_external_codegen_ethosu( - tmpdir_factory, tflite_mobilenet_v1_1_quant -): - pytest.importorskip("tflite") - pytest.importorskip("ethosu.vela") - ACCEL_TYPES = ["ethos-u55-256", "ethos-u55-128", "ethos-u55-64", "ethos-u55-32"] - - output_dir = tmpdir_factory.mktemp("mlf") - - tvmc_model = tvmc.load(tflite_mobilenet_v1_1_quant) - - for accel_type in ACCEL_TYPES: - output_file_name = f"{output_dir}/file_{accel_type}.tar" - - tvmc.compiler.compile_model( - tvmc_model, - target=f"ethos-u -accelerator_config={accel_type}, c -mcpu=cortex-m55", - runtime=Runtime("crt"), - executor=Executor("aot", {"unpacked-api": True}), - output_format="mlf", - package_path=output_file_name, - pass_context_configs=["tir.disable_vectorize=true"], - ) - - # check whether an MLF package was created - assert os.path.exists(output_file_name) - - # check whether the expected number of C sources are in the tarfile - with tarfile.open(output_file_name) as mlf_package: - c_source_files = [ - name - for name in mlf_package.getnames() - if re.match(r"\./codegen/host/src/\D+\d+\.c", name) - ] - # The number of c_source_files depends on the number of fused subgraphs that - # get offloaded to the NPU, e.g. conv2d->depthwise_conv2d->conv2d gets offloaded - # as a single subgraph if both of these operators are supported by the NPU. 
- # Currently there are three source files for CPU execution and one offload graph - assert len(c_source_files) == 4 - - @mock.patch("tvm.relay.build") @mock.patch("tvm.driver.tvmc.composite_target.get_codegen_by_target") @mock.patch("tvm.driver.tvmc.load") @@ -962,154 +481,6 @@ def test_compile_check_configs_composite_target(mock_pkg, mock_pc, mock_fe, mock ) -def test_compile_tflite_module_with_mod_name(tmpdir_factory, tflite_cnn_s_quantized): - pytest.importorskip("tflite") - - output_dir = tmpdir_factory.mktemp("mlf") - tvmc_model = tvmc.load(tflite_cnn_s_quantized) - - output_file_name = f"{output_dir}/file.tar" - - tvmc.compiler.compile_model( - tvmc_model, - target=f"c -mcpu=cortex-m55", - runtime=Runtime("crt", {"system-lib": True}), - executor=Executor("aot"), - output_format="mlf", - package_path=output_file_name, - pass_context_configs=["tir.disable_vectorize=true"], - mod_name="classify", - ) - - # check that an MLF package was created - assert os.path.exists(output_file_name) - - with tarfile.open(output_file_name) as mlf_package: - # check that the C source files have been named classify_lib*.c - c_source_files = [ - name - for name in mlf_package.getnames() - if re.match(r"\./codegen/host/src/classify_lib\d+\.c", name) - ] - assert len(c_source_files) > 0 - - # check that "default" doesn't occur in any of the C source files - # check that function names are of the form "tvmgen_classify_*" - for file_name in c_source_files: - with mlf_package.extractfile(file_name) as f: - content = f.read() - assert b"default" not in content - assert b"tvmgen_classify_" in content - - # check that tvmgen_classify_run() function exists - with mlf_package.extractfile("./codegen/host/src/classify_lib0.c") as f: - content = f.read() - assert b"tvmgen_classify_run(" in content - - -@tvm.testing.requires_cmsisnn -def test_compile_tflite_module_with_mod_name_and_cmsisnn(tmpdir_factory, tflite_cnn_s_quantized): - pytest.importorskip("tflite") - - output_dir = tmpdir_factory.mktemp("mlf") - tvmc_model = tvmc.load(tflite_cnn_s_quantized) - - output_file_name = f"{output_dir}/file.tar" - - tvmc.compiler.compile_model( - tvmc_model, - target=f"cmsis-nn, c -mcpu=cortex-m55", - runtime=Runtime("crt", {"system-lib": True}), - executor=Executor("aot"), - output_format="mlf", - package_path=output_file_name, - pass_context_configs=["tir.disable_vectorize=true"], - mod_name="classify", - ) - - # check that an MLF package was created - assert os.path.exists(output_file_name) - - with tarfile.open(output_file_name) as mlf_package: - # check that the C source files have been named classify_lib*.c - c_source_files = [ - name - for name in mlf_package.getnames() - if re.match(r"\./codegen/host/src/classify_lib\d+\.c", name) - ] - assert len(c_source_files) > 0 - - # check that "default" doesn't occur in any of the C source files - # check that function names are of the form "tvmgen_classify_*" - for file_name in c_source_files: - with mlf_package.extractfile(file_name) as f: - content = f.read() - assert b"default" not in content - assert b"tvmgen_classify_" in content - - # check that tvmgen_classify_run() function exists - with mlf_package.extractfile("./codegen/host/src/classify_lib0.c") as f: - content = f.read() - assert b"tvmgen_classify_run(" in content - - # check that CMSIS-NN function names are of the form "tvmgen_classify_cmsis_nn_main_*" - with mlf_package.extractfile("./codegen/host/src/classify_lib2.c") as f: - content = f.read() - assert b"tvmgen_classify_cmsis_nn_main_" in content - - -def 
test_compile_tflite_module_with_mod_name_and_ethosu( - tmpdir_factory, tflite_mobilenet_v1_1_quant -): - pytest.importorskip("tflite") - pytest.importorskip("ethosu.vela") - - output_dir = tmpdir_factory.mktemp("mlf") - tvmc_model = tvmc.load(tflite_mobilenet_v1_1_quant) - output_file_name = f"{output_dir}/file.tar" - - tvmc.compiler.compile_model( - tvmc_model, - target=f"ethos-u -accelerator_config=ethos-u55-256, c -mcpu=cortex-m55", - runtime=Runtime("crt"), - executor=Executor("aot", {"unpacked-api": True}), - output_format="mlf", - package_path=output_file_name, - pass_context_configs=["tir.disable_vectorize=true"], - mod_name="classify", - ) - - # check that an MLF package was created - assert os.path.exists(output_file_name) - - with tarfile.open(output_file_name) as mlf_package: - # check that the C source files have been named classify_lib*.c - c_source_files = [ - name - for name in mlf_package.getnames() - if re.match(r"\./codegen/host/src/classify_lib\d+\.c", name) - ] - assert len(c_source_files) > 0 - - # check that "default" doesn't occur in any of the C source files - # check that function names are of the form "tvmgen_classify_*" - for file_name in c_source_files: - with mlf_package.extractfile(file_name) as f: - content = f.read() - assert b"default" not in content - assert b"tvmgen_classify_" in content - - # check that tvmgen_classify_run() function exists - with mlf_package.extractfile("./codegen/host/src/classify_lib0.c") as f: - content = f.read() - assert b"tvmgen_classify_run(" in content - - # check that microNPU function names are of the form "tvmgen_classify_ethos_u_main_*" - with mlf_package.extractfile("./codegen/host/src/classify_lib2.c") as f: - content = f.read() - assert b"tvmgen_classify_ethos_u_main_" in content - - @mock.patch("tvm.relay.build") @mock.patch("tvm.driver.tvmc.load") @mock.patch("tvm.driver.tvmc.model.TVMCPackage.__init__", return_value=None) diff --git a/tests/python/driver/tvmc/test_composite_target.py b/tests/python/driver/tvmc/test_composite_target.py index 2335563b3e22..0fb89d524be7 100644 --- a/tests/python/driver/tvmc/test_composite_target.py +++ b/tests/python/driver/tvmc/test_composite_target.py @@ -33,7 +33,6 @@ def test_get_codegen_names(): names = tvmc.composite_target.get_codegen_names() - assert "ethos-n" in names assert "vitis-ai" in names assert "mrvl" in names assert len(names) > 0 diff --git a/tests/python/driver/tvmc/test_mlf.py b/tests/python/driver/tvmc/test_mlf.py deleted file mode 100644 index f930f39bca92..000000000000 --- a/tests/python/driver/tvmc/test_mlf.py +++ /dev/null @@ -1,168 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import pytest -import os -import shlex -import sys - -import tvm -import tvm.testing -from tvm.autotvm.measure.executor import Executor -from tvm.driver import tvmc -from tvm.driver.tvmc.main import _main -from tvm.driver.tvmc.model import TVMCPackage, TVMCException -from tvm.relay import backend - - -def test_tvmc_cl_compile_run_mlf(tflite_mobilenet_v1_1_quant, tmpdir_factory): - target = "c" - executor = "aot" - pass_configs = ["tir.disable_vectorize=1"] - pytest.importorskip("tflite") - - output_dir = tmpdir_factory.mktemp("mlf") - input_model = tflite_mobilenet_v1_1_quant - output_file = os.path.join(output_dir, "mock.tar") - - # Compile the input model and generate a Model Library Format (MLF) archive. - pass_config_args = " ".join([f"--pass-config {pass_config}" for pass_config in pass_configs]) - tvmc_cmd = f"tvmc compile {input_model} --target={target} --executor={executor} {pass_config_args} --output {output_file} --output-format mlf" - tvmc_args = shlex.split(tvmc_cmd)[1:] - _main(tvmc_args) - assert os.path.exists(output_file), "Could not find the exported MLF archive." - - # Run the MLF archive. It must fail since it's only supported on micro targets. - tvmc_cmd = f"tvmc run {output_file}" - tvmc_args = tvmc_cmd.split(" ")[1:] - exit_code = _main(tvmc_args) - on_error = "Trying to run a MLF archive must fail because it's only supported on micro targets." - assert exit_code != 0, on_error - - -def test_tvmc_export_package_mlf(tflite_mobilenet_v1_1_quant, tmpdir_factory): - pytest.importorskip("tflite") - - tvmc_model = tvmc.frontends.load_model(tflite_mobilenet_v1_1_quant) - mod, params = tvmc_model.mod, tvmc_model.params - - graph_module = tvm.relay.build(mod, target="llvm", params=params) - - output_dir = tmpdir_factory.mktemp("mlf") - output_file = os.path.join(output_dir, "mock.tar") - - # Try to export MLF with no cross compiler set. No exception must be thrown. - tvmc_model.export_package( - executor_factory=graph_module, - package_path=output_file, - cross=None, - output_format="mlf", - ) - assert os.path.exists(output_file), "Could not find the exported MLF archive." - - # Try to export a MLF whilst also specifying a cross compiler. Since - # that's not supported it must throw a TVMCException and report the - # reason accordingly. - with pytest.raises(TVMCException) as exp: - tvmc_model.export_package( - executor_factory=graph_module, - package_path=output_file, - cross="cc", - output_format="mlf", - ) - expected_reason = "Specifying the MLF output and a cross compiler is not supported." - on_error = "A TVMCException was caught but its reason is not the expected one." - assert str(exp.value) == expected_reason, on_error - - -def test_tvmc_import_package_project_dir(tflite_mobilenet_v1_1_quant, tflite_compile_model): - pytest.importorskip("tflite") - - # Generate a MLF archive. - compiled_model_mlf_tvmc_package = tflite_compile_model( - tflite_mobilenet_v1_1_quant, output_format="mlf" - ) - - # Import the MLF archive setting 'project_dir'. It must succeed. - mlf_archive_path = compiled_model_mlf_tvmc_package.package_path - tvmc_package = TVMCPackage(mlf_archive_path, project_dir="/tmp/foobar") - assert tvmc_package.type == "mlf", "Can't load the MLF archive passing the project directory!" - - # Generate a Classic archive. - compiled_model_classic_tvmc_package = tflite_compile_model(tflite_mobilenet_v1_1_quant) - - # Import the Classic archive setting 'project_dir'. - # It must fail since setting 'project_dir' is only support when importing a MLF archive. 
- classic_archive_path = compiled_model_classic_tvmc_package.package_path - with pytest.raises(TVMCException) as exp: - tvmc_package = TVMCPackage(classic_archive_path, project_dir="/tmp/foobar") - - expected_reason = "Setting 'project_dir' is only allowed when importing a MLF.!" - on_error = "A TVMCException was caught but its reason is not the expected one." - assert str(exp.value) == expected_reason, on_error - - -def test_tvmc_import_package_mlf_graph(tflite_mobilenet_v1_1_quant, tflite_compile_model): - pytest.importorskip("tflite") - - tflite_compiled_model_mlf = tflite_compile_model( - tflite_mobilenet_v1_1_quant, output_format="mlf" - ) - - # Compile and export a model to a MLF archive so it can be imported. - exported_tvmc_package = tflite_compiled_model_mlf - archive_path = exported_tvmc_package.package_path - - # Import the MLF archive. TVMCPackage constructor will call import_package method. - tvmc_package = TVMCPackage(archive_path) - - assert tvmc_package.lib_name is None, ".lib_name must not be set in the MLF archive." - assert tvmc_package.lib_path is None, ".lib_path must not be set in the MLF archive." - assert ( - tvmc_package.graph is not None - ), ".graph must be set in the MLF archive for Graph executor." - assert tvmc_package.params is not None, ".params must be set in the MLF archive." - assert tvmc_package.type == "mlf", ".type must be set to 'mlf' in the MLF format." - - -def test_tvmc_import_package_mlf_aot(tflite_mobilenet_v1_1_quant, tflite_compile_model): - pytest.importorskip("tflite") - - tflite_compiled_model_mlf = tflite_compile_model( - tflite_mobilenet_v1_1_quant, - target="c", - executor=backend.Executor("aot"), - output_format="mlf", - pass_context_configs=["tir.disable_vectorize=1"], - ) - - # Compile and export a model to a MLF archive so it can be imported. - exported_tvmc_package = tflite_compiled_model_mlf - archive_path = exported_tvmc_package.package_path - - # Import the MLF archive. TVMCPackage constructor will call import_package method. - tvmc_package = TVMCPackage(archive_path) - - assert tvmc_package.lib_name is None, ".lib_name must not be set in the MLF archive." - assert tvmc_package.lib_path is None, ".lib_path must not be set in the MLF archive." - assert tvmc_package.graph is None, ".graph must not be set in the MLF archive for AOT executor." - assert tvmc_package.params is not None, ".params must be set in the MLF archive." - assert tvmc_package.type == "mlf", ".type must be set to 'mlf' in the MLF format." - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/driver/tvmc/test_parse_config_file.py b/tests/python/driver/tvmc/test_parse_config_file.py index 767cc547d678..cc822ed640a9 100644 --- a/tests/python/driver/tvmc/test_parse_config_file.py +++ b/tests/python/driver/tvmc/test_parse_config_file.py @@ -129,32 +129,6 @@ def test_parse_json_config_file_runtime(): assert tokens == expected_tokens -@tvm.testing.requires_cmsisnn -def test_tvmc_cl_compile_run_config_file(tflite_mobilenet_v1_1_quant, tmpdir_factory): - compile_config_file = "compile_config_test.json" - pytest.importorskip("tflite") - - output_dir = tmpdir_factory.mktemp("mlf") - input_model = tflite_mobilenet_v1_1_quant - output_file = os.path.join(output_dir, "mock.tar") - - # Compile the input model and generate a Model Library Format (MLF) archive. 
- tvmc_cmd = ( - f"tvmc compile --config {compile_config_file} {input_model} --output {output_file} " - f"--output-format mlf" - ) - tvmc_args = shlex.split(tvmc_cmd)[1:] - _main(tvmc_args) - assert os.path.exists(output_file), "Could not find the exported MLF archive." - - # Run the MLF archive. It must fail since it's only supported on micro targets. - tvmc_cmd = f"tvmc run {output_file}" - tvmc_args = tvmc_cmd.split(" ")[1:] - exit_code = _main(tvmc_args) - on_error = "Trying to run a MLF archive must fail because it's only supported on micro targets." - assert exit_code != 0, on_error - - def test_tvmc_get_configs_json_dir(tmpdir_factory, monkeypatch): # Reset global state monkeypatch.setattr(tvm.driver.tvmc.config_options, "CONFIGS_JSON_DIR", None) diff --git a/tests/python/driver/tvmc/test_target.py b/tests/python/driver/tvmc/test_target.py index 39e90e6d6ac4..7ce8ee9eae2c 100644 --- a/tests/python/driver/tvmc/test_target.py +++ b/tests/python/driver/tvmc/test_target.py @@ -112,30 +112,6 @@ def test_parse_multiple_target(): assert "llvm" == targets[1]["name"] -def test_parse_hybrid_target(): - """Hybrid Target and external codegen""" - targets = parse_target("cmsis-nn -accelerator_config=ethos-u55-256, llvm -device=arm_cpu") - - assert len(targets) == 2 - assert "cmsis-nn" == targets[0]["name"] - assert not targets[0]["is_tvm_target"] - assert "llvm" == targets[1]["name"] - assert targets[1]["is_tvm_target"] - - -def test_parse_multiple_hybrid_target(): - """Hybrid Target and multiple external codegen""" - targets = parse_target("ethos-u,cmsis-nn,c") - - assert len(targets) == 3 - assert "ethos-u" == targets[0]["name"] - assert not targets[0]["is_tvm_target"] - assert "cmsis-nn" == targets[1]["name"] - assert not targets[1]["is_tvm_target"] - assert "c" == targets[2]["name"] - assert targets[2]["is_tvm_target"] - - def test_parse_quotes_and_separators_on_options(): targets_no_quote = parse_target("foo -option1=+v1.0x,+value,+bar") targets_single_quote = parse_target("foo -option1='+v1.0x,+value'") @@ -151,15 +127,5 @@ def test_parse_quotes_and_separators_on_options(): assert "+v1.0x,+value" == targets_double_quote[0]["opts"]["option1"] -def test_parse_multiple_target_with_opts_ethos_n78(): - targets = parse_target("ethos-n -myopt=value, llvm -device=arm_cpu") - - assert len(targets) == 2 - assert "ethos-n" == targets[0]["name"] - assert "myopt" in targets[0]["opts"] - assert "value" == targets[0]["opts"]["myopt"] - assert "llvm" == targets[1]["name"] - - if __name__ == "__main__": tvm.testing.main() diff --git a/tests/python/driver/tvmc/test_target_options.py b/tests/python/driver/tvmc/test_target_options.py index 64218f02a0ab..352bfac7940d 100644 --- a/tests/python/driver/tvmc/test_target_options.py +++ b/tests/python/driver/tvmc/test_target_options.py @@ -35,24 +35,6 @@ def test_target_to_argparse(): assert parsed.target_llvm_mattr == "+fp,+mve" -@tvm.testing.requires_cmsisnn -def test_target_to_argparse_known_codegen(): - parser = argparse.ArgumentParser() - generate_target_args(parser) - parsed, _ = parser.parse_known_args( - [ - "--target=cmsis-nn,llvm", - "--target-cmsis-nn-mcpu=cortex-m3", - "--target-llvm-mattr=+fp,+mve", - "--target-llvm-mcpu=cortex-m3", - ] - ) - assert parsed.target == "cmsis-nn,llvm" - assert parsed.target_llvm_mcpu == "cortex-m3" - assert parsed.target_llvm_mattr == "+fp,+mve" - assert parsed.target_cmsis_nn_mcpu == "cortex-m3" - - @tvm.testing.requires_mrvl def test_target_to_argparse_for_mrvl_hybrid(): parser = argparse.ArgumentParser() @@ -99,14 +81,6 
@@ def test_mrvl_build_with_llvm_only_target(): assert parsed.target == "llvm" -@tvm.testing.requires_cmsisnn -def test_mapping_target_args(): - parser = argparse.ArgumentParser() - generate_target_args(parser) - parsed, _ = parser.parse_known_args(["--target=llvm", "--target-llvm-mcpu=cortex-m3"]) - assert reconstruct_target_args(parsed) == {"llvm": {"mcpu": "cortex-m3"}} - - @tvm.testing.requires_vitis_ai def test_composite_target_cmd_line_help(): parser = argparse.ArgumentParser() @@ -130,44 +104,6 @@ def test_composite_target_cmd_line_help(): ) -@tvm.testing.requires_cmsisnn -def test_include_known_codegen(): - parser = argparse.ArgumentParser() - generate_target_args(parser) - parsed, _ = parser.parse_known_args( - ["--target=cmsis-nn,c", "--target-cmsis-nn-mcpu=cortex-m55", "--target-c-mcpu=cortex-m55"] - ) - assert reconstruct_target_args(parsed) == { - "c": {"mcpu": "cortex-m55"}, - "cmsis-nn": {"mcpu": "cortex-m55"}, - } - - -@tvm.testing.requires_ethosu -def test_ethosu_compiler_attrs(): - # It is checked that the represented string and boolean types in the - # EthosUCompilerConfigNode structure can be passed via the command line - parser = argparse.ArgumentParser() - generate_target_args(parser) - parsed, _ = parser.parse_known_args( - ["--target-ethos-u-accelerator_config=ethos-u55-32", "--target-ethos-u-enable_cascader=1"] - ) - assert reconstruct_target_args(parsed) == { - "ethos-u": {"accelerator_config": "ethos-u55-32", "enable_cascader": 1}, - } - - -@tvm.testing.requires_cmsisnn -def test_skip_target_from_codegen(): - parser = argparse.ArgumentParser() - generate_target_args(parser) - parsed, left = parser.parse_known_args( - ["--target=cmsis-nn, c", "--target-cmsis-nn-from_device=1", "--target-c-mcpu=cortex-m55"] - ) - assert left == ["--target-cmsis-nn-from_device=1"] - assert reconstruct_target_args(parsed) == {"c": {"mcpu": "cortex-m55"}} - - def test_target_recombobulation_single(): tvm_target, _ = target_from_cli("llvm", {"llvm": {"mcpu": "cortex-m3"}}) diff --git a/tests/python/driver/tvmc/test_workspace_pools.py b/tests/python/driver/tvmc/test_workspace_pools.py index 2e34c90252c3..5d5e0851b2cc 100644 --- a/tests/python/driver/tvmc/test_workspace_pools.py +++ b/tests/python/driver/tvmc/test_workspace_pools.py @@ -403,18 +403,3 @@ def test_workspace_pools_recombobulate_single_pool_overrides(): assert len(memory_pools.pools[0].targets) == 2 assert len(memory_pools.pools[1].targets) == 1 - - -@tvm.testing.requires_ethosn -def test_workspace_pools_recombobulate_ext_codegen(): - """No error should occur when using an external code generator without an attached Target""" - - parser = argparse.ArgumentParser() - generate_workspace_pools_args(parser) - parsed, _ = parser.parse_known_args([]) - - targets = [Target("llvm")] - extra_targets = [{"raw": "ethos-n"}] - - memory_pools = workspace_pools_recombobulate(parsed, targets, extra_targets) - assert memory_pools is None diff --git a/tests/python/integration/test_arm_mprofile_dsp.py b/tests/python/integration/test_arm_mprofile_dsp.py deleted file mode 100644 index 22b4ebaab832..000000000000 --- a/tests/python/integration/test_arm_mprofile_dsp.py +++ /dev/null @@ -1,352 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Test arm mprofile dsp.""" -import numpy as np -import pytest -import tvm -import tvm.testing -from tvm import relay -from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data -from tvm.micro.testing.aot_test_utils import AOT_CORSTONE300_RUNNER - - -@tvm.testing.requires_corstone300 -@pytest.mark.parametrize( - "data_shape_nhwc, kernel_size, num_filter, strides, padding, dilation", - [ - ((1, 32, 32, 1), (3, 3), 12, 1, 0, 1), - ((1, 32, 10, 3), (3, 3), 16, 1, 0, 1), - ((1, 49, 10, 1), (10, 4), 64, (2, 1), (4, 1, 5, 1), 1), - ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 1), - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 2), - ((1, 32, 32, 16), (3, 3), 16, 1, (1, 1, 2, 2), 2), - # bug https://github.com/apache/tvm/issues/9226 - ((1, 49, 10, 1), (10, 4), 64, (2, 2), (4, 1, 5, 1), 1), - # from Visual Wake Word model - ((1, 96, 96, 3), (3, 3), 8, (2, 2), (0, 0, 1, 1), 1), - # from Image Classification model (one of the MLPerfTiny models) - ((1, 16, 16, 32), (1, 1), 64, (2, 2), 0, 1), - ((4, 16, 16, 8), (5, 5), 8, 2, (0, 4, 4, 0), 1), - ((4, 16, 16, 8), (5, 5), 16, 2, (0, 4, 4, 0), 1), - ((4, 16, 16, 8), (5, 5), 8, 2, 0, 1), - ((4, 16, 16, 8), (5, 5), 16, 2, 0, 1), - ((1, 16, 16, 8), (3, 3), 16, 2, (0, 0, 1, 1), 1), - ((1, 16, 16, 8), (3, 3), 16, 2, (1, 1, 2, 2), 1), - ((1, 16, 16, 8), (5, 5), 16, 2, (3, 3, 2, 2), 1), - ((1, 16, 16, 8), (3, 3), 16, 2, (0, 1, 2, 3), 1), - ], -) -@pytest.mark.parametrize("dtype", ["int8", "int16"]) -def test_conv2d(data_shape_nhwc, kernel_size, num_filter, strides, padding, dilation, dtype): - """Test a subgraph with a single conv2d operator.""" - ishape = data_shape_nhwc - wshape = (*kernel_size, data_shape_nhwc[-1], num_filter) - - weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype) - - input0 = relay.var("input", relay.TensorType(ishape, dtype)) - weight0 = relay.const(weight_data) - out0 = relay.op.nn.conv2d( - input0, - weight0, - kernel_size=kernel_size, - strides=strides, - padding=padding, - dilation=(dilation, dilation), - data_layout="NHWC", - kernel_layout="HWIO", - out_dtype="int32", - out_layout="NHWC", - ) - ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0)) - - input1 = relay.var("input", relay.TensorType(ishape, dtype)) - weight1 = relay.const(np.moveaxis(weight_data, 2, -1)) - out1 = relay.op.nn.conv2d( - input1, - weight1, - kernel_size=kernel_size, - strides=strides, - padding=padding, - dilation=(dilation, dilation), - data_layout="NHWC", - kernel_layout="HWOI", - out_dtype="int32", - out_layout="NHWC", - ) - mod = tvm.IRModule.from_expr(relay.Function([input1], out1)) - - inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)} - output_list = generate_ref_data(ref_mod, inputs) - - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - runner=AOT_CORSTONE300_RUNNER, - 
interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - ) - - -@tvm.testing.requires_corstone300 -@pytest.mark.parametrize( - "data_shape_nwc, kernel_size, num_filter, strides, padding", - [ - ((1, 32, 12), 3, 16, 1, 0), - ((3, 12, 10), 4, 24, 1, 0), - ((1, 7, 7), 3, 5, 1, 0), - ((1, 10, 2), 4, 4, 2, (1, 1)), - ((1, 20, 2), 4, 4, 2, (0, 1)), - ((1, 16, 4), 1, 12, 1, (1, 0)), - ((1, 24, 16), 1, 32, 3, (2, 2)), - ], -) -@pytest.mark.parametrize("dtype", ["int8", "int16"]) -def test_conv1d(data_shape_nwc, kernel_size, num_filter, strides, padding, dtype): - """Test a subgraph with a single conv1d operator.""" - ishape = data_shape_nwc - wshape = (kernel_size, data_shape_nwc[-1], num_filter) - - weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype) - - input0 = relay.var("input", relay.TensorType(ishape, dtype)) - weight0 = relay.const(weight_data) - out0 = relay.op.nn.conv1d( - input0, - weight0, - strides=strides, - padding=padding, - data_layout="NWC", - kernel_layout="WIO", - out_dtype="int32", - out_layout="NWC", - ) - ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0)) - - input1 = relay.var("input", relay.TensorType(ishape, dtype)) - weight1 = relay.const(np.moveaxis(weight_data, 1, -1)) - out1 = relay.op.nn.conv1d( - input1, - weight1, - strides=strides, - padding=padding, - data_layout="NWC", - kernel_layout="WOI", - out_dtype="int32", - out_layout="NWC", - ) - mod = tvm.IRModule.from_expr(relay.Function([input1], out1)) - - inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)} - output_list = generate_ref_data(ref_mod, inputs) - - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - ) - - -@tvm.testing.requires_corstone300 -@pytest.mark.parametrize( - "dim_m, dim_k, dim_n", - [ - (1, 32, 64), - (3, 12, 10), - ], -) -def test_dense(dim_m, dim_k, dim_n): - """Test a subgraph with a single dense operator.""" - ishape = (dim_m, dim_k) - wshape = (dim_n, dim_k) - - input0 = relay.var("input", relay.TensorType(ishape, "int8")) - dense_f = relay.op.nn.batch_flatten(input0) - weight0 = relay.const(np.random.randint(low=-10, high=10, size=wshape, dtype="int8")) - out = relay.op.nn.dense(dense_f, weight0, out_dtype="int32") - - mod = tvm.IRModule.from_expr(relay.Function([input0], out)) - inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype="int8")} - output_list = generate_ref_data(mod, inputs) - - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - ) - - -@tvm.testing.requires_corstone300 -@pytest.mark.parametrize( - "data_shape_nhwc, pool_size, strides, padding", - [ - ((1, 32, 32, 1), (3, 3), 1, 0), - ((1, 32, 20, 4), (3, 3), (2, 2), 0), - ], -) -def test_maxpool_2d(data_shape_nhwc, pool_size, strides, padding): - """Test a subgraph with a single maxpool_2d operator.""" - - ishape = data_shape_nhwc - - input0 = relay.var("input", relay.TensorType(ishape, "int8")) - out = relay.op.nn.max_pool2d(input0, pool_size, layout="NHWC", strides=strides, padding=padding) - - mod = tvm.IRModule.from_expr(relay.Function([input0], out)) - inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, 
dtype="int8")} - output_list = generate_ref_data(mod, inputs) - - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - ) - - -@tvm.testing.requires_corstone300 -@pytest.mark.parametrize( - "data_shape_nwc, pool_size, strides, padding", - [ - ((1, 32, 1), 3, 1, 0), - ((1, 20, 4), 3, 2, 0), - ], -) -def test_maxpool_1d(data_shape_nwc, pool_size, strides, padding): - """Test a subgraph with a single maxpool_1d operator.""" - ishape = data_shape_nwc - - input0 = relay.var("input", relay.TensorType(ishape, "int8")) - out = relay.op.nn.max_pool1d(input0, pool_size, layout="NWC", strides=strides, padding=padding) - - mod = tvm.IRModule.from_expr(relay.Function([input0], out)) - inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype="int8")} - output_list = generate_ref_data(mod, inputs) - - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - ) - - -@tvm.testing.requires_corstone300 -@pytest.mark.parametrize( - "data_shape_nchw, pool_size, strides, padding", - [ - ((1, 1, 32, 32), (3, 3), 1, 0), - ((1, 4, 32, 20), (3, 3), (2, 2), 0), - ], -) -def test_avgpool_2d(data_shape_nchw, pool_size, strides, padding): - """Test a subgraph with a single avgpool_2d operator.""" - - ishape = data_shape_nchw - - input0 = relay.var("input", relay.TensorType(ishape, "int32")) - out0 = relay.nn.avg_pool2d(input0, pool_size, layout="NCHW", strides=strides, padding=padding) - ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0)) - - input1 = relay.var("input", relay.TensorType(ishape, "int16")) - out1 = relay.op.nn.avg_pool2d( - input1, pool_size, layout="NCHW", strides=strides, padding=padding - ) - mod = tvm.IRModule.from_expr(relay.Function([input1], out1)) - - input_data = np.random.randint(low=-128, high=127, size=ishape, dtype="int32") - inputs = {"input": input_data} - output_list = generate_ref_data(ref_mod, inputs) - - compile_and_run( - AOTTestModel( - module=mod, inputs={"input": input_data.astype(dtype="int16")}, outputs=output_list - ), - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - ) - - -@tvm.testing.requires_corstone300 -@pytest.mark.parametrize( - "data_shape_ncw, pool_size, strides, padding", - [ - ((1, 1, 32), 3, 1, 0), - ((1, 4, 20), 3, 2, 2), - ], -) -def test_avgpool_1d(data_shape_ncw, pool_size, strides, padding): - """Test a subgraph with a single avgpool_1d operator.""" - - ishape = data_shape_ncw - - input0 = relay.var("input", relay.TensorType(ishape, "int32")) - out0 = relay.op.nn.avg_pool1d(input0, pool_size, layout="NCW", strides=strides, padding=padding) - ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0)) - - input1 = relay.var("input", relay.TensorType(ishape, "int16")) - out1 = relay.op.nn.avg_pool1d(input1, pool_size, layout="NCW", strides=strides, padding=padding) - mod = tvm.IRModule.from_expr(relay.Function([input1], out1)) - - input_data = np.random.randint(low=-10, high=10, size=ishape, dtype="int32") - inputs = {"input": input_data} - output_list = generate_ref_data(ref_mod, inputs) - - compile_and_run( - AOTTestModel( - module=mod, inputs={"input": input_data.astype(dtype="int16")}, 
outputs=output_list - ), - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - ) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/ir/test_roundtrip_runtime_module.py b/tests/python/ir/test_roundtrip_runtime_module.py index 494143fc0bf2..96deb35fb6d8 100644 --- a/tests/python/ir/test_roundtrip_runtime_module.py +++ b/tests/python/ir/test_roundtrip_runtime_module.py @@ -33,15 +33,6 @@ def test_csource_module(): assert new_mod.is_binary_serializable -def test_aot_module(): - mod = tvm.get_global_func("relay.build_module._AOTExecutorCodegen")() - # aot module that is not binary serializable. - # Thus, it would raise an error. - assert not mod.is_binary_serializable - with pytest.raises(TVMError): - tvm.ir.load_json(tvm.ir.save_json(mod)) - - def get_test_mod(): x = relay.var("x", shape=(1, 10), dtype="float32") y = relay.var("y", shape=(1, 10), dtype="float32") diff --git a/tests/python/micro/test_aot_legalize_packed_call.py b/tests/python/micro/test_aot_legalize_packed_call.py deleted file mode 100644 index 3e66a96dfb43..000000000000 --- a/tests/python/micro/test_aot_legalize_packed_call.py +++ /dev/null @@ -1,119 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# pylint: disable=missing-function-docstring,missing-module-docstring -import pytest -import tvm -import tvm.testing -from tvm import tir -from tvm.script import tir as T - - -# complains of an undefined var being used -@tvm.script.ir_module(check_well_formed=False) -class Module: - @T.prim_func - def tvm_test_cpacked( - A: T.Buffer((1,), "float32"), - B: T.Buffer((1,), "float32"), - C: T.Buffer((1,), "float32"), - device_context: T.Buffer((1,), "float32"), - ) -> T.handle: - T.evaluate(C.data) - - @T.prim_func - def tir_packed_call() -> None: - A = T.handle() - B = T.handle() - C = T.handle() - device_context = T.handle() - # body - T.evaluate( - T.tvm_call_cpacked( - "tvm_test_cpacked", - A, - B, - C, - device_context, - dtype="int32", - ) - ) - - -@tvm.script.ir_module(check_well_formed=False) -class Expected: - @T.prim_func - def tvm_test_cpacked( - A: T.Buffer((1,), "float32"), - B: T.Buffer((1,), "float32"), - C: T.Buffer((1,), "float32"), - device_context: T.Buffer((1,), "float32"), - ) -> T.handle: - T.evaluate(C.data) - - @T.prim_func - def tir_packed_call() -> None: - A = T.handle() - B = T.handle() - C = T.handle() - device_context = T.handle() - - # body - T.evaluate( - T.tvm_call_cpacked( - "tvm_test_cpacked", - T.tvm_stack_make_array( - A, - T.tvm_stack_make_shape(1, dtype="handle"), - T.reinterpret(T.uint64(0), dtype="handle"), - T.uint32(1), - T.Cast("float32", 0), - 0, - dtype="handle", - ), - T.tvm_stack_make_array( - B, - T.tvm_stack_make_shape(1, dtype="handle"), - T.reinterpret(T.uint64(0), dtype="handle"), - T.uint32(1), - T.Cast("float32", 0), - 0, - dtype="handle", - ), - T.tvm_stack_make_array( - C, - T.tvm_stack_make_shape(1, dtype="handle"), - T.reinterpret(T.uint64(0), dtype="handle"), - T.uint32(1), - T.Cast("float32", 0), - 0, - dtype="handle", - ), - device_context, - dtype="int32", - ) - ) - - -def test_aot_packed_call(): - mod = Module - expected = Expected - out = tir.transform.LegalizePackedCalls()(mod) - tvm.ir.assert_structural_equal(expected, out, map_free_vars=True) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/micro/test_crt.py b/tests/python/micro/test_crt.py deleted file mode 100644 index 9df9ff7de7c1..000000000000 --- a/tests/python/micro/test_crt.py +++ /dev/null @@ -1,438 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test C runtime""" - -import pathlib -import pytest - -import numpy as np - -import tvm -import tvm.relay -import tvm.testing -from tvm.target import Target -from tvm.relay.backend import Runtime -from tvm.relay.backend import Executor - -pytest.importorskip("pty") - -BUILD = True -DEBUG = False - -TARGET = tvm.target.target.micro("host") - - -def _make_sess_from_op(temp_dir, op_name, sched, arg_bufs): - runtime = Runtime("crt", {"system-lib": True}) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.build(sched, arg_bufs, Target(TARGET, TARGET), runtime=runtime, name=op_name) - - return _make_session(temp_dir, mod) - - -def _make_session(temp_dir, mod): - template_project_dir = pathlib.Path(tvm.micro.get_microtvm_template_projects("crt")) - project = tvm.micro.generate_project( - template_project_dir, mod, temp_dir / "project", {"verbose": 1} - ) - project.build() - project.flash() - return tvm.micro.Session(project.transport()) - - -def _make_add_sess(temp_dir): - a = tvm.te.placeholder((2,), dtype="int8") - b = tvm.te.placeholder((1,), dtype="int8") - c = tvm.te.compute(a.shape, lambda i: a[i] + b[0], name="c") - sched = tvm.te.create_schedule(c.op) - return _make_sess_from_op(temp_dir, "add", sched, [a, b, c]) - - -@tvm.testing.requires_micro -def test_compile_runtime(): - """Test compiling the on-device runtime.""" - - temp_dir = tvm.contrib.utils.tempdir() - - with _make_add_sess(temp_dir) as sess: - a_data = tvm.nd.array(np.array([2, 3], dtype="int8"), device=sess.device) - assert (a_data.numpy() == np.array([2, 3])).all() - b_data = tvm.nd.array(np.array([4], dtype="int8"), device=sess.device) - assert (b_data.numpy() == np.array([4])).all() - c_data = tvm.nd.array(np.array([0, 0], dtype="int8"), device=sess.device) - assert (c_data.numpy() == np.array([0, 0])).all() - - system_lib = sess.get_system_lib() - system_lib.get_function("add")(a_data, b_data, c_data) - assert (c_data.numpy() == np.array([6, 7])).all() - - -@tvm.testing.requires_micro -def test_compile_runtime_llvm(): - """Test targeting the on-device runtime with the llvm backend.""" - global TARGET - old_target = TARGET - try: - # NOTE: test_compile_runtime uses the "c" backend--re run it using the llvm backend. 
- target_str = str(TARGET) - assert target_str.startswith("c ") - TARGET = tvm.target.Target("llvm " + str(TARGET)[len("c ") :]) - - test_compile_runtime() - - finally: - TARGET = old_target - - -@tvm.testing.requires_micro -def test_reset(): - """Test when the remote end resets during a session.""" - - temp_dir = tvm.contrib.utils.tempdir() - - with _make_add_sess(temp_dir) as sess: - try: - sess._rpc.get_function("tvm.testing.reset_server")() - assert False, "expected to raise SessionTerminatedError; did not raise" - except tvm.micro.SessionTerminatedError: - pass - - -@tvm.testing.requires_micro -def test_graph_executor(): - """Test use of the graph executor with microTVM.""" - - temp_dir = tvm.contrib.utils.tempdir() - relay_mod = tvm.relay.fromtext( - """ - #[version = "0.0.5"] - def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) { - %0 = %a + %b; - %0 - }""" - ) - - runtime = Runtime("crt", {"system-lib": True}) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - factory = tvm.relay.build(relay_mod, target=TARGET, runtime=runtime) - - def do_test(graph_mod): - - a_data = tvm.nd.array(np.array([2, 3], dtype="uint8"), device=sess.device) - assert (a_data.numpy() == np.array([2, 3])).all() - b_data = tvm.nd.array(np.array([4, 7], dtype="uint8"), device=sess.device) - assert (b_data.numpy() == np.array([4, 7])).all() - - assert graph_mod.get_input_index("a") == 0 - assert graph_mod.get_input_index("b") == 1 - - graph_mod.run(a=a_data, b=b_data) - - out = graph_mod.get_output(0) - assert (out.numpy() == np.array([6, 10])).all() - - with _make_session(temp_dir, factory) as sess: - - graph_mod_local = tvm.micro.create_local_graph_executor( - factory.get_graph_json(), sess.get_system_lib(), sess.device - ) - - do_test(graph_mod_local) - - graph_mod = tvm.contrib.graph_executor.create( - factory.get_graph_json(), sess.get_system_lib(), sess.device - ) - - do_test(graph_mod) - - -@tvm.testing.requires_micro -def test_aot_executor(): - """Test use of the AOT executor with microTVM.""" - - temp_dir = tvm.contrib.utils.tempdir() - relay_mod = tvm.relay.fromtext( - """ - #[version = "0.0.5"] - def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) { - %0 = %a + %b; - %0 - }""" - ) - - runtime = Runtime("crt", {"system-lib": True}) - executor = Executor("aot") - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - factory = tvm.relay.build(relay_mod, target=TARGET, runtime=runtime, executor=executor) - - def do_test(): - aot_executor = tvm.micro.create_local_aot_executor(sess) - - assert aot_executor.get_input_index("a") == 0 - assert aot_executor.get_input_index("b") == 1 - - assert aot_executor.get_input_name(0) == "a" - assert aot_executor.get_input_name(1) == "b" - - shape_dict, dtype_dict = aot_executor.get_input_info() - assert shape_dict == {"a": (1, 2), "b": (1, 2)} - assert dtype_dict == {"a": "uint8", "b": "uint8"} - - assert aot_executor.get_num_inputs() == 2 - assert aot_executor.get_num_outputs() == 1 - - a_np = np.array([[2, 3]], dtype="uint8") - b_np = np.array([[4, 7]], dtype="uint8") - - aot_executor.get_input("a").copyfrom(a_np) - b_data = aot_executor.get_input("b").copyfrom(b_np) - - aot_executor.run() - - out = aot_executor.get_output(0) - assert (out.numpy() == np.array([6, 10])).all() - - b_np_new = np.array([[5, 8]]) - aot_executor.set_input("b", b_np_new) - assert (b_data.numpy() == b_np_new).all() - - with _make_session(temp_dir, factory) as sess: - do_test() - - 
-@tvm.testing.requires_micro -def test_aot_executor_usmp_const_pool(): - """Test the AOT executor with microTVM using USMP to generate a constant data pool.""" - - temp_dir = tvm.contrib.utils.tempdir() - relay_mod = tvm.relay.fromtext( - """ - #[version = "0.0.5"] - def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8], %c : Tensor[(1,2), uint8]) { - %0 = %a + %b; - %1 = %0 + %c; - %1 - }""" - ) - - runtime = Runtime("crt", {"system-lib": True}) - executor = Executor("aot") - main_func = relay_mod["main"] - type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params} - c_np = np.array([[8, 9]], dtype="uint8").astype(type_dict["c"]) - params = {"c": c_np} - with tvm.transform.PassContext( - opt_level=3, config={"tir.disable_vectorize": True, "tir.usmp.enable": True} - ): - factory = tvm.relay.build( - relay_mod, - target=TARGET, - runtime=runtime, - executor=executor, - params=params, - ) - - def do_test(): - try: - aot_executor = tvm.micro.create_local_aot_executor(sess) - except tvm._ffi.base.TVMError as excpt: - raise excpt - - assert aot_executor.get_input_index("a") == 0 - assert aot_executor.get_input_index("b") == 1 - - assert aot_executor.get_num_inputs() == 2 - assert aot_executor.get_num_outputs() == 1 - - a_np = np.array([[2, 3]], dtype="uint8") - b_np = np.array([[4, 7]], dtype="uint8") - - aot_executor.get_input("a").copyfrom(a_np) - b_data = aot_executor.get_input("b").copyfrom(b_np) - aot_executor.run() - - out = aot_executor.get_output(0) - assert (out.numpy() == np.array([14, 19])).all() - - b_np_new = np.array([[5, 8]]) - aot_executor.set_input("b", b_np_new) - assert (b_data.numpy() == b_np_new).all() - - with _make_session(temp_dir, factory) as sess: - do_test() - - -@tvm.testing.requires_micro -def test_std_math_functions(): - """Verify that standard math functions can be used.""" - - temp_dir = tvm.contrib.utils.tempdir() - - with _make_add_sess(temp_dir) as sess: - a_data = tvm.nd.array(np.array([2, 3], dtype="int8"), device=sess.device) - assert (a_data.numpy() == np.array([2, 3])).all() - b_data = tvm.nd.array(np.array([4], dtype="int8"), device=sess.device) - assert (b_data.numpy() == np.array([4])).all() - c_data = tvm.nd.array(np.array([0, 0], dtype="int8"), device=sess.device) - assert (c_data.numpy() == np.array([0, 0])).all() - - system_lib = sess.get_system_lib() - system_lib.get_function("add")(a_data, b_data, c_data) - - temp_dir = tvm.contrib.utils.tempdir() - a = tvm.te.placeholder((2,), dtype="float32", name="a") - b = tvm.te.compute(a.shape, lambda i: tvm.te.exp(a[i]), name="b") - s = tvm.te.create_schedule(b.op) - - with _make_sess_from_op(temp_dir, "myexpf", s, [a, b]) as sess: - a_data = tvm.nd.array(np.array([2.0, 3.0], dtype="float32"), device=sess.device) - b_data = tvm.nd.array(np.array([2.0, 3.0], dtype="float32"), device=sess.device) - lib = sess.get_system_lib() - func = lib["myexpf"] - func(a_data, b_data) - np.testing.assert_allclose(b_data.numpy(), np.array([7.389056, 20.085537])) - - -@tvm.testing.requires_micro -def test_platform_timer(): - """Verify the platform timer can be used to time remote functions.""" - - temp_dir = tvm.contrib.utils.tempdir() - a = tvm.te.placeholder((2,), dtype="float32", name="a") - b = tvm.te.compute(a.shape, lambda i: tvm.te.exp(a[i]), name="b") - s = tvm.te.create_schedule(b.op) - - with _make_sess_from_op(temp_dir, "myexpf", s, [a, b]) as sess: - a_data = tvm.nd.array(np.array([2.0, 3.0], dtype="float32"), device=sess.device) - b_data = tvm.nd.array(np.array([2.0, 3.0], 
dtype="float32"), device=sess.device) - lib = sess.get_system_lib() - time_eval_f = lib.time_evaluator( - "myexpf", sess.device, number=2000, repeat=3, min_repeat_ms=40 - ) - result = time_eval_f(a_data, b_data) - assert result.mean > 0 - assert len(result.results) == 3 - - -@tvm.testing.requires_micro -def test_autotune(): - """Verify that autotune works with micro.""" - - runtime = Runtime("crt", {"system-lib": True}) - - data = tvm.relay.var("data", tvm.relay.TensorType((1, 3, 64, 64), "float32")) - weight = tvm.relay.var("weight", tvm.relay.TensorType((8, 3, 5, 5), "float32")) - y = tvm.relay.nn.conv2d( - data, - weight, - padding=(2, 2), - kernel_size=(5, 5), - kernel_layout="OIHW", - out_dtype="float32", - ) - f = tvm.relay.Function([data, weight], y) - mod = tvm.IRModule.from_expr(f) - mod = tvm.relay.transform.InferType()(mod) - - main_func = mod["main"] - shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params} - type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params} - - weight_data = np.ones(shape_dict["weight"]).astype(type_dict["weight"]) - input_data = np.ones(shape_dict["data"]).astype(type_dict["data"]) - params = {"weight": weight_data} - inputs = {"data": input_data} - - target = tvm.target.target.micro("host") - template_project_dir = pathlib.Path(tvm.micro.get_microtvm_template_projects("crt")) - - pass_context = tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}) - with pass_context: - tasks = tvm.autotvm.task.extract_from_program(mod["main"], {}, target) - assert len(tasks) > 0 - - module_loader = tvm.micro.AutoTvmModuleLoader( - template_project_dir=template_project_dir, - project_options={}, - ) - builder = tvm.autotvm.LocalBuilder( - n_parallel=1, - build_kwargs={"build_option": {"tir.disable_vectorize": True}}, - do_fork=True, - build_func=tvm.micro.autotvm_build_func, - runtime=runtime, - ) - runner = tvm.autotvm.LocalRunner(number=1, repeat=1, module_loader=module_loader) - - measure_option = tvm.autotvm.measure_option(builder=builder, runner=runner) - - tune_log_file = pathlib.Path("crt_autotune.log") - if tune_log_file.exists(): - tune_log_file.unlink() - - num_trials = 10 - for task in tasks: - tuner = tvm.autotvm.tuner.GATuner(task) - tuner.tune( - n_trial=num_trials, - measure_option=measure_option, - callbacks=[ - tvm.autotvm.callback.log_to_file(str(tune_log_file)), - tvm.autotvm.callback.progress_bar(num_trials, si_prefix="M"), - ], - si_prefix="M", - ) - assert tuner.best_flops > 0 - - # TODO(mehrdadh): commented due to autotuning errors - # check_tune_log(tune_log_file) - - # Build without tuning - with pass_context: - lowered = tvm.relay.build(mod, target=TARGET, runtime=runtime, params=params) - - temp_dir = tvm.contrib.utils.tempdir() - with _make_session(temp_dir, lowered) as sess: - graph_mod = tvm.micro.create_local_graph_executor( - lowered.get_graph_json(), sess.get_system_lib(), sess.device - ) - graph_mod.set_input(**lowered.get_params()) - graph_mod.run(**inputs) - expected_output = graph_mod.get_output(0).numpy() - del graph_mod - - # Build using autotune logs - with tvm.autotvm.apply_history_best(str(tune_log_file)): - with pass_context: - lowered_tuned = tvm.relay.build(mod, target=target, runtime=runtime, params=params) - - temp_dir = tvm.contrib.utils.tempdir() - with _make_session(temp_dir, lowered_tuned) as sess: - graph_mod = tvm.micro.create_local_graph_executor( - lowered_tuned.get_graph_json(), sess.get_system_lib(), sess.device - ) - 
graph_mod.set_input(**lowered_tuned.get_params()) - graph_mod.run(**inputs) - output = graph_mod.get_output(0).numpy() - del graph_mod - - tvm.testing.assert_allclose(output, expected_output, rtol=1e-4, atol=1e-5) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/micro/test_micro_model_library_format.py b/tests/python/micro/test_micro_model_library_format.py deleted file mode 100644 index e4f8a4fcd73e..000000000000 --- a/tests/python/micro/test_micro_model_library_format.py +++ /dev/null @@ -1,748 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pathlib -import sys -import datetime -import json -import os -import tarfile - -import numpy as np -import pytest -import platform - -pytest.importorskip("tvm.micro") - -import tvm -import tvm.relay -from tvm.relay.backend import Executor, Runtime -from tvm.relay.testing import byoc -import tvm.runtime.module -import tvm.testing -from tvm.contrib import utils -import tvm.micro as micro -from tvm.micro.testing.utils import get_conv2d_relay_module -import tvm.micro.model_library_format as model_library_format -from tvm.micro.model_library_format import _GENERATED_VERSION - - -@tvm.testing.requires_micro -def test_export_operator_model_library_format(): - target = tvm.target.target.micro("host") - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - A = tvm.te.placeholder((2,), dtype="int8") - B = tvm.te.placeholder((1,), dtype="int8") - C = tvm.te.compute(A.shape, lambda i: A[i] + B[0], name="C") - sched = tvm.te.create_schedule(C.op) - mod = tvm.build( - sched, - [A, B, C], - tvm.target.Target(target, target), - runtime=Runtime("crt", {"system-lib": True}), - name="add", - ) - - temp_dir = utils.tempdir() - mlf_tar_path = temp_dir.relpath("lib.tar") - micro.export_model_library_format(mod, mlf_tar_path) - - tf = tarfile.open(mlf_tar_path) - - extract_dir = temp_dir.relpath("extract") - os.mkdir(extract_dir) - tf.extractall(extract_dir) - - with open(os.path.join(extract_dir, "metadata.json")) as json_f: - metadata = json.load(json_f) - assert metadata["version"] == _GENERATED_VERSION - assert metadata["model_name"] == "add" - export_datetime = datetime.datetime.strptime( - metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ" - ) - assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5) - assert metadata["target"] == [str(target)] - - assert metadata["memory"]["add"][0]["dtype"] == "int8" - assert metadata["memory"]["add"][0]["shape"] == [2] - assert metadata["memory"]["add"][0]["size_bytes"] == 2 - - assert metadata["memory"]["add"][1]["dtype"] == "int8" - assert metadata["memory"]["add"][1]["shape"] == [1] - assert metadata["memory"]["add"][1]["size_bytes"] == 1 - - assert 
metadata["memory"]["add"][2]["dtype"] == "int8" - assert metadata["memory"]["add"][2]["shape"] == [2] - assert metadata["memory"]["add"][2]["size_bytes"] == 2 - - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "lib0.c")) - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "lib1.c")) - - assert ( - len(mod.ir_module_by_target) == 1 - ), f"expect 1 ir_model_by_target: {mod.ir_module_by_target!r}" - for target, ir_mod in mod.ir_module_by_target.items(): - assert int(tvm.runtime.ndarray.device(str(target)).device_type) == 1 - with open(os.path.join(extract_dir, "src", "tir-1.txt")) as tir_f: - assert tir_f.read() == str(ir_mod) - - -@tvm.testing.requires_micro -def test_export_multiple_operator_model_library_format(): - target = tvm.target.target.micro("host") - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - A = tvm.te.placeholder((2,), dtype="int8") - B = tvm.te.placeholder((1,), dtype="int8") - C = tvm.te.compute(A.shape, lambda i: A[i] + B[0], name="C") - sched = tvm.te.create_schedule(C.op) - mod = tvm.build( - sched, - [A, B, C], - tvm.target.Target(target, target), - runtime=Runtime("crt", {"system-lib": True}), - name="add", - ) - - temp_dir = utils.tempdir() - mlf_tar_path = temp_dir.relpath("lib.tar") - - with pytest.raises(RuntimeError) as exc: - micro.export_model_library_format([mod, mod], mlf_tar_path) - - assert str(exc.exception) == ("Multiple operator is not supported.") - - -def validate_graph_json(extract_dir, factory): - with open( - os.path.join(extract_dir, "executor-config", "graph", f"{factory.libmod_name}.graph") - ) as graph_f: - graph_json = graph_f.read() - assert graph_json == factory.graph_json - - # Just check it parses and looks roughly right. 
- graph = json.loads(graph_json) - assert "nodes" in graph - assert len(graph["nodes"]) == 4 - assert "attrs" in graph - - -@tvm.testing.requires_micro -@pytest.mark.parametrize( - "executor,runtime,should_generate_interface,json_constants_size_bytes", - [ - (Executor("graph"), Runtime("crt", {"system-lib": True}), False, 8), - (Executor("aot", {"link-params": True}), Runtime("crt"), False, 0), - ( - Executor("aot", {"unpacked-api": True, "interface-api": "c"}), - Runtime("crt"), - True, - 0, - ), - ], -) -def test_export_model_library_format_c( - executor, runtime, should_generate_interface, json_constants_size_bytes -): - target = tvm.target.target.micro("host") - with utils.TempDirectory.set_keep_for_debug(True): - with tvm.transform.PassContext( - opt_level=3, config={"tir.disable_vectorize": True, "tir.usmp.enable": False} - ): - relay_mod = tvm.relay.fromtext( - """ - #[version = "0.0.5"] - def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[(1, 2), float32]) { - %0 = cast(%a, dtype="float32") + %b * %c; - %0 - }""" - ) - factory = tvm.relay.build( - relay_mod, - target, - executor=executor, - runtime=runtime, - mod_name="add", - params={"c": np.array([[2.0, 4.0]], dtype="float32")}, - ) - - temp_dir = utils.tempdir() - mlf_tar_path = temp_dir.relpath("lib.tar") - - micro.export_model_library_format(factory, mlf_tar_path) - tf = tarfile.open(mlf_tar_path) - - extract_dir = temp_dir.relpath("extract") - os.mkdir(extract_dir) - tf.extractall(extract_dir) - - with open(os.path.join(extract_dir, "metadata.json")) as json_f: - metadata = json.load(json_f) - module_name = factory.libmod_name - assert metadata["version"] == _GENERATED_VERSION - assert metadata["modules"][module_name]["model_name"] == "add" - export_datetime = datetime.datetime.strptime( - metadata["modules"][module_name]["export_datetime"], "%Y-%m-%d %H:%M:%SZ" - ) - assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5) - assert metadata["modules"][module_name]["target"] == [str(target)] - if executor.name == "graph": - assert metadata["modules"][module_name]["memory"]["sids"] == [ - {"storage_id": 0, "size_bytes": 2, "input_binding": "a"}, - {"storage_id": 1, "size_bytes": 8, "input_binding": "b"}, - {"storage_id": 2, "size_bytes": 8, "input_binding": "p0"}, - {"storage_id": 3, "size_bytes": 8}, - ] - assert metadata["modules"][module_name]["memory"]["functions"]["main"] == [ - { - "constants_size_bytes": json_constants_size_bytes, - "device": 1, - "inputs": { - "a": {"dtype": "uint8", "size": 2}, - "b": {"dtype": "float32", "size": 8}, - }, - "io_size_bytes": 18, - "outputs": {"output": {"dtype": "float32", "size": 8}}, - "workspace_size_bytes": 0, - } - ] - assert metadata["modules"][module_name]["memory"]["functions"]["operator_functions"][0][ - "workspace" - ] == [{"device": 1, "workspace_size_bytes": 0}] - assert ( - "fused_cast_multiply_add" - in metadata["modules"][module_name]["memory"]["functions"]["operator_functions"][0][ - "function_name" - ] - ) - - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "add_lib0.c")) - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "add_lib1.c")) - assert should_generate_interface == os.path.exists( - os.path.join(extract_dir, "codegen", "host", "include", "tvmgen_add.h") - ) - - if executor.name == "graph": - validate_graph_json(extract_dir, factory) - - with open(os.path.join(extract_dir, "src", f"{module_name}.relay")) as relay_f: - assert relay_f.read() == 
str(relay_mod) - - with open(os.path.join(extract_dir, "parameters", "add.params"), "rb") as params_f: - params = tvm.relay.load_param_dict(params_f.read()) - if json_constants_size_bytes != 0: - assert "p0" in params - else: - assert len(params) == 0 - - -@tvm.testing.requires_micro -def test_export_model_library_format_llvm(): - with utils.TempDirectory.set_keep_for_debug(True): - target = tvm.target.target.micro("host") - assert str(target)[:2] == "c " - target = tvm.target.Target("llvm " + str(target)[2:]) - with tvm.transform.PassContext(opt_level=3): - relay_mod = tvm.relay.fromtext( - """ - #[version = "0.0.5"] - def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[(1, 2), float32]) { - %0 = cast(%a, dtype="float32") + %b * %c; - %0 - }""" - ) - factory = tvm.relay.build( - relay_mod, - target, - runtime=Runtime("crt", {"system-lib": True}), - mod_name="add", - params={"c": np.array([[2.0, 4.0]], dtype="float32")}, - ) - - temp_dir = utils.tempdir() - mlf_tar_path = temp_dir.relpath("lib.tar") - - micro.export_model_library_format(factory, mlf_tar_path) - tf = tarfile.open(mlf_tar_path) - - extract_dir = temp_dir.relpath("extract") - os.mkdir(extract_dir) - tf.extractall(extract_dir) - - with open(os.path.join(extract_dir, "metadata.json")) as json_f: - metadata = json.load(json_f) - module_name = factory.libmod_name - assert metadata["version"] == _GENERATED_VERSION - assert metadata["modules"][module_name]["model_name"] == "add" - export_datetime = datetime.datetime.strptime( - metadata["modules"][module_name]["export_datetime"], "%Y-%m-%d %H:%M:%SZ" - ) - assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5) - assert metadata["modules"][module_name]["target"] == [str(target)] - assert metadata["modules"][module_name]["memory"]["sids"] == [ - {"storage_id": 0, "size_bytes": 2, "input_binding": "a"}, - {"storage_id": 1, "size_bytes": 8, "input_binding": "b"}, - {"storage_id": 2, "size_bytes": 8, "input_binding": "p0"}, - {"storage_id": 3, "size_bytes": 8}, - ] - assert metadata["modules"][module_name]["memory"]["functions"]["main"] == [ - { - "constants_size_bytes": 8, - "device": 1, - "inputs": { - "a": {"dtype": "uint8", "size": 2}, - "b": {"dtype": "float32", "size": 8}, - }, - "io_size_bytes": 18, - "outputs": {"output": {"dtype": "float32", "size": 8}}, - "workspace_size_bytes": 0, - } - ] - assert metadata["modules"][module_name]["memory"]["functions"]["operator_functions"][0][ - "workspace" - ] == [{"device": 1, "workspace_size_bytes": 0}] - assert ( - "fused_cast_multiply_add" - in metadata["modules"][module_name]["memory"]["functions"]["operator_functions"][0][ - "function_name" - ] - ) - - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "lib", "add_lib0.o")) - - validate_graph_json(extract_dir, factory) - - with open(os.path.join(extract_dir, "src", f"{module_name}.relay")) as relay_f: - assert relay_f.read() == str(relay_mod) - - with open(os.path.join(extract_dir, "parameters", "add.params"), "rb") as params_f: - params = tvm.relay.load_param_dict(params_f.read()) - assert "p0" in params - - -@tvm.testing.requires_micro -@pytest.mark.parametrize( - "executor,runtime", - [(Executor("graph"), Runtime("crt", {"system-lib": True})), (Executor("aot"), Runtime("crt"))], -) -def test_export_model_library_format_workspace(executor, runtime): - target = tvm.target.target.micro("host") - with tvm.transform.PassContext( - opt_level=3, config={"tir.disable_vectorize": True, "tir.usmp.enable": False} - 
): - relay_mod = tvm.relay.fromtext( - """ - #[version = "0.0.5"] - def @main(%p0: Tensor[(1, 56, 56, 128), int16], %p1: Tensor[(3, 3, 128, 1), int16], %p2: Tensor[(1, 1, 1, 128), int32]){ - %0 = nn.conv2d(%p0, %p1, padding=[1, 1, 1, 1], groups=128, channels=128, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWOI", out_dtype="int32") /* ty=Tensor[(1, 56, 56, 128), int32] */; - %1 = add(%0, %p2) /* ty=Tensor[(1, 56, 56, 128), int32] */; - %2 = fixed_point_multiply(%1, multiplier=2080045879, shift=-4) /* ty=Tensor[(1, 56, 56, 128), int32] */; - %3 = clip(%2, a_min=0f, a_max=255f) /* ty=Tensor[(1, 56, 56, 128), int32] */; - cast(%3, dtype="uint8") /* ty=Tensor[(1, 56, 56, 128), uint8] */ - } - """ - ) - factory = tvm.relay.build( - relay_mod, - target, - executor=executor, - runtime=runtime, - mod_name="qnn_conv2d", - ) - - temp_dir = utils.tempdir() - mlf_tar_path = temp_dir.relpath("lib.tar") - - micro.export_model_library_format(factory, mlf_tar_path) - tf = tarfile.open(mlf_tar_path) - - extract_dir = temp_dir.relpath("extract") - os.mkdir(extract_dir) - tf.extractall(extract_dir) - - with open(os.path.join(extract_dir, "metadata.json")) as json_f: - metadata = json.load(json_f) - module_name = factory.libmod_name - assert metadata["version"] == _GENERATED_VERSION - assert metadata["modules"][module_name]["model_name"] == "qnn_conv2d" - export_datetime = datetime.datetime.strptime( - metadata["modules"][module_name]["export_datetime"], "%Y-%m-%d %H:%M:%SZ" - ) - assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5) - assert metadata["modules"][module_name]["target"] == [str(target)] - assert metadata["modules"][module_name]["memory"]["functions"]["main"] == [ - { - "constants_size_bytes": 0, - "device": 1, - "inputs": { - "p0": {"dtype": "int16", "size": 802816}, - "p1": {"dtype": "int16", "size": 2304}, - "p2": {"dtype": "int32", "size": 512}, - }, - "io_size_bytes": 1207040, - "outputs": {"output": {"dtype": "uint8", "size": 401408}}, - "workspace_size_bytes": 2466816, - } - ] - assert metadata["modules"][module_name]["memory"]["functions"]["operator_functions"][0][ - "workspace" - ] == [{"device": 1, "workspace_size_bytes": 2466816}] - assert ( - "fused_nn_conv2d_add_fixed_point_multiply_clip_cast" - in metadata["modules"][module_name]["memory"]["functions"]["operator_functions"][0][ - "function_name" - ] - ) - - -@tvm.testing.requires_micro -def test_export_non_dso_exportable(): - module = tvm.support.FrontendTestModule() - - temp_dir = utils.tempdir() - - with pytest.raises(AssertionError) as exc: - model_library_format._populate_codegen_dir([module], temp_dir.relpath("codegen")) - - assert str(exc.exception) == ( - "Don't know how to export non-c or non-llvm modules; found: ffi_testing" - ) - - -@tvm.testing.requires_micro -def test_export_byoc_c_module(): - """Test BYOC flow when it produces DSO-exportable modules. - - NOTE the general BYOC flow is not fully supported by Model Library Format right now. 
- """ - x = tvm.relay.var("x", shape=(10, 10)) - w0 = tvm.relay.var("w0", shape=(10, 10)) - w1 = tvm.relay.var("w1", shape=(10, 10)) - w2 = tvm.relay.var("w2", shape=(10, 10)) - w3 = tvm.relay.var("w3", shape=(10, 10)) - w4 = tvm.relay.var("w4", shape=(10, 10)) - w5 = tvm.relay.var("w5", shape=(10, 10)) - w6 = tvm.relay.var("w6", shape=(10, 10)) - w7 = tvm.relay.var("w7", shape=(10, 10)) - - # C compiler - z0 = tvm.relay.add(x, w0) - p0 = tvm.relay.subtract(z0, w1) - q0 = tvm.relay.multiply(p0, w2) - - z1 = tvm.relay.add(x, w3) - p1 = tvm.relay.subtract(z1, w4) - q1 = tvm.relay.multiply(p1, w5) - - # Other parts on TVM - z2 = tvm.relay.add(x, w6) - q2 = tvm.relay.subtract(z2, w7) - - r = tvm.relay.concatenate((q0, q1, q2), axis=0) - f = tvm.relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r) - mod = tvm.IRModule() - ann = byoc.CcompilerAnnotator() - mod["main"] = ann.visit(f) - mod = tvm.relay.transform.PartitionGraph("mod_name")(mod) - mod = tvm.relay.transform.InferType()(mod) - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - factory = tvm.relay.build(mod, tvm.target.target.micro("host"), runtime=Runtime("crt")) - - temp_dir = utils.tempdir() - mlf_tar_path = temp_dir.relpath("lib.tar") - - micro.export_model_library_format(factory, mlf_tar_path) - - with tarfile.open(mlf_tar_path, "r:*") as tf: - tar_members = [ti.name for ti in tf.getmembers()] - print("tar members", tar_members) - assert "./metadata.json" in tar_members - with tf.extractfile("./metadata.json") as f: - metadata = json.load(f) - main_md = metadata["modules"][factory.libmod_name]["memory"]["functions"]["main"] - assert main_md == [ - { - "constants_size_bytes": 0, - "device": 1, - "inputs": { - "w0": {"dtype": "float32", "size": 400}, - "w1": {"dtype": "float32", "size": 400}, - "w2": {"dtype": "float32", "size": 400}, - "w3": {"dtype": "float32", "size": 400}, - "w4": {"dtype": "float32", "size": 400}, - "w5": {"dtype": "float32", "size": 400}, - "w6": {"dtype": "float32", "size": 400}, - "w7": {"dtype": "float32", "size": 400}, - "x": {"dtype": "float32", "size": 400}, - }, - "io_size_bytes": 4800, - "outputs": {"output": {"dtype": "float32", "size": 1200}}, - "workspace_size_bytes": 1200, - } - ] - - -@tvm.testing.requires_micro -def test_multiple_relay_modules_same_module_name(): - mod = get_conv2d_relay_module() - - executor = Executor("graph") - runtime = Runtime("crt") - target = tvm.target.target.micro("host") - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - factory1 = tvm.relay.build(mod, target, runtime=runtime, executor=executor, mod_name="mod") - factory2 = tvm.relay.build(mod, target, runtime=runtime, executor=executor, mod_name="mod") - - temp_dir = utils.tempdir() - mlf_tar_path = temp_dir.relpath("lib.tar") - - with pytest.raises(AssertionError, match="Multiple modules should have unique names"): - micro.export_model_library_format([factory1, factory2], mlf_tar_path) - - -@tvm.testing.requires_micro -def test_multiple_relay_modules_graph(): - mod = get_conv2d_relay_module() - - executor = Executor("graph") - runtime = Runtime("crt") - target = tvm.target.target.micro("host") - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - factory1 = tvm.relay.build(mod, target, runtime=runtime, executor=executor, mod_name="mod1") - factory2 = tvm.relay.build(mod, target, runtime=runtime, executor=executor, mod_name="mod2") - - temp_dir = utils.tempdir() - mlf_tar_path = 
temp_dir.relpath("lib.tar") - micro.export_model_library_format([factory1, factory2], mlf_tar_path) - - with tarfile.open(mlf_tar_path, "r:*") as tf: - tar_members = [ti.name for ti in tf.getmembers()] - print("tar members", tar_members) - assert "./metadata.json" in tar_members - assert "./codegen/host/src/mod1_lib0.c" in tar_members - assert "./codegen/host/src/mod2_lib0.c" in tar_members - - with tf.extractfile("./metadata.json") as f: - metadata = json.load(f) - mod2_main_md = metadata["modules"]["mod2"]["memory"]["functions"]["main"] - assert mod2_main_md == [ - { - "constants_size_bytes": 0, - "device": 1, - "inputs": { - "data": {"dtype": "int8", "size": 12288}, - "weight": {"dtype": "int8", "size": 600}, - }, - "io_size_bytes": 143960, - "outputs": {"output": {"dtype": "int32", "size": 131072}}, - "workspace_size_bytes": 158088, - } - ] - assert metadata["modules"]["mod1"]["model_name"] == "mod1" - assert metadata["modules"]["mod2"]["model_name"] == "mod2" - - -@tvm.testing.requires_micro -def test_multiple_relay_modules_c(): - mod = get_conv2d_relay_module() - - executor = Executor("aot", {"unpacked-api": True, "interface-api": "c"}) - runtime = Runtime("crt") - target = tvm.target.target.micro("host") - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - factory1 = tvm.relay.build(mod, target, runtime=runtime, executor=executor, mod_name="mod1") - factory2 = tvm.relay.build(mod, target, runtime=runtime, executor=executor, mod_name="mod2") - - temp_dir = utils.tempdir() - mlf_tar_path = temp_dir.relpath("lib.tar") - - micro.export_model_library_format([factory1, factory2], mlf_tar_path) - - tf = tarfile.open(mlf_tar_path) - - extract_dir = temp_dir.relpath("extract") - os.mkdir(extract_dir) - tf.extractall(extract_dir) - - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "mod1_lib0.c")) - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "mod1_lib1.c")) - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "mod2_lib0.c")) - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "mod2_lib1.c")) - - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "include", "tvmgen_mod1.h")) - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "include", "tvmgen_mod2.h")) - - # check CRT runtime directory - assert os.path.exists(os.path.join(extract_dir, "runtime")) - - -@tvm.testing.requires_micro -def test_multiple_relay_modules_aot_graph(): - mod = get_conv2d_relay_module() - - executor1 = Executor("graph") - executor2 = Executor("aot", {"unpacked-api": True, "interface-api": "c"}) - runtime = Runtime("crt") - target = tvm.target.target.micro("host") - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - factory1 = tvm.relay.build( - mod, target, runtime=runtime, executor=executor1, mod_name="mod1" - ) - factory2 = tvm.relay.build( - mod, target, runtime=runtime, executor=executor2, mod_name="mod2" - ) - - temp_dir = utils.tempdir() - mlf_tar_path = temp_dir.relpath("lib.tar") - - micro.export_model_library_format([factory1, factory2], mlf_tar_path) - - tf = tarfile.open(mlf_tar_path) - extract_dir = temp_dir.relpath("extract") - os.mkdir(extract_dir) - tf.extractall(extract_dir) - - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "mod1_lib0.c")) - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "mod1_lib1.c")) - assert 
os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "mod2_lib0.c")) - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "mod2_lib1.c")) - - assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "include", "tvmgen_mod2.h")) - - with open(os.path.join(extract_dir, "metadata.json")) as f: - metadata = json.load(f) - - assert metadata["modules"]["mod1"]["executors"] == ["graph"] - assert metadata["modules"]["mod2"]["executors"] == ["aot"] - assert metadata["version"] == _GENERATED_VERSION - - -@tvm.testing.requires_micro -def test_output_name_single(): - """Generate a conv2d Relay module for testing.""" - input_a = tvm.relay.var("input_a", shape=(3, 4, 5), dtype="int64") - output_1 = input_a + tvm.relay.const(1, "int64") - attrs = tvm.ir.make_node("DictAttrs", output_tensor_names=["test_output_a"]) - main_func = tvm.relay.Function([input_a], output_1, attrs=attrs) - mod = tvm.IRModule.from_expr(main_func) - mod = tvm.relay.transform.InferType()(mod) - - executor = Executor("aot", {"unpacked-api": True, "interface-api": "c"}) - runtime = Runtime("crt") - target = tvm.target.target.micro("host") - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - factory = tvm.relay.build(mod, target, runtime=runtime, executor=executor, mod_name="mod1") - temp_dir = utils.tempdir() - mlf_tar_path = temp_dir.relpath("lib.tar") - - micro.export_model_library_format(factory, mlf_tar_path) - - tf = tarfile.open(mlf_tar_path) - extract_dir = temp_dir.relpath("extract") - os.mkdir(extract_dir) - tf.extractall(extract_dir) - - with open(os.path.join(extract_dir, "metadata.json")) as f: - metadata = json.load(f) - - assert metadata["modules"]["mod1"]["memory"]["functions"]["main"][0]["outputs"] == { - "test_output_a": {"size": 480, "dtype": "int64"} - } - - -@tvm.testing.requires_micro -def test_output_names_many(): - """Generate a conv2d Relay module for testing.""" - input_a = tvm.relay.var("input_a", shape=(3, 4, 5), dtype="int64") - input_b = tvm.relay.var("input_b", shape=(3, 4), dtype="int32") - input_c = tvm.relay.var("input_c", shape=(3,), dtype="float32") - - output_1 = input_a + tvm.relay.const(1, "int64") - output_2 = input_b + tvm.relay.const(2) - output_3 = input_b + tvm.relay.const(3) - output_4 = input_c + tvm.relay.const(4.0) - - full_output = tvm.relay.Tuple( - [output_1, tvm.relay.Tuple([tvm.relay.Tuple([output_2, output_3]), output_4])] - ) - attrs = tvm.ir.make_node( - "DictAttrs", - output_tensor_names=["test_output_a", "test_output_b", "test_output_c", "test_output_d"], - ) - main_func = tvm.relay.Function([input_a, input_b, input_c], full_output, attrs=attrs) - mod = tvm.IRModule.from_expr(main_func) - mod = tvm.relay.transform.InferType()(mod) - - executor = Executor("aot", {"unpacked-api": True, "interface-api": "c"}) - runtime = Runtime("crt") - target = tvm.target.target.micro("host") - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - factory = tvm.relay.build(mod, target, runtime=runtime, executor=executor, mod_name="mod1") - temp_dir = utils.tempdir() - mlf_tar_path = temp_dir.relpath("lib.tar") - - micro.export_model_library_format(factory, mlf_tar_path) - - tf = tarfile.open(mlf_tar_path) - extract_dir = temp_dir.relpath("extract") - os.mkdir(extract_dir) - tf.extractall(extract_dir) - - with open(os.path.join(extract_dir, "metadata.json")) as f: - metadata = json.load(f) - - assert metadata["modules"]["mod1"]["memory"]["functions"]["main"][0]["outputs"] == { - 
"test_output_a": {"size": 480, "dtype": "int64"}, - "test_output_b": {"size": 48, "dtype": "int32"}, - "test_output_c": {"size": 48, "dtype": "int32"}, - "test_output_d": {"size": 12, "dtype": "float32"}, - } - - -@tvm.testing.requires_micro -def test_template_files(): - """Check template files in generated model library format.""" - mod = get_conv2d_relay_module() - - executor = Executor("aot", {"unpacked-api": True, "interface-api": "c"}) - runtime = Runtime("crt") - target = tvm.target.target.micro("host") - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - factory = tvm.relay.build(mod, target, runtime=runtime, executor=executor, mod_name="mod") - - temp_dir = utils.tempdir() - mlf_tar_path = temp_dir / "lib.tar" - micro.export_model_library_format(factory, mlf_tar_path) - - tf = tarfile.open(mlf_tar_path) - extract_dir = temp_dir / "extract" - os.mkdir(extract_dir) - tf.extractall(extract_dir) - - assert (extract_dir / "templates" / "crt_config.h.template").is_file() - assert (extract_dir / "templates" / "platform.c.template").is_file() - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/micro/test_micro_ms_tuning.py b/tests/python/micro/test_micro_ms_tuning.py deleted file mode 100644 index 1a06c100b424..000000000000 --- a/tests/python/micro/test_micro_ms_tuning.py +++ /dev/null @@ -1,130 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np -import pytest -from types import MappingProxyType -import pathlib -import json -import tvm -import tvm.testing -from tvm import relay -from tvm.relay.backend import Executor -from tvm.contrib import graph_executor, utils -from tvm import meta_schedule as ms - - -@pytest.mark.skip(reason="flaky test") -@tvm.testing.requires_micro -def test_micro_tuning_with_meta_schedule(): - from tests.micro.zephyr.test_ms_tuning import create_relay_module - from tvm.contrib.micro.meta_schedule.local_builder_micro import get_local_builder_micro - from tvm.contrib.micro.meta_schedule.rpc_runner_micro import get_rpc_runner_micro - - platform = "crt" - target = tvm.target.target.micro(model="host") - options = {} - - work_dir = utils.tempdir() - mod, params, model_info = create_relay_module() - input_name = model_info["in_tensor"] - input_shape = model_info["in_shape"] - input_dtype = model_info["in_dtype"] - data_sample = np.random.rand(*input_shape).astype(input_dtype) - - runtime = relay.backend.Runtime("crt", {"system-lib": True}) - executor = Executor("aot", {"link-params": True}) - # This line is necessary for link-params to take effect during - # task extraction and relay.build(...). 
- mod = mod.with_attr("executor", executor) - - builder = get_local_builder_micro() - - with ms.Profiler() as profiler: - with get_rpc_runner_micro( - platform=platform, options=options, session_timeout_sec=120 - ) as runner: - db: ms.Database = ms.relay_integration.tune_relay( - mod=mod, - params=params, - target=target, - builder=builder, - runner=runner, - strategy="evolutionary", - num_trials_per_iter=2, - max_trials_per_task=10, - max_trials_global=100, - work_dir=str(work_dir.path), - module_equality="ignore-ndarray", - ) - - # Build model using meta_schedule logs - ms_mod: tvm.runtime.Module = ms.relay_integration.compile_relay( - database=db, - mod=mod, - target=target, - params=params, - pass_config=MappingProxyType( - { - "relay.backend.use_meta_schedule": True, - "relay.backend.tir_converter": "default", - "tir.disable_vectorize": True, - } - ), - executor=executor, - runtime=runtime, - ) - print(profiler.table()) - - project = tvm.micro.generate_project( - str(tvm.micro.get_microtvm_template_projects(platform)), - ms_mod, - str(work_dir / "project"), - options=options, - ) - project.build() - project.flash() - with tvm.micro.Session(project.transport()) as session: - aot_executor = tvm.runtime.executor.aot_executor.AotModule(session.create_aot_executor()) - aot_executor.get_input(0).copyfrom(data_sample) - result = aot_executor.module.time_evaluator("run", session.device, number=3)() - output = aot_executor.get_output(0).numpy() - - # Build reference model (without tuning) - dev = tvm.cpu() - target = tvm.target.target.micro(model="host") - with tvm.transform.PassContext( - opt_level=3, config={"tir.disable_vectorize": True}, disabled_pass=["AlterOpLayout"] - ): - ref_mod = relay.build( - mod, - target=target, - params=params, - runtime=runtime, - ) - ref_mod.export_library(work_dir / "compiled_lib2.so") - mod2: tvm.runtime.Module = tvm.runtime.load_module(work_dir / "compiled_lib2.so") - graph_mod = graph_executor.GraphModule(mod2["default"](dev)) - graph_mod.set_input(input_name, data_sample) - graph_mod.run() - ref_output = graph_mod.get_output(0).numpy() - - assert np.allclose(output, ref_output, rtol=1e-4, atol=2e-4), "FAILED" - work_dir.remove() - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/micro/test_micro_project_api.py b/tests/python/micro/test_micro_project_api.py deleted file mode 100644 index 8e4fe6700e00..000000000000 --- a/tests/python/micro/test_micro_project_api.py +++ /dev/null @@ -1,533 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import collections -import io -import json -import sys -import unittest -from unittest import mock - -import pytest - -import tvm -import tvm.testing - - -# Implementing as a fixture so that the tvm.micro import doesn't occur -# until fixture setup time. 
This is necessary for pytest's collection -# phase to work when USE_MICRO=OFF, while still explicitly listing the -# tests as skipped. -@tvm.testing.fixture -def BaseTestHandler(): - from tvm.micro import project_api - - class BaseTestHandler_Impl(project_api.server.ProjectAPIHandler): - - DEFAULT_TEST_SERVER_INFO = project_api.server.ServerInfo( - platform_name="platform_name", - is_template=True, - model_library_format_path="./model-library-format-path.sh", - project_options=[ - project_api.server.ProjectOption( - name="foo", optional=["build"], type="bool", help="Option foo" - ), - project_api.server.ProjectOption( - name="bar", - required=["generate_project"], - type="str", - choices=["qux"], - help="Option bar", - ), - ], - ) - - def server_info_query(self, tvm_version): - return self.DEFAULT_TEST_SERVER_INFO - - def generate_project(self, model_library_format_path, crt_path, project_path, options): - assert False, "generate_project is not implemented for this test" - - def build(self, options): - assert False, "build is not implemented for this test" - - def flash(self, options): - assert False, "flash is not implemented for this test" - - def open_transport(self, options): - assert False, "open_transport is not implemented for this test" - - def close_transport(self, options): - assert False, "open_transport is not implemented for this test" - - def read_transport(self, n, timeout_sec): - assert False, "read_transport is not implemented for this test" - - def write_transport(self, data, timeout_sec): - assert False, "write_transport is not implemented for this test" - - return BaseTestHandler_Impl - - -class Transport: - def readable(self): - return True - - def writable(self): - return True - - def seekable(self): - return False - - closed = False - - def __init__(self): - self.data = bytearray() - self.rpos = 0 - - self.items = [] - - def read(self, size=-1): - to_read = len(self.data) - self.rpos - if size != -1: - to_read = min(size, to_read) - - rpos = self.rpos - self.rpos += to_read - return self.data[rpos : self.rpos] - - def write(self, data): - self.data.extend(data) - - -class ClientServerFixture: - def __init__(self, handler): - from tvm.micro import project_api - - self.handler = handler - self.client_to_server = Transport() - self.server_to_client = Transport() - - self.server = project_api.server.ProjectAPIServer( - self.client_to_server, self.server_to_client, handler - ) - self.client = project_api.client.ProjectAPIClient( - self.server_to_client, - self.client_to_server, - testonly_did_write_request=self._process_server_request, - ) - - self.expect_failure = False - - def _process_server_request(self): - assert self.server.serve_one_request() == ( - not self.expect_failure - ), "Server failed to process request" - - -@tvm.testing.requires_micro -def test_server_info_query(BaseTestHandler): - fixture = ClientServerFixture(BaseTestHandler()) - - # Examine reply explicitly because these are the defaults for all derivative test cases. 
- reply = fixture.client.server_info_query(tvm.__version__) - assert reply["protocol_version"] == 1 - assert reply["platform_name"] == "platform_name" - assert reply["is_template"] == True - assert reply["model_library_format_path"] == "./model-library-format-path.sh" - assert reply["project_options"] == [ - { - "name": "foo", - "choices": None, - "default": None, - "type": "bool", - "required": None, - "optional": ["build"], - "help": "Option foo", - }, - { - "name": "bar", - "choices": ["qux"], - "default": None, - "type": "str", - "required": ["generate_project"], - "optional": None, - "help": "Option bar", - }, - ] - - -@tvm.testing.requires_micro -def test_server_info_query_wrong_tvm_version(BaseTestHandler): - from tvm.micro import project_api - - def server_info_query(tvm_version): - raise project_api.server.UnsupportedTVMVersionError() - - with mock.patch.object(BaseTestHandler, "server_info_query", side_effect=server_info_query): - fixture = ClientServerFixture(BaseTestHandler()) - with pytest.raises(project_api.server.UnsupportedTVMVersionError) as exc_info: - fixture.client.server_info_query(tvm.__version__) - - assert "UnsupportedTVMVersionError" in str(exc_info.value) - - -@tvm.testing.requires_micro -def test_server_info_query_wrong_protocol_version(BaseTestHandler): - from tvm.micro import project_api - - ServerInfoProtocol = collections.namedtuple( - "ServerInfoProtocol", list(project_api.server.ServerInfo._fields) + ["protocol_version"] - ) - - def server_info_query(tvm_version): - return ServerInfoProtocol( - protocol_version=0, **BaseTestHandler.DEFAULT_TEST_SERVER_INFO._asdict() - ) - - with mock.patch.object(BaseTestHandler, "server_info_query", side_effect=server_info_query): - fixture = ClientServerFixture(BaseTestHandler()) - with pytest.raises(project_api.client.UnsupportedProtocolVersionError) as exc_info: - fixture.client.server_info_query(tvm.__version__) - - assert "microTVM API Server supports protocol version 0; want 1" in str(exc_info.value) - - -@tvm.testing.requires_micro -def test_base_test_handler(BaseTestHandler): - """All methods should raise AssertionError on BaseTestHandler.""" - fixture = ClientServerFixture(BaseTestHandler()) - - for method in dir(fixture.handler): - if method.startswith("_") or not callable(method) or method == "server_info_query": - continue - - with self.assertThrows(AssertionError) as exc_info: - getattr(fixture.client, method)() - - assert (exc_info.exception) == f"{method} is not implemented for this test" - - -@tvm.testing.requires_micro -def test_build(BaseTestHandler): - with mock.patch.object(BaseTestHandler, "build", return_value=None) as patch: - fixture = ClientServerFixture(BaseTestHandler()) - fixture.client.build(options={"bar": "baz"}) - - fixture.handler.build.assert_called_once_with(options={"bar": "baz"}) - - -@tvm.testing.requires_micro -def test_flash(BaseTestHandler): - with mock.patch.object(BaseTestHandler, "flash", return_value=None) as patch: - fixture = ClientServerFixture(BaseTestHandler()) - fixture.client.flash(options={"bar": "baz"}) - fixture.handler.flash.assert_called_once_with(options={"bar": "baz"}) - - -@tvm.testing.requires_micro -def test_open_transport(BaseTestHandler): - from tvm.micro import project_api - - timeouts = project_api.server.TransportTimeouts( - session_start_retry_timeout_sec=1.0, - session_start_timeout_sec=2.0, - session_established_timeout_sec=3.0, - ) - - with mock.patch.object(BaseTestHandler, "open_transport", return_value=timeouts) as patch: - fixture = 
ClientServerFixture(BaseTestHandler()) - assert fixture.client.open_transport(options={"bar": "baz"}) == { - "timeouts": dict(timeouts._asdict()) - } - fixture.handler.open_transport.assert_called_once_with({"bar": "baz"}) - - -@tvm.testing.requires_micro -def test_close_transport(BaseTestHandler): - with mock.patch.object(BaseTestHandler, "close_transport", return_value=None) as patch: - fixture = ClientServerFixture(BaseTestHandler()) - fixture.client.close_transport() - fixture.handler.close_transport.assert_called_once_with() - - -@tvm.testing.requires_micro -def test_read_transport(BaseTestHandler): - from tvm.micro import project_api - - with mock.patch.object(BaseTestHandler, "read_transport", return_value=b"foo\x1b") as patch: - fixture = ClientServerFixture(BaseTestHandler()) - assert fixture.client.read_transport(128, timeout_sec=5.0) == {"data": b"foo\x1b"} - - fixture.handler.read_transport.assert_called_with(128, 5.0) - - fixture.handler.read_transport.side_effect = project_api.server.IoTimeoutError - with pytest.raises(project_api.server.IoTimeoutError) as exc_info: - fixture.client.read_transport(256, timeout_sec=10.0) - - fixture.handler.read_transport.assert_called_with(256, 10.0) - - fixture.handler.read_transport.side_effect = project_api.server.TransportClosedError - with pytest.raises(project_api.server.TransportClosedError) as exc_info: - fixture.client.read_transport(512, timeout_sec=15.0) - - fixture.handler.read_transport.assert_called_with(512, 15.0) - - assert fixture.handler.read_transport.call_count == 3 - - -@tvm.testing.requires_micro -def test_write_transport(BaseTestHandler): - from tvm.micro import project_api - - with mock.patch.object(BaseTestHandler, "write_transport", return_value=None) as patch: - fixture = ClientServerFixture(BaseTestHandler()) - assert fixture.client.write_transport(b"foo", timeout_sec=5.0) is None - fixture.handler.write_transport.assert_called_with(b"foo", 5.0) - - fixture.handler.write_transport.side_effect = project_api.server.IoTimeoutError - with pytest.raises(project_api.server.IoTimeoutError) as exc_info: - fixture.client.write_transport(b"bar", timeout_sec=10.0) - - fixture.handler.write_transport.assert_called_with(b"bar", 10.0) - - fixture.handler.write_transport.side_effect = project_api.server.TransportClosedError - with pytest.raises(project_api.server.TransportClosedError) as exc_info: - fixture.client.write_transport(b"baz", timeout_sec=15.0) - - fixture.handler.write_transport.assert_called_with(b"baz", 15.0) - - assert fixture.handler.write_transport.call_count == 3 - - -class ProjectAPITestError(Exception): - """An error raised in test.""" - - -@tvm.testing.requires_micro -def test_method_raises_error(BaseTestHandler): - from tvm.micro import project_api - - with mock.patch.object( - BaseTestHandler, "close_transport", side_effect=ProjectAPITestError - ) as patch: - fixture = ClientServerFixture(BaseTestHandler()) - with pytest.raises(project_api.server.ServerError) as exc_info: - fixture.client.close_transport() - - fixture.handler.close_transport.assert_called_once_with() - assert "ProjectAPITestError" in str(exc_info.value) - - -@tvm.testing.requires_micro -def test_method_not_found(BaseTestHandler): - from tvm.micro import project_api - - fixture = ClientServerFixture(BaseTestHandler()) - - with pytest.raises(project_api.server.JSONRPCError) as exc_info: - fixture.client._request_reply("invalid_method", {"bar": None}) - - assert exc_info.value.code == project_api.server.ErrorCode.METHOD_NOT_FOUND - - 
-@tvm.testing.requires_micro
-def test_extra_param(BaseTestHandler):
-    from tvm.micro import project_api
-
-    fixture = ClientServerFixture(BaseTestHandler())
-
-    # test one with has_preprocessing and one without
-    assert hasattr(fixture.server, "_dispatch_build") == False
-    with pytest.raises(project_api.server.JSONRPCError) as exc_info:
-        fixture.client._request_reply("build", {"invalid_param_name": None, "options": {}})
-
-    assert exc_info.value.code == project_api.server.ErrorCode.INVALID_PARAMS
-    assert "build: extra parameters: invalid_param_name" in str(exc_info.value)
-
-    assert hasattr(fixture.server, "_dispatch_open_transport") == True
-    with pytest.raises(project_api.server.JSONRPCError) as exc_info:
-        fixture.client._request_reply("open_transport", {"invalid_param_name": None, "options": {}})
-
-    assert exc_info.value.code == project_api.server.ErrorCode.INVALID_PARAMS
-    assert "open_transport: extra parameters: invalid_param_name" in str(exc_info.value)
-
-
-@tvm.testing.requires_micro
-def test_missing_param(BaseTestHandler):
-    from tvm.micro import project_api
-
-    fixture = ClientServerFixture(BaseTestHandler())
-
-    # test one with has_preprocessing and one without
-    assert hasattr(fixture.server, "_dispatch_build") == False
-    with pytest.raises(project_api.server.JSONRPCError) as exc_info:
-        fixture.client._request_reply("build", {})
-
-    assert exc_info.value.code == project_api.server.ErrorCode.INVALID_PARAMS
-    assert "build: parameter options not given" in str(exc_info.value)
-
-    assert hasattr(fixture.server, "_dispatch_open_transport") == True
-    with pytest.raises(project_api.server.JSONRPCError) as exc_info:
-        fixture.client._request_reply("open_transport", {})
-
-    assert exc_info.value.code == project_api.server.ErrorCode.INVALID_PARAMS
-    assert "open_transport: parameter options not given" in str(exc_info.value)
-
-
-@tvm.testing.requires_micro
-def test_incorrect_param_type(BaseTestHandler):
-    from tvm.micro import project_api
-
-    fixture = ClientServerFixture(BaseTestHandler())
-
-    # The error message given at the JSON-RPC server level doesn't make sense when preprocessing is
-    # used. Only test without preprocessing here.
-    assert hasattr(fixture.server, "_dispatch_build") == False
-    with pytest.raises(project_api.server.JSONRPCError) as exc_info:
-        fixture.client._request_reply("build", {"options": None})
-
-    assert exc_info.value.code == project_api.server.ErrorCode.INVALID_PARAMS
-    assert "build: parameter options: want <class 'dict'>, got <class 'NoneType'>" in str(
-        exc_info.value
-    )
-
-
-@tvm.testing.requires_micro
-def test_invalid_request(BaseTestHandler):
-    from tvm.micro import project_api
-
-    fixture = ClientServerFixture(BaseTestHandler())
-
-    # Invalid JSON does not get a reply.
-    fixture.client_to_server.write(b"foobar\n")
-    assert fixture.server.serve_one_request() == False
-    assert fixture.server_to_client.read() == b""
-
-    # EOF causes a clean return
-    assert fixture.server.serve_one_request() == False
-    assert fixture.server_to_client.read() == b""
-
-    def _request_reply(request):
-        fixture.client_to_server.write(request + b"\n")
-        assert fixture.server.serve_one_request() == False
-        return json.loads(fixture.server_to_client.read())
-
-    # Parseable JSON with the wrong schema gets a reply.
-    assert _request_reply(b"1") == {
-        "error": {
-            "code": project_api.server.ErrorCode.INVALID_REQUEST,
-            "data": None,
-            "message": "request: want dict; got 1",
-        },
-        "id": None,
-        "jsonrpc": "2.0",
-    }
-
-    # Incorrect JSON-RPC spec version.
- assert _request_reply(b'{"jsonrpc": 1.0}') == { - "error": { - "code": project_api.server.ErrorCode.INVALID_REQUEST, - "data": None, - "message": 'request["jsonrpc"]: want "2.0"; got 1.0', - }, - "id": None, - "jsonrpc": "2.0", - } - - # Method not a str - assert _request_reply(b'{"jsonrpc": "2.0", "method": 123}') == { - "error": { - "code": project_api.server.ErrorCode.INVALID_REQUEST, - "data": None, - "message": 'request["method"]: want str; got 123', - }, - "id": None, - "jsonrpc": "2.0", - } - - # Method name has invalid characters - assert _request_reply(b'{"jsonrpc": "2.0", "method": "bar!"}') == { - "error": { - "code": project_api.server.ErrorCode.INVALID_REQUEST, - "data": None, - "message": "request[\"method\"]: should match regex ^[a-zA-Z0-9_]+$; got 'bar!'", - }, - "id": None, - "jsonrpc": "2.0", - } - - # params not a dict - assert _request_reply(b'{"jsonrpc": "2.0", "method": "bar", "params": 123}') == { - "error": { - "code": project_api.server.ErrorCode.INVALID_REQUEST, - "data": None, - "message": "request[\"params\"]: want dict; got ", - }, - "id": None, - "jsonrpc": "2.0", - } - - # id not valid - assert _request_reply(b'{"jsonrpc": "2.0", "method": "bar", "params": {}, "id": {}}') == { - "error": { - "code": project_api.server.ErrorCode.INVALID_REQUEST, - "data": None, - "message": 'request["id"]: want str, number, null; got {}', - }, - "id": None, - "jsonrpc": "2.0", - } - - -@tvm.testing.requires_micro -def test_default_project_options(): - from tvm.micro import project_api - - default_options = project_api.server.default_project_options() - names = [] - for option in default_options: - names.append(option.name) - if option.name == "verbose": - assert "generate_project" in option.optional - if option.name in ["project_type", "board"]: - assert "generate_project" in option.required - if option.name == "warning_as_error": - assert "generate_project" in option.optional - - for name in ["verbose", "project_type", "board", "cmsis_path", "warning_as_error"]: - assert name in names - - -@tvm.testing.requires_micro -def test_modified_project_options(): - from tvm.micro import project_api - - modified_options = project_api.server.default_project_options( - verbose={"optional": ["flash"], "required": ["build"]}, - board={"choices": ["board1", "board2"]}, - ) - for option in modified_options: - if option.name == "verbose": - assert option.optional == ["flash"] - assert option.required == ["build"] - if option.name == "board": - assert option.choices == ["board1", "board2"] - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/micro/test_micro_transport.py b/tests/python/micro/test_micro_transport.py deleted file mode 100644 index 804f83587f00..000000000000 --- a/tests/python/micro/test_micro_transport.py +++ /dev/null @@ -1,225 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Tests for common micro transports.""" - -import logging -import sys -import unittest - -import pytest - -import tvm.testing - - -# Implementing as a fixture so that the tvm.micro import doesn't occur -# until fixture setup time. This is necessary for pytest's collection -# phase to work when USE_MICRO=OFF, while still explicitly listing the -# tests as skipped. -@tvm.testing.fixture -def transport(): - import tvm.micro - - class MockTransport_Impl(tvm.micro.transport.Transport): - def __init__(self): - self.exc = None - self.to_return = None - - def _raise_or_return(self): - if self.exc is not None: - to_raise = self.exc - self.exc = None - raise to_raise - elif self.to_return is not None: - to_return = self.to_return - self.to_return = None - return to_return - else: - assert False, "should not get here" - - def open(self): - pass - - def close(self): - pass - - def timeouts(self): - raise NotImplementedError() - - def read(self, n, timeout_sec): - return self._raise_or_return() - - def write(self, data, timeout_sec): - return self._raise_or_return() - - return MockTransport_Impl() - - -@tvm.testing.fixture -def transport_logger(transport): - logger = logging.getLogger("transport_logger_test") - return tvm.micro.transport.TransportLogger("foo", transport, logger=logger) - - -@tvm.testing.fixture -def get_latest_log(caplog): - def inner(): - return caplog.records[-1].getMessage() - - with caplog.at_level(logging.INFO, "transport_logger_test"): - yield inner - - -@tvm.testing.requires_micro -def test_open(transport_logger, get_latest_log): - transport_logger.open() - assert get_latest_log() == "foo: opening transport" - - -@tvm.testing.requires_micro -def test_close(transport_logger, get_latest_log): - transport_logger.close() - assert get_latest_log() == "foo: closing transport" - - -@tvm.testing.requires_micro -def test_read_normal(transport, transport_logger, get_latest_log): - transport.to_return = b"data" - transport_logger.read(23, 3.0) - assert get_latest_log() == ( - "foo: read { 3.00s} 23 B -> [ 4 B]: 64 61 74 61" - " data" - ) - - -@tvm.testing.requires_micro -def test_read_multiline(transport, transport_logger, get_latest_log): - transport.to_return = b"data" * 6 - transport_logger.read(23, 3.0) - assert get_latest_log() == ( - "foo: read { 3.00s} 23 B -> [ 24 B]:\n" - "0000 64 61 74 61 64 61 74 61 64 61 74 61 64 61 74 61 datadatadatadata\n" - "0010 64 61 74 61 64 61 74 61 datadata" - ) - - -@tvm.testing.requires_micro -def test_read_no_timeout_prints(transport, transport_logger, get_latest_log): - transport.to_return = b"data" - transport_logger.read(15, None) - assert get_latest_log() == ( - "foo: read { None } 15 B -> [ 4 B]: 64 61 74 61" - " data" - ) - - -@tvm.testing.requires_micro -def test_read_io_timeout(transport, transport_logger, get_latest_log): - # IoTimeoutError includes the timeout value. - transport.exc = tvm.micro.transport.IoTimeoutError() - with pytest.raises(tvm.micro.transport.IoTimeoutError): - transport_logger.read(23, 0.0) - - assert get_latest_log() == ("foo: read { 0.00s} 23 B -> [IoTimeoutError 0.00s]") - - -@tvm.testing.requires_micro -def test_read_other_exception(transport, transport_logger, get_latest_log): - # Other exceptions are logged by name. 
- transport.exc = tvm.micro.transport.TransportClosedError() - with pytest.raises(tvm.micro.transport.TransportClosedError): - transport_logger.read(8, 0.0) - - assert get_latest_log() == ("foo: read { 0.00s} 8 B -> [err: TransportClosedError]") - - -@tvm.testing.requires_micro -def test_read_keyboard_interrupt(transport, transport_logger, get_latest_log): - # KeyboardInterrupt produces no log record. - transport.exc = KeyboardInterrupt() - with pytest.raises(KeyboardInterrupt): - transport_logger.read(8, 0.0) - - with pytest.raises(IndexError): - get_latest_log() - - -@tvm.testing.requires_micro -def test_write_normal(transport, transport_logger, get_latest_log): - transport.to_return = 3 - transport_logger.write(b"data", 3.0) - assert get_latest_log() == ( - "foo: write { 3.00s} <- [ 4 B]: 64 61 74 61" - " data" - ) - - -@tvm.testing.requires_micro -def test_write_multiline(transport, transport_logger, get_latest_log): - # Normal log, multi-line data written. - transport.to_return = 20 - transport_logger.write(b"data" * 6, 3.0) - assert get_latest_log() == ( - "foo: write { 3.00s} <- [ 24 B]:\n" - "0000 64 61 74 61 64 61 74 61 64 61 74 61 64 61 74 61 datadatadatadata\n" - "0010 64 61 74 61 64 61 74 61 datadata" - ) - - -@tvm.testing.requires_micro -def test_write_no_timeout_prints(transport, transport_logger, get_latest_log): - transport.to_return = 3 - transport_logger.write(b"data", None) - assert get_latest_log() == ( - "foo: write { None } <- [ 4 B]: 64 61 74 61" - " data" - ) - - -@tvm.testing.requires_micro -def test_write_io_timeout(transport, transport_logger, get_latest_log): - # IoTimeoutError includes the timeout value. - transport.exc = tvm.micro.transport.IoTimeoutError() - with pytest.raises(tvm.micro.transport.IoTimeoutError): - transport_logger.write(b"data", 0.0) - - assert get_latest_log() == ("foo: write { 0.00s} <- [ 4 B]: [IoTimeoutError 0.00s]") - - -@tvm.testing.requires_micro -def test_write_other_exception(transport, transport_logger, get_latest_log): - # Other exceptions are logged by name. - transport.exc = tvm.micro.transport.TransportClosedError() - with pytest.raises(tvm.micro.transport.TransportClosedError): - transport_logger.write(b"data", 0.0) - - assert get_latest_log() == ("foo: write { 0.00s} <- [ 4 B]: [err: TransportClosedError]") - - -@tvm.testing.requires_micro -def test_write_keyboard_interrupt(transport, transport_logger, get_latest_log): - # KeyboardInterrupt produces no log record. - transport.exc = KeyboardInterrupt() - with pytest.raises(KeyboardInterrupt): - transport_logger.write(b"data", 0.0) - - with pytest.raises(IndexError): - get_latest_log() - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/aot/aprofile_aem.mk b/tests/python/relay/aot/aprofile_aem.mk deleted file mode 100644 index a8d4445e266e..000000000000 --- a/tests/python/relay/aot/aprofile_aem.mk +++ /dev/null @@ -1,99 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Makefile to build and run AOT tests against the AArch64 -# reference system - -CC = clang-16 -LD = aarch64-none-elf-gcc - -TARGET_ARCH = --target=aarch64-none-elf -march=armv9-a+sme -SYS_ROOT = /opt/arm/gcc-aarch64-none-elf/aarch64-none-elf/ - -OBJ_FILES := $(build_dir)/test.o $(build_dir)/aprofile_extra_support_routines.o -INCLUDES = -I$(SRC_DIR) \ - -I$(TVM_ROOT)/include \ - -I$(build_dir)/../include - -ifneq ($(CODEGEN_ROOT),) - OBJ_FILES := $(OBJ_FILES) $(wildcard $(CODEGEN_ROOT)/host/lib/*.o) - INCLUDES := $(INCLUDES) -I$(CODEGEN_ROOT)/host/include -endif - -ifneq ($(STANDALONE_CRT_DIR),) - OBJ_FILES := $(OBJ_FILES) $(build_dir)/stack_allocator.o \ - $(build_dir)/crt_backend_api.o - INCLUDES := $(INCLUDES) -isystem$(STANDALONE_CRT_DIR)/include -endif - -PKG_LDFLAGS = --specs=$(SYS_ROOT)lib/aem-ve.specs --sysroot $(SYS_ROOT) -PKG_CFLAGS = $(INCLUDES) --sysroot $(SYS_ROOT) -c -O3 $(CFLAGS) -PKG_ASFLAGS = $(INCLUDES) --sysroot $(SYS_ROOT) -c - -aot_test_runner: $(build_dir)/aot_test_runner - -$(build_dir)/aot_test_runner: $(OBJ_FILES) - $(LD) $(INCLUDES) $(PKG_LDFLAGS) -o $@ $^ - -$(build_dir)/test.o: $(build_dir)/test.c - $(CC) $(TARGET_ARCH) $(PKG_CFLAGS) -o $@ $< - -# TODO(lhutton1) This is a workaround while __arm_tpidr2_save and -# __arm_tpidr2_restore are not provided with the toolchain. More -# information in aprofile_extra_support_routines.c. 
-$(build_dir)/aprofile_extra_support_routines.o: ${AOT_TEST_ROOT}/aprofile_extra_support_routines.c - $(CC) $(TARGET_ARCH) $(PKG_CFLAGS) -o $@ $< - -$(build_dir)/stack_allocator.o: $(STANDALONE_CRT_DIR)/src/runtime/crt/memory/stack_allocator.c - $(CC) $(TARGET_ARCH) $(PKG_CFLAGS) -o $@ $< - -$(build_dir)/crt_backend_api.o: $(STANDALONE_CRT_DIR)/src/runtime/crt/common/crt_backend_api.c - $(CC) $(TARGET_ARCH) $(PKG_CFLAGS) -o $@ $< - -run: $(build_dir)/aot_test_runner - $(FVP_DIR)/FVP_Base_RevC-2xAEMvA \ - -a $(build_dir)/aot_test_runner \ - --plugin $(FVP_DIR)../../plugins/Linux64_GCC-9.3/ScalableVectorExtension.so \ - -C SVE.ScalableVectorExtension.has_sme2=1 \ - -C SVE.ScalableVectorExtension.has_sme=1 \ - -C SVE.ScalableVectorExtension.has_sve2=1 \ - -C SVE.ScalableVectorExtension.enable_at_reset=1 \ - -C cluster0.has_arm_v9-2=1 \ - -C bp.secure_memory=false \ - -C bp.terminal_0.start_telnet=0 \ - -C bp.terminal_1.start_telnet=0 \ - -C bp.terminal_2.start_telnet=0 \ - -C bp.terminal_3.start_telnet=0 \ - -C bp.vis.disable_visualisation=1 \ - -C bp.pl011_uart0.out_file="-" \ - -C bp.pl011_uart0.shutdown_tag=\"EXITTHESIM\" \ - -C semihosting-enable=1 - -# Note: It's possible to trace instructions running on the FVP by adding the option -# --plugin /opt/arm/fvp/Base_RevC_AEMvA_pkg/plugins/Linux64_GCC-9.3/TarmacTrace.so - -clean: - rm -rf $(build_dir)/crt - -cleanall: - rm -rf $(build_dir) - -.SUFFIXES: - -.DEFAULT: aot_test_runner - -.PHONY: run diff --git a/tests/python/relay/aot/aprofile_extra_support_routines.c b/tests/python/relay/aot/aprofile_extra_support_routines.c deleted file mode 100644 index 9d8fde158041..000000000000 --- a/tests/python/relay/aot/aprofile_extra_support_routines.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// The support routines __arm_tpidr2_save and __arm_tpidr2_restore are not -// yet available in the latest release of the gcc-aarch64-none-elf toolchain -// (13.2.rel1). For now, we can provide the symbol to fix the build at least. -// When they are provided in later releases, these declarations can be removed. -void __arm_tpidr2_save(void) {} -void __arm_tpidr2_restore(void) {} diff --git a/tests/python/relay/aot/corstone300.ld b/tests/python/relay/aot/corstone300.ld deleted file mode 100644 index bee82a98436f..000000000000 --- a/tests/python/relay/aot/corstone300.ld +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*------------------ Reference System Memories ------------- - +===================+============+=======+============+============+ - | Memory | Address | Size | CPU Access | NPU Access | - +===================+============+=======+============+============+ - | ITCM | 0x00000000 | 512KB | Yes (RO) | No | - +-------------------+------------+-------+------------+------------+ - | DTCM | 0x20000000 | 512KB | Yes (R/W) | No | - +-------------------+------------+-------+------------+------------+ - | SSE-300 SRAM | 0x21000000 | 2MB | Yes (R/W) | Yes (R/W) | - +-------------------+------------+-------+------------+------------+ - | Data SRAM | 0x01000000 | 2MB | Yes (R/W) | Yes (R/W) | - +-------------------+------------+-------+------------+------------+ - | DDR | 0x60000000 | 32MB | Yes (R/W) | Yes (R/W) | - +-------------------+------------+-------+------------+------------+ */ - -/*---------------------- ITCM Configuration ---------------------------------- - Flash Configuration - Flash Base Address <0x0-0xFFFFFFFF:8> - Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__ROM_BASE = 0x00000000; -__ROM_SIZE = 0x00080000; - -/*--------------------- DTCM RAM Configuration ---------------------------- - RAM Configuration - RAM Base Address <0x0-0xFFFFFFFF:8> - RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__RAM_BASE = 0x20000000; -__RAM_SIZE = 0x00080000; - -/*----------------------- Data SRAM Configuration ------------------------------ - Data SRAM Configuration - DATA_SRAM Base Address <0x0-0xFFFFFFFF:8> - DATA_SRAM Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__DATA_SRAM_BASE = 0x01000000; -__DATA_SRAM_SIZE = 0x00200000; - -/*--------------------- Embedded SRAM Configuration ---------------------------- - SRAM Configuration - SRAM Base Address <0x0-0xFFFFFFFF:8> - SRAM Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__SRAM_BASE = 0x21000000; -__SRAM_SIZE = 0x00200000; - -/*--------------------- Stack / Heap Configuration ---------------------------- - Stack / Heap Configuration - Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> - Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__STACK_SIZE = 0x00008000; -__HEAP_SIZE = 0x00008000; - -/*--------------------- Embedded RAM Configuration ---------------------------- - DDR Configuration - DDR Base Address <0x0-0xFFFFFFFF:8> - DDR Size (in Bytes) <0x0-0xFFFFFFFF:8> - - -----------------------------------------------------------------------------*/ -__DDR_BASE = 0x60000000; -__DDR_SIZE = 0x02000000; - -/* - *-------------------- <<< end of configuration section >>> ------------------- - */ - -MEMORY 
-{ - ITCM (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE - DTCM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE - DATA_SRAM (rwx) : ORIGIN = __DATA_SRAM_BASE, LENGTH = __DATA_SRAM_SIZE - SRAM (rwx) : ORIGIN = __SRAM_BASE, LENGTH = __SRAM_SIZE - DDR (rwx) : ORIGIN = __DDR_BASE, LENGTH = __DDR_SIZE -} - -/* Linker script to place sections and symbol values. Should be used together - * with other linker script that defines memory regions ITCM and RAM. - * It references following symbols, which must be defined in code: - * Reset_Handler : Entry of reset handler - * - * It defines following symbols, which code can use without definition: - * __exidx_start - * __exidx_end - * __copy_table_start__ - * __copy_table_end__ - * __zero_table_start__ - * __zero_table_end__ - * __etext - * __data_start__ - * __preinit_array_start - * __preinit_array_end - * __init_array_start - * __init_array_end - * __fini_array_start - * __fini_array_end - * __data_end__ - * __bss_start__ - * __bss_end__ - * __end__ - * end - * __HeapLimit - * __StackLimit - * __StackTop - * __stack - */ -ENTRY(Reset_Handler) - -SECTIONS -{ - /* .ddr is placed before .text so that .rodata.tvm is encountered before .rodata* */ - .ddr : - { - . = ALIGN(16); - *(ethosu_scratch) - . = ALIGN (16); - *(.rodata.tvm) - . = ALIGN (16); - *(.data.tvm) - . = ALIGN (16); - } > DDR - - .text : - { - KEEP(*(.vectors)) - *(.text*) - - KEEP(*(.init)) - KEEP(*(.fini)) - - /* .ctors */ - *crtbegin.o(.ctors) - *crtbegin?.o(.ctors) - *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) - *(SORT(.ctors.*)) - *(.ctors) - - /* .dtors */ - *crtbegin.o(.dtors) - *crtbegin?.o(.dtors) - *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) - *(SORT(.dtors.*)) - *(.dtors) - - *(.rodata*) - - KEEP(*(.eh_frame*)) - } > ITCM - - .ARM.extab : - { - *(.ARM.extab* .gnu.linkonce.armextab.*) - } > ITCM - - __exidx_start = .; - .ARM.exidx : - { - *(.ARM.exidx* .gnu.linkonce.armexidx.*) - } > ITCM - __exidx_end = .; - - .copy.table : - { - . = ALIGN(4); - __copy_table_start__ = .; - LONG (__etext) - LONG (__data_start__) - LONG (__data_end__ - __data_start__) - /* Add each additional data section here */ - __copy_table_end__ = .; - } > ITCM - - .zero.table : - { - . = ALIGN(4); - __zero_table_start__ = .; - __zero_table_end__ = .; - } > ITCM - - /** - * Location counter can end up 2byte aligned with narrow Thumb code but - * __etext is assumed by startup code to be the LMA of a section in DTCM - * which must be 4byte aligned - */ - __etext = ALIGN (4); - - .data : AT (__etext) - { - __data_start__ = .; - *(vtable) - *(.data) - *(.data.*) - - . = ALIGN(4); - /* preinit data */ - PROVIDE_HIDDEN (__preinit_array_start = .); - KEEP(*(.preinit_array)) - PROVIDE_HIDDEN (__preinit_array_end = .); - - . = ALIGN(4); - /* init data */ - PROVIDE_HIDDEN (__init_array_start = .); - KEEP(*(SORT(.init_array.*))) - KEEP(*(.init_array)) - PROVIDE_HIDDEN (__init_array_end = .); - - - . = ALIGN(4); - /* finit data */ - PROVIDE_HIDDEN (__fini_array_start = .); - KEEP(*(SORT(.fini_array.*))) - KEEP(*(.fini_array)) - PROVIDE_HIDDEN (__fini_array_end = .); - - KEEP(*(.jcr*)) - . = ALIGN(4); - /* All data end */ - __data_end__ = .; - - } > DTCM - - .sram : - { - . = ALIGN(16); - *(.bss.ethosu_fast_memory); - . = ALIGN(16); - } > SRAM AT > SRAM - - .bss.noinit (NOLOAD): - { - . = ALIGN(16); - *(.bss.noinit.*) - . = ALIGN(16); - } > SRAM AT > SRAM - - .bss : - { - . = ALIGN(4); - __bss_start__ = .; - *(.bss) - *(.bss.*) - *(COMMON) - . = ALIGN(4); - __bss_end__ = .; - } > DTCM AT > DTCM - - .data_sram : - { - . 
= ALIGN(16); - } > DATA_SRAM - - .heap (COPY) : - { - . = ALIGN(8); - __end__ = .; - PROVIDE(end = .); - . = . + __HEAP_SIZE; - . = ALIGN(8); - __HeapLimit = .; - } > DTCM - - .stack (ORIGIN(DTCM) + LENGTH(DTCM) - __STACK_SIZE) (COPY) : - { - . = ALIGN(8); - __StackLimit = .; - . = . + __STACK_SIZE; - . = ALIGN(8); - __StackTop = .; - } > DTCM - PROVIDE(__stack = __StackTop); - - /* Check if data + stack exceeds DTCM limit */ - ASSERT(__StackLimit >= __bss_end__, "region DTCM overflowed with stack") -} diff --git a/tests/python/relay/aot/corstone300.mk b/tests/python/relay/aot/corstone300.mk deleted file mode 100644 index 9b3ef462232a..000000000000 --- a/tests/python/relay/aot/corstone300.mk +++ /dev/null @@ -1,174 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Makefile to build and run AOT tests against the reference system - -# Setup build environment -build_dir := build -TVM_ROOT=$(shell cd ../../../../..; pwd) -CRT_ROOT ?= ${TVM_ROOT}/build/standalone_crt -ifeq ($(shell ls -lhd $(CRT_ROOT)),) -$(error "CRT not found. 
Ensure you have built the standalone_crt target and try again") -endif - -FVP_DIR ?= /opt/arm/FVP_Corstone_SSE-300_Ethos-U55/models/Linux64_GCC-6.4/ - -NPU_MACS ?= 256 -NPU_VARIANT ?= U55 - -MODEL = FVP_Corstone_SSE-300_Ethos-$(NPU_VARIANT) - -ARM_CPU ?= ARMCM55 -MCPU ?= cortex-m55 -MCPU_FLAGS ?= -MFLOAT_ABI ?= hard - -DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core -ETHOSU_PATH=/opt/arm/ethosu -DRIVER_PATH=${ETHOSU_PATH}/core_driver -CMSIS_PATH=${ETHOSU_PATH}/cmsis -ETHOSU_PLATFORM_PATH=/opt/arm/ethosu/core_platform -CORSTONE_300_PATH = ${ETHOSU_PLATFORM_PATH}/targets/corstone-300 -PKG_COMPILE_OPTS = -Wall -Ofast -Wno-incompatible-pointer-types -Wno-format -Werror-implicit-function-declaration -mcpu=${MCPU}${MCPU_FLAGS} -mthumb -mfloat-abi=${MFLOAT_ABI} -std=gnu99 -CMAKE = /opt/arm/cmake/bin/cmake -CC = arm-none-eabi-gcc -AR = arm-none-eabi-ar -RANLIB = arm-none-eabi-ranlib -CC_OPTS = CC=$(CC) AR=$(AR) RANLIB=$(RANLIB) -PKG_CFLAGS = ${PKG_COMPILE_OPTS} \ - ${CFLAGS} \ - -I$(build_dir)/../include \ - -I${TVM_ROOT}/src/runtime/contrib/ethosu/bare_metal \ - -I$(CODEGEN_ROOT)/host/include \ - -I${ETHOSU_PLATFORM_PATH}/drivers/uart/include \ - -I${DRIVER_PATH}/include \ - -I${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Include/ \ - -I${CMSIS_PATH}/CMSIS/Core/Include \ - -I${CMSIS_PATH}/CMSIS-NN/Include \ - -I${CMSIS_PATH}/CMSIS/DSP/Include \ - -isystem$(STANDALONE_CRT_DIR)/include -CMAKE_FLAGS = -DCMAKE_TOOLCHAIN_FILE=${TVM_ROOT}/tests/python/contrib/test_ethosu/reference_system/arm-none-eabi-gcc.cmake \ - -DCMAKE_SYSTEM_PROCESSOR=${MCPU} - -# -fdata-sections together with --gc-section may lead to smaller statically-linked executables -PKG_LDFLAGS = -lm -specs=nosys.specs -static -Wl,--gc-sections -T ${AOT_TEST_ROOT}/corstone300.ld - -$(ifeq VERBOSE,1) -QUIET ?= -$(else) -QUIET ?= @ -$(endif) - -CRT_SRCS = $(shell find $(CRT_ROOT)) -C_CODEGEN_SRCS = $(shell find $(abspath $(CODEGEN_ROOT)/host/src/*.c)) -CC_CODEGEN_SRCS = $(shell find $(abspath $(CODEGEN_ROOT)/host/src/*.cc)) -C_CODEGEN_OBJS = $(subst .c,.o,$(C_CODEGEN_SRCS)) -CC_CODEGEN_OBJS = $(subst .cc,.o,$(CC_CODEGEN_SRCS)) -CMSIS_STARTUP_SRCS = $(shell find ${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Source/*.c) -CMSIS_NN_SRCS = $(shell find ${CMSIS_PATH}/CMSIS-NN/Source/*/*.c) -CORSTONE_300_SRCS = $(shell find ${CORSTONE_300_PATH}/*.c) - -ifdef ETHOSU_TEST_ROOT -NPU=$(shell echo "${NPU_VARIANT}" | tr '[:upper:]' '[:lower:]') -ETHOSU_DRIVER_LIBS = ${DRIVER_PATH}/build_${NPU}/*.a -ETHOSU_RUNTIME=$(build_dir)/tvm_ethosu_runtime.o -ETHOSU_INCLUDE=-I$(ETHOSU_TEST_ROOT) -endif - -aot_test_runner: $(build_dir)/aot_test_runner - -$(build_dir)/stack_allocator.o: $(TVM_ROOT)/src/runtime/crt/memory/stack_allocator.c - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ - -$(build_dir)/crt_backend_api.o: $(TVM_ROOT)/src/runtime/crt/common/crt_backend_api.c - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ - -ifeq ($(DEBUG_LAST_ERROR), 1) -$(build_dir)/crt_runtime_api.o: $(TVM_ROOT)/src/runtime/crt/common/crt_runtime_api.c - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ - -$(build_dir)/func_registry.o: $(TVM_ROOT)/src/runtime/crt/common/func_registry.c - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ - -$(build_dir)/ndarray.o: $(TVM_ROOT)/src/runtime/crt/common/ndarray.c - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ - -DEBUG_LAST_ERROR_SOURCES = $(build_dir)/crt_runtime_api.o $(build_dir)/func_registry.o $(build_dir)/ndarray.o -endif - -$(build_dir)/tvm_ethosu_runtime.o: 
$(TVM_ROOT)/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.c - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ - -$(build_dir)/libcodegen.a: $(C_CODEGEN_SRCS) $(CC_CODEGEN_SRCS) - $(QUIET)cd $(abspath $(CODEGEN_ROOT)/host/src) && $(CC) -c $(PKG_CFLAGS) $(C_CODEGEN_SRCS) $(CC_CODEGEN_SRCS) - $(QUIET)$(AR) -cr $(abspath $(build_dir)/libcodegen.a) $(C_CODEGEN_OBJS) $(CC_CODEGEN_OBJS) - $(QUIET)$(RANLIB) $(abspath $(build_dir)/libcodegen.a) - -${build_dir}/libcmsis_startup.a: $(CMSIS_STARTUP_SRCS) - $(QUIET)mkdir -p $(abspath $(build_dir)/libcmsis_startup) - $(QUIET)cd $(abspath $(build_dir)/libcmsis_startup) && $(CC) -c $(PKG_CFLAGS) -D${ARM_CPU} $^ - $(QUIET)$(AR) -cr $(abspath $(build_dir)/libcmsis_startup.a) $(abspath $(build_dir))/libcmsis_startup/*.o - $(QUIET)$(RANLIB) $(abspath $(build_dir)/libcmsis_startup.a) - -# -fdata-sections together with --gc-section may lead to smaller statically-linked executables -${build_dir}/libcmsis_nn.a: $(CMSIS_NN_SRCS) - $(QUIET)mkdir -p $(abspath $(build_dir)/libcmsis_nn) - $(QUIET)cd $(abspath $(build_dir)/libcmsis_nn) && $(CC) -c $(PKG_CFLAGS) -ffunction-sections -fdata-sections -D${ARM_CPU} $^ - $(QUIET)$(AR) -cr $(abspath $(build_dir)/libcmsis_nn.a) $(abspath $(build_dir))/libcmsis_nn/*.o - $(QUIET)$(RANLIB) $(abspath $(build_dir)/libcmsis_nn.a) - -${build_dir}/libcorstone.a: $(CORSTONE_300_SRCS) - $(QUIET)mkdir -p $(abspath $(build_dir)/libcorstone) - $(QUIET)cd $(abspath $(build_dir)/libcorstone) && $(CC) -c $(PKG_CFLAGS) $^ - $(QUIET)$(AR) -cr $(abspath $(build_dir)/libcorstone.a) $(abspath $(build_dir))/libcorstone/*.o - $(QUIET)$(RANLIB) $(abspath $(build_dir)/libcorstone.a) - -# Build UART driver -${build_dir}/ethosu_core_platform/libethosu_uart_cmsdk_apb.a: - $(QUIET)mkdir -p $(@D) - $(QUIET)cd ${ETHOSU_PLATFORM_PATH}/drivers/uart && $(CMAKE) -B $(abspath $(build_dir)/ethosu_core_platform) $(CMAKE_FLAGS) - $(QUIET)cd $(abspath $(build_dir)/ethosu_core_platform) && $(MAKE) - -$(build_dir)/aot_test_runner: $(build_dir)/test.c $(build_dir)/crt_backend_api.o $(build_dir)/stack_allocator.o $(build_dir)/libcodegen.a ${build_dir}/libcmsis_startup.a ${build_dir}/libcmsis_nn.a ${build_dir}/libcorstone.a ${build_dir}/ethosu_core_platform/libethosu_uart_cmsdk_apb.a $(ETHOSU_DRIVER_LIBS) $(ETHOSU_RUNTIME) $(DEBUG_LAST_ERROR_SOURCES) - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) $(PKG_CFLAGS) $(ETHOSU_INCLUDE) -o $@ -Wl,--whole-archive $^ -Wl,--no-whole-archive $(PKG_LDFLAGS) - -clean: - $(QUIET)rm -rf $(build_dir)/crt - -cleanall: - $(QUIET)rm -rf $(build_dir) - -run: $(build_dir)/aot_test_runner - $(FVP_DIR)/$(MODEL) -C cpu0.CFGDTCMSZ=15 \ - -C cpu0.CFGITCMSZ=15 -C mps3_board.uart0.out_file=\"-\" -C mps3_board.uart0.shutdown_tag=\"EXITTHESIM\" \ - -C mps3_board.visualisation.disable-visualisation=1 -C mps3_board.telnetterminal0.start_telnet=0 \ - -C mps3_board.telnetterminal1.start_telnet=0 -C mps3_board.telnetterminal2.start_telnet=0 -C mps3_board.telnetterminal5.start_telnet=0 \ - -C ethosu.extra_args="--fast" \ - -C ethosu.num_macs=$(NPU_MACS) $(build_dir)/aot_test_runner - -.SUFFIXES: - -.DEFAULT: aot_test_runner - -.PHONY: run diff --git a/tests/python/relay/aot/default.mk b/tests/python/relay/aot/default.mk deleted file mode 100644 index b7258a3c6df8..000000000000 --- a/tests/python/relay/aot/default.mk +++ /dev/null @@ -1,81 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# Setup build environment -# -AOT_ROOT ?= $(CRT_ROOT)/aot - -ENABLE_TVM_PLATFORM_ABORT_BACKTRACE = 0 -DMLC_CORE=$(TVM_ROOT)/3rdparty/dmlc-core -PKG_COMPILE_OPTS = -g -CC = gcc -#CC = g++ -AR = ar -RANLIB = ranlib -CC_OPTS = CC=$(CC) AR=$(AR) RANLIB=$(RANLIB) - -PKG_CFLAGS = ${PKG_COMPILE_OPTS} \ - -I$(build_dir)/../include \ - -I$(CODEGEN_ROOT)/host/include \ - -isystem$(STANDALONE_CRT_DIR)/include - -$(ifeq VERBOSE,1) -QUIET ?= -$(else) -QUIET ?= @ -$(endif) - -aot_test_runner: $(build_dir)/aot_test_runner - -c_source_libs= $(wildcard $(build_dir)/../codegen/host/src/*.c) -cc_source_libs= $(wildcard $(build_dir)/../codegen/host/src/*.cc) -c_lib_objs =$(c_source_libs:.c=.o) -cc_lib_objs =$(cc_source_libs:.cc=.o) - -$(build_dir)/aot_test_runner: $(build_dir)/test.c $(c_source_libs) $(cc_source_libs) $(build_dir)/stack_allocator.o $(build_dir)/crt_backend_api.o - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) $(CFLAGS) $(PKG_CFLAGS) -o $@ $^ $(PKG_LDFLAGS) $(BACKTRACE_LDFLAGS) $(BACKTRACE_CFLAGS) -lm - -$(build_dir)/%.o: $(build_dir)/../codegen/host/src/%.c - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) $(CFLAGS) -c $(PKG_CFLAGS) -o $@ $^ $(BACKTRACE_CFLAGS) - -$(build_dir)/%.o: $(build_dir)/../codegen/host/src/%.cc - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) $(CFLAGS) -c $(PKG_CFLAGS) -o $@ $^ $(BACKTRACE_CFLAGS) - -$(build_dir)/stack_allocator.o: $(STANDALONE_CRT_DIR)/src/runtime/crt/memory/stack_allocator.c - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) $(CFLAGS) -c $(PKG_CFLAGS) -o $@ $^ $(BACKTRACE_CFLAGS) - -$(build_dir)/crt_backend_api.o: $(STANDALONE_CRT_DIR)/src/runtime/crt/common/crt_backend_api.c - $(QUIET)mkdir -p $(@D) - $(QUIET)$(CC) $(CFLAGS) -c $(PKG_CFLAGS) -o $@ $^ $(BACKTRACE_CFLAGS) - -clean: - $(QUIET)rm -rf $(build_dir)/crt -cleanall: - $(QUIET)rm -rf $(build_dir) - -run: $(build_dir)/aot_test_runner - $(build_dir)/aot_test_runner - -# Don't define implicit rules; they tend to match on logical target names that aren't targets (i.e. bundle_static) -.SUFFIXES: - -.DEFAULT: aot_test_runner - -.PHONY: run diff --git a/tests/python/relay/aot/test_aot_create_executor_metadata.py b/tests/python/relay/aot/test_aot_create_executor_metadata.py deleted file mode 100644 index 804738a7866a..000000000000 --- a/tests/python/relay/aot/test_aot_create_executor_metadata.py +++ /dev/null @@ -1,176 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=line-too-long,missing-class-docstring,missing-module-docstring,missing-function-docstring,no-self-argument,unused-argument,invalid-name -import numpy as np - -import tvm -import tvm.testing -from tvm.script import tir as T -from tvm.runtime.ndarray import array -from tvm.relay.backend import Executor -from tvm.relay.backend.aot import CreateExecutorMetadata -from tvm.relay import TensorType -from tvm.tir.usmp.utils import PoolAllocation -from tvm.ir.memory_pools import AllocatedPoolInfo, ConstantPoolInfo, WorkspacePoolInfo, ConstantInfo - - -def _check_executor_metadata(executor_metadata, expected_metadata): - assert list(executor_metadata.inputs) == expected_metadata["inputs"] - assert list(executor_metadata.input_tensor_types) == expected_metadata["input_tensor_types"] - assert list(executor_metadata.outputs) == expected_metadata["outputs"] - assert list(executor_metadata.output_tensor_types) == expected_metadata["output_tensor_types"] - assert list(executor_metadata.pools) == expected_metadata["pools"] - assert executor_metadata.devices == expected_metadata["devices"] - assert executor_metadata.executor == expected_metadata["executor"] - assert executor_metadata.mod_name == expected_metadata["mod_name"] - assert executor_metadata.interface_api == expected_metadata["interface_api"] - assert executor_metadata.unpacked_api == expected_metadata["unpacked_api"] - assert executor_metadata.workspace_alignment == expected_metadata["workspace_alignment"] - assert executor_metadata.constant_alignment == expected_metadata["constant_alignment"] - assert set(executor_metadata.pool_inputs.keys()) == set(expected_metadata["pool_inputs"].keys()) - assert set(executor_metadata.io_pool_allocations.keys()) == set( - expected_metadata["io_pool_allocations"].keys() - ) - - -def test_create_executor_metadata_single_func(): - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def __tvm_main__( - a: T.handle, output: T.handle, workspace: T.handle("uint8"), constants: T.handle("uint8") - ) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind": "llvm", "tag": "", "keys": ["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": ["test_device"]}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - sid_3 = T.allocate([140], "int8", "global.workspace") - sid_2 = T.allocate([140], "int8", "global.workspace") - sid_1 = T.allocate([140], "int8", "global.workspace") - constant_0 = T.allocate_const([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "float32", [5, 7]) - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", a_buffer.data, sid_1, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_1, constant_0, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_2, sid_3, T.reinterpret(T.uint64(0), 
dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_1", sid_2, sid_3, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - target = Module["__tvm_main__"].attrs["target"] - executor = Executor("aot", {"interface-api": "c"}) - workspace_pool_info = AllocatedPoolInfo( - WorkspacePoolInfo("sram", [target]), - 256, - 3, - ) - constant_pool_info = AllocatedPoolInfo( - ConstantPoolInfo( - "flash", - [target], - [ConstantInfo("a", 0, array(np.array([0])))], - ), - 512, - 2, - ) - io_pool_allocations = { - "a": PoolAllocation(WorkspacePoolInfo("sram", [target]), 0), - "output": PoolAllocation(WorkspacePoolInfo("sram", [target]), 0), - } - mod = Module.with_attr("io_tensor_pool_allocations", io_pool_allocations) - mod["__tvm_main__"] = mod["__tvm_main__"].with_attr( - "pool_args", - [ - constant_pool_info, - workspace_pool_info, - ], - ) - f = mod["__tvm_main__"] - expected_metadata = { - "inputs": [f.params[0]], - "input_tensor_types": [TensorType((5, 7), "float32")], - "outputs": ["output"], - "output_tensor_types": [TensorType((5, 7), "float32")], - "pools": f.params[2:], - "devices": f.attrs["devices"], - "executor": "aot", - "mod_name": "test_mod", - "interface_api": "c", - "unpacked_api": False, - "workspace_alignment": 16, - "constant_alignment": 1, - "pool_inputs": { - f.params[2]: workspace_pool_info, - f.params[3]: constant_pool_info, - }, - "io_pool_allocations": io_pool_allocations, - } - - executor_metadata = CreateExecutorMetadata(mod, "test_mod", executor, 16, 1) - - _check_executor_metadata(executor_metadata, expected_metadata) - - -def test_create_executor_metadata_no_usmp(): - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def __tvm_main__( - a: T.handle, output: T.handle - ) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind": "llvm", "tag": "", "keys": ["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": ["test_device"]}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - sid_3 = T.allocate([140], "int8", "global.workspace") - sid_2 = T.allocate([140], "int8", "global.workspace") - sid_1 = T.allocate([140], "int8", "global.workspace") - constant_0 = T.allocate_const([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "float32", [5, 7]) - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", a_buffer.data, sid_1, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_1, constant_0, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_2, sid_3, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_1", sid_2, sid_3, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - executor = Executor("aot", {"interface-api": "c"}) - mod = Module - f = mod["__tvm_main__"] - expected_metadata = { - "inputs": [f.params[0]], - "input_tensor_types": [TensorType((5, 7), "float32")], - "outputs": ["output"], - "output_tensor_types": [TensorType((5, 7), "float32")], - "pools": f.params[2:], - "devices": f.attrs["devices"], - "executor": "aot", - "mod_name": "test_mod", - "interface_api": "c", - "unpacked_api": False, - "workspace_alignment": 
16, - "constant_alignment": 1, - "pool_inputs": {}, - "io_pool_allocations": {}, - } - - executor_metadata = CreateExecutorMetadata(mod, "test_mod", executor, 16, 1) - - _check_executor_metadata(executor_metadata, expected_metadata) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/aot/test_aot_create_function_metadata.py b/tests/python/relay/aot/test_aot_create_function_metadata.py deleted file mode 100644 index 4372ed4c35b0..000000000000 --- a/tests/python/relay/aot/test_aot_create_function_metadata.py +++ /dev/null @@ -1,318 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=line-too-long,missing-class-docstring,missing-module-docstring,missing-function-docstring,no-self-argument,unused-argument,invalid-name -import numpy as np - -import tvm -import tvm.testing -from tvm.script import tir as T -from tvm.runtime.ndarray import array -from tvm.relay.backend.aot import CreateFunctionMetadata -from tvm.ir.memory_pools import AllocatedPoolInfo, ConstantPoolInfo, WorkspacePoolInfo, ConstantInfo - - -def _check_function_metadata(function_metadata, expected_infos): - for symbol, expected_info in expected_infos.items(): - func_info = function_metadata[symbol] - # Check workspace_sizes - key, value = func_info.workspace_sizes.items()[0] - actual_target = tvm.target.Target(key) - assert str(actual_target.kind) == expected_info["target_kind"] - assert expected_info["target_key"] in actual_target.keys - assert value == expected_info["workspace_sizes"] - - # Check io_sizes - key, value = func_info.io_sizes.items()[0] - actual_target = tvm.target.Target(key) - assert str(actual_target.kind) == expected_info["target_kind"] - assert expected_info["target_key"] in actual_target.keys - assert value == expected_info["io_sizes"] - # Check constant_sizes - key, value = func_info.constant_sizes.items()[0] - actual_target = tvm.target.Target(key) - assert str(actual_target.kind) == expected_info["target_kind"] - assert expected_info["target_key"] in actual_target.keys - assert value == expected_info["constant_sizes"] - # Check tir_primfuncs - key, value = func_info.tir_primfuncs.items()[0] - actual_target = tvm.target.Target(key) - assert str(actual_target.kind) == expected_info["target_kind"] - assert expected_info["target_key"] in actual_target.keys - tvm.ir.assert_structural_equal(value, expected_info["tir_primfuncs"]) - - -def test_create_function_metadata_workspace_allocate_only(): - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def __tvm_main__(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]})}) - a_buffer = T.match_buffer(a, [5, 7], 
dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - sid_3 = T.allocate([140], "int8", "global.workspace") - sid_2 = T.allocate([140], "int8", "global.workspace") - sid_1 = T.allocate([140], "int8", "global.workspace") - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", a_buffer.data, sid_1, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_1, sid_2, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_2, sid_3, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_1", sid_2, sid_3, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - expected_infos = { - "__tvm_main__": { - "target_kind": "llvm", - "target_key": "cpu", - "workspace_sizes": 432, - "io_sizes": 280, - "constant_sizes": 0, - "tir_primfuncs": Module["__tvm_main__"], - } - } - - function_metadata = CreateFunctionMetadata(Module, 16, 1) - - _check_function_metadata(function_metadata, expected_infos) - - -def test_create_function_metadata_constant_allocate_only(): - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def __tvm_main__(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "num_inputs": 1, "num_outputs": 1}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - constant_0 = T.allocate_const([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "float32", [5, 7]) - T.evaluate(T.tvm_call_cpacked("test_fused_add", a_buffer.data, constant_0, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - expected_infos = { - "__tvm_main__": { - "target_kind": "llvm", - "target_key": "cpu", - "workspace_sizes": 0, - "io_sizes": 280, - "constant_sizes": 140, - "tir_primfuncs": Module["__tvm_main__"], - } - } - - function_metadata = CreateFunctionMetadata(Module, 16, 1) - - _check_function_metadata(function_metadata, expected_infos) - - -def test_create_function_metadata_constant_pool_only(): - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def __tvm_main__(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "num_inputs": 1, "num_outputs": 1}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - T.evaluate(T.tvm_call_cpacked("test_fused_add", a_buffer.data, a_buffer.data, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - expected_infos = { - "__tvm_main__": { - "target_kind": "llvm", - "target_key": "cpu", - "workspace_sizes": 0, - "io_sizes": 280, - "constant_sizes": 256, - "tir_primfuncs": Module["__tvm_main__"], - } - } - - target = Module["__tvm_main__"].attrs["target"] - mod = Module.with_attr( - "pool_args", - [ - AllocatedPoolInfo( - ConstantPoolInfo( - "flash", - [target], - [ConstantInfo("a", 0, array(np.array([0])))], - ), - 256, - ), - ], - ) - - function_metadata = 
CreateFunctionMetadata(mod, 16, 1) - - _check_function_metadata(function_metadata, expected_infos) - - -def test_create_function_metadata_workspace_pool_only(): - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def __tvm_main__(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "num_inputs": 1, "num_outputs": 1}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - T.evaluate(T.tvm_call_cpacked("test_fused_add", a_buffer.data, a_buffer.data, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - expected_infos = { - "__tvm_main__": { - "target_kind": "llvm", - "target_key": "cpu", - "workspace_sizes": 256, - "io_sizes": 280, - "constant_sizes": 0, - "tir_primfuncs": Module["__tvm_main__"], - } - } - - target = Module["__tvm_main__"].attrs["target"] - mod = Module.with_attr( - "pool_args", - [ - AllocatedPoolInfo( - WorkspacePoolInfo("sram", [target]), - 256, - ), - ], - ) - - function_metadata = CreateFunctionMetadata(mod, 16, 1) - - _check_function_metadata(function_metadata, expected_infos) - - -def test_create_function_metadata_all_single_func(): - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def __tvm_main__(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]})}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - sid_3 = T.allocate([140], "int8", "global.workspace") - sid_2 = T.allocate([140], "int8", "global.workspace") - sid_1 = T.allocate([140], "int8", "global.workspace") - constant_0 = T.allocate_const([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "float32", [5, 7]) - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", a_buffer.data, sid_1, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_1, constant_0, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_2, sid_3, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_1", sid_2, sid_3, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - expected_infos = { - "__tvm_main__": { - "target_kind": "llvm", - "target_key": "cpu", - "workspace_sizes": 688, - "io_sizes": 280, - "constant_sizes": 652, - "tir_primfuncs": Module["__tvm_main__"], - } - } - - target = Module["__tvm_main__"].attrs["target"] - mod = Module.with_attr( - "pool_args", - [ - AllocatedPoolInfo( - ConstantPoolInfo( - "flash", - [target], - [ConstantInfo("a", 0, array(np.array([0])))], - ), - 512, - ), - AllocatedPoolInfo( - WorkspacePoolInfo("sram", [target]), - 256, - ), - ], - ) - - function_metadata = CreateFunctionMetadata(mod, 16, 1) - - _check_function_metadata(function_metadata, expected_infos) - - -def test_create_function_metadata_workspace_multi_funcs(): - # fmt: off - @tvm.script.ir_module - class Module: - @T.prim_func - def __tvm_main__(a: T.handle, output: T.handle) -> None: - # function 
attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "num_inputs": 1, "num_outputs": 1}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - T.evaluate(T.tvm_call_cpacked("test_fused_add", a_buffer.data, a_buffer.data, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - - @T.prim_func - def test_fused_add(a: T.handle, b: T.handle, output: T.handle, device_context_unused: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod_test_fused_add", "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]})}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - b_buffer = T.match_buffer(b, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - sid_0 = T.allocate([140], "int8", "global.workspace") - constant_0 = T.allocate_const([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "float32", [5, 7]) - T.evaluate(T.tvm_call_cpacked("magic", a_buffer.data, b_buffer.data, sid_0, constant_0, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - expected_infos = { - "__tvm_main__": { - "target_kind": "llvm", - "target_key": "cpu", - "workspace_sizes": 0, - "io_sizes": 280, - "constant_sizes": 0, - "tir_primfuncs": Module["__tvm_main__"], - }, - "test_fused_add": { - "target_kind": "llvm", - "target_key": "cpu", - "workspace_sizes": 144, - "io_sizes": 420, - "constant_sizes": 140, - "tir_primfuncs": Module["test_fused_add"], - }, - } - - function_metadata = CreateFunctionMetadata(Module, 16, 1) - - _check_function_metadata(function_metadata, expected_infos) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/aot/test_aot_test_harness.py b/tests/python/relay/aot/test_aot_test_harness.py deleted file mode 100644 index 3d10f15d4ab4..000000000000 --- a/tests/python/relay/aot/test_aot_test_harness.py +++ /dev/null @@ -1,111 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Tests for the AOT test harness. -""" - -import pytest -import numpy as np - -import tvm -from tvm import relay -from tvm.testing.aot import AOTTestRunner, compile_and_run, AOTTestModel - - -def test_output_on_mismatch_option(): - """ - Test the print_output_on_mismatch option when there is a mismatch. 
- """ - interface_api = "packed" - use_unpacked_api = True - test_runner = AOTTestRunner() - dtype = "float32" - - two = relay.add(relay.const(1, dtype=dtype), relay.const(1, dtype=dtype)) - func = relay.Function([], two) - outputs = { - "output": np.array( - [ - 0, - ] - ).astype(dtype) - } - - msg = ".*Actual, Reference(\n|.)*2.000000, 0.000000(\n|.)*AOT_TEST_FAILURE.*" - with pytest.raises(RuntimeError, match=msg): - compile_and_run( - AOTTestModel(module=tvm.IRModule.from_expr(func), inputs={}, outputs=outputs), - test_runner, - interface_api, - use_unpacked_api, - print_output_on_mismatch=True, - ) - - -def test_output_position_on_mismatch(): - """ - Test the mismatch position output for the print_output_on_mismatch option. - """ - interface_api = "packed" - use_unpacked_api = True - test_runner = AOTTestRunner() - dtype = "float32" - - x = np.zeros(shape=(2, 2), dtype=dtype) - x[-1, -1] = 1 - func = relay.Function([], relay.const(x, dtype=dtype)) - outputs = {"output": np.zeros(shape=(2, 2), dtype=dtype)} - - msg = ".*Element \\[1, 1\\]:.*" - with pytest.raises(RuntimeError, match=msg): - compile_and_run( - AOTTestModel(module=tvm.IRModule.from_expr(func), inputs={}, outputs=outputs), - test_runner, - interface_api, - use_unpacked_api, - print_output_on_mismatch=True, - ) - - -def test_mismatch_percentage(): - """ - Test the mismatch percentage for the print_output_on_mismatch option. - """ - interface_api = "packed" - use_unpacked_api = True - test_runner = AOTTestRunner() - dtype = "float32" - - x = np.zeros(shape=(8,), dtype=dtype) - x[0] = 1 - func = relay.Function([], relay.const(x, dtype=dtype)) - outputs = {"output": np.zeros(shape=(8,), dtype=dtype)} - - msg = ".*Mismatched elements: 1 / 8 \\(12.50%\\).*" - with pytest.raises(RuntimeError, match=msg): - compile_and_run( - AOTTestModel(module=tvm.IRModule.from_expr(func), inputs={}, outputs=outputs), - test_runner, - interface_api, - use_unpacked_api, - print_output_on_mismatch=True, - ) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/aot/test_c_device_api.py b/tests/python/relay/aot/test_c_device_api.py deleted file mode 100644 index 343a8c8a17e4..000000000000 --- a/tests/python/relay/aot/test_c_device_api.py +++ /dev/null @@ -1,273 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""AOT with C Device API Tests""" - -import re -from collections import OrderedDict - -import numpy as np -import pytest -import tvm.testing -from tvm import relay -from tvm.ir.module import IRModule -from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER -from tvm.testing.aot import AOTTestModel, compile_models, generate_ref_data - - -@pytest.fixture(name="device_api_main_func") -def fixture_device_api_main_func(): - """Test function generator which generates C Device API calls""" - - # Ideally we should have a sample Target registered here - # but we're going to re-use this for now - pytest.importorskip("ethosu.vela") - - # pylint: disable=import-outside-toplevel - import tensorflow as tf - import tflite.Model - from tvm.relay.op.contrib.ethosu import partition_for_ethosu - - from tests.python.contrib.test_ethosu.infra import ( - create_test_runner, - generate_ref_data_tflite, - ) - - # pylint: enable=import-outside-toplevel - - tf.config.run_functions_eagerly(True) - - class Model(tf.Module): - @tf.function - def tf_function(self, x): - return tf.nn.max_pool(x, [1, 2], [1, 2], "SAME") - - def representative_dataset(): - for _ in range(100): - data = np.random.rand(1, 3, 4, 3) - yield [data.astype(np.float32)] - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec([1, 3, 4, 3], dtype=tf.float32) - ) - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - - tflite_graph = converter.convert() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - - relay_module, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"x": [1, 3, 4, 3]}, - dtype_dict={"x": "int8"}, - ) - mod = partition_for_ethosu(relay_module, params) - - # Generate reference data - input_data, output_data = generate_ref_data_tflite(tflite_graph) - - def compile_to_main_func(interface_api="c", use_unpacked_api=True): - test_runner = create_test_runner() - compiled_models = compile_models( - models=AOTTestModel( - module=mod, - inputs=input_data, - outputs=output_data, - ), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - workspace_byte_alignment=16, - pass_config=test_runner.pass_config, - ) - main_ir_module = compiled_models[0].executor_factory.lowered_ir_mods.items()[0][1] - main_func = main_ir_module["__tvm_main__"] - return main_func - - return compile_to_main_func - - -@pytest.fixture(name="non_device_api_main_func") -def fixture_non_device_api_main_func(): - """Test function generator which does not generate C Device API calls""" - x = relay.var("x", shape=(10, 10)) - y = relay.var("y", shape=(1, 10)) - func = relay.Function([x, y], relay.multiply(x, y)) - x_data = np.random.rand(10, 10).astype("float32") - y_data = np.random.rand(1, 10).astype("float32") - - inputs = OrderedDict([("x", x_data), ("y", y_data)]) - output_list = generate_ref_data(func, inputs) - - def compile_to_main_func(interface_api="c", use_unpacked_api=True): - test_runner = AOT_DEFAULT_RUNNER - compiled_models = compile_models( - models=AOTTestModel( - module=IRModule.from_expr(func), - inputs=inputs, - outputs=output_list, - ), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - workspace_byte_alignment=16, - 
pass_config=test_runner.pass_config, - ) - main_ir_module = list(compiled_models[0].executor_factory.lowered_ir_mods.values())[0] - main_func = main_ir_module["__tvm_main__"] - return main_func - - return compile_to_main_func - - -def test_device_api_hooks_unpacked_api(device_api_main_func): - """Check for Device API hooks with unpacked internal calls""" - main_func = device_api_main_func(interface_api="c", use_unpacked_api=True) - - # Activate Device - assert ( - str(main_func.body[0].value) - == "T.tvm_check_return(0, -1, T.call_extern(" - + '"int32",' - + ' "TVMDeviceEthosUActivate",' - + " device_context_ethos_u))" - ) - # Open Device - print("main func", repr(main_func.body)) - assert ( - str(main_func.body[1].value) - == "T.tvm_check_return(0, -1, T.call_extern(" - + '"int32",' - + ' "TVMDeviceEthosUOpen",' - + " device_context_ethos_u))" - ) - # Device Call - # We dont need to check exact input and output var names in this test. - # Hence, using a regex to cover any legal I/O name. - regex = re.compile( - r"T\.tvm_check_return\(" - r"0, -1, " - r'T\.call_extern\("int32", "tvmgen_default_ethos_u_main_0", ' - r"\w+, \w+, device_context_ethos_u\)\)" - ) - assert regex.match(str(main_func.body[2].value)) - # Close Device - assert ( - str(main_func.body[3].value) - == "T.tvm_check_return(0, -1, T.call_extern(" - + '"int32",' - + ' "TVMDeviceEthosUClose",' - + " device_context_ethos_u))" - ) - # Deactivate Device - assert ( - str(str(main_func.body[4].value)) - == "T.tvm_check_return(0, -1, T.call_extern(" - + '"int32",' - + ' "TVMDeviceEthosUDeactivate",' - + " device_context_ethos_u))" - ) - - -@pytest.mark.skip( - "Skipping this test as this is incorrectly using Arm(R) Ethos(TM)-U NPU " - "with packed calling convention which is not supported by the NPU codegen's " - "TIR to Runtime Hook. 
We need to use a different target to test this feature" -) -def test_device_api_hooks_packed_api(device_api_main_func): - """Check for Device API hooks with packed internal calls""" - main_func = device_api_main_func(interface_api="packed", use_unpacked_api=False) - - # Activate Device - assert ( - str(main_func.body[0][0].value) - == "@tir.tvm_check_return(0, -1, tir.call_extern(" - + '"TVMDeviceEthosUActivate",' - + " device_context_ethos_u: handle," - + " dtype=int32))" - ) - # Open Device - assert ( - str(main_func.body[1].body.body[0][0][0].value) - == "@tir.tvm_check_return(0, -1, tir.call_extern(" - + '"TVMDeviceEthosUOpen",' - + " device_context_ethos_u: handle," - + " dtype=int32))" - ) - # Device Call - assert ( - str(main_func.body[1].body.body[0][0][1][0].value) - == "@tir.tvm_call_cpacked(" - + '"tvmgen_default_ethos_u_main_0",' - + " input: handle, output: handle," - + " device_context_ethos_u: handle," - + " dtype=int32)" - ) - # Close Device - assert ( - str(main_func.body[1].body.body[0][0][2].value) - == "@tir.tvm_check_return(0, -1, tir.call_extern(" - + '"TVMDeviceEthosUClose",' - + " device_context_ethos_u: handle," - + " dtype=int32))" - ) - # Deactivate Device - assert ( - str(main_func.body[2][0].value) - == "@tir.tvm_check_return(0, -1, tir.call_extern(" - + '"TVMDeviceEthosUDeactivate",' - + " device_context_ethos_u: handle," - + " dtype=int32))" - ) - - -def test_without_device_api_unpacked_api(non_device_api_main_func): - """Test a graph without the Device API with the unpacked internal calls""" - - main_func = non_device_api_main_func(interface_api="c", use_unpacked_api=True) - body = main_func.body.value - assert ( - repr(body) - == 'T.tvm_check_return(0, -1, T.call_extern("int32", ' - + '"tvmgen_default_fused_multiply",' - + " x_buffer_var, y_buffer_var, output_buffer_var))" - ) - - -def test_without_device_api_packed_api(non_device_api_main_func): - """Test a graph without the Device API with the packed internal calls""" - - main_func = non_device_api_main_func(interface_api="packed", use_unpacked_api=False) - - body = main_func.body.value - assert repr(body) == ( - 'T.call_cpacked("tvmgen_default_fused_multiply", ' - "T.tvm_stack_make_array(x_buffer_var, T.tvm_stack_make_shape(10, 10), " - 'T.reinterpret("handle", T.uint64(0)), T.uint32(2), T.Cast("float32", 0), 0), ' - "T.tvm_stack_make_array(y_buffer_var, T.tvm_stack_make_shape(1, 10), " - 'T.reinterpret("handle", T.uint64(0)), T.uint32(2), T.Cast("float32", 0), 0), ' - "T.tvm_stack_make_array(output_buffer_var, T.tvm_stack_make_shape(10, 10), " - 'T.reinterpret("handle", T.uint64(0)), T.uint32(2), T.Cast("float32", 0), 0), ' - 'T.reinterpret("handle", T.uint64(0)))' - ) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/aot/test_cpp_aot.py b/tests/python/relay/aot/test_cpp_aot.py deleted file mode 100644 index c1b4fd817a84..000000000000 --- a/tests/python/relay/aot/test_cpp_aot.py +++ /dev/null @@ -1,290 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""AOT with C++ Runtime Tests""" - -import re -import textwrap - -import numpy as np -import pytest - -import tvm -from tvm import IRModule -from tvm import relay -from tvm.relay import backend, testing -from tvm.testing.aot import generate_ref_data - - -def test_error_c_interface(): - """Checks that an error occurs when using the packed API in combination with C interface""" - - two = relay.add(relay.const(1), relay.const(1)) - func = relay.Function([], two) - - with pytest.raises( - tvm.TVMError, - match=re.escape( - 'Need unpacked-api == false (got: 0) and interface-api == "packed" (got: c) when ' - "targeting c++ runtime" - ), - ): - tvm.relay.build( - IRModule.from_expr(func), - target="llvm", - executor=backend.Executor("aot", {"interface-api": "c"}), - ) - - -@pytest.mark.parametrize("enable_usmp", [True, False]) -@pytest.mark.parametrize("target_kind", ["c", "llvm"]) -def test_conv2d(enable_usmp, target_kind): - """Tests compilation of convolutions""" - relay_model = textwrap.dedent( - """\ - #[version = "0.0.5"] - def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), int8]) { - %1 = nn.conv2d( - %data, - %weight, - padding=[2, 2], - channels=3, - kernel_size=[5, 5], - data_layout="NCHW", - kernel_layout="OIHW", - out_dtype="int32"); - %2 = cast(nn.max_pool2d(%1, pool_size=[3, 3]), dtype="int8"); - %3 = nn.conv2d( - %2, - %weight, - padding=[2, 2], - channels=3, - kernel_size=[5, 5], - data_layout="NCHW", - kernel_layout="OIHW", - out_dtype="int32"); - %4 = nn.max_pool2d(%3, pool_size=[3, 3]); - %4 - } - """ - ) - ir_mod = tvm.relay.fromtext(relay_model) - - main_func = ir_mod["main"] - shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params} - type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params} - - weight_data = np.random.randint(1, 255, shape_dict["weight"]).astype(type_dict["weight"]) - input_data = np.ones(shape_dict["data"]).astype(type_dict["data"]) - params = {"weight": weight_data} - inputs = {"data": input_data} - ref_outputs = generate_ref_data(ir_mod, inputs, params) - - with tvm.transform.PassContext( - opt_level=3, - config={ - "tir.disable_vectorize": True, - "tir.usmp.enable": enable_usmp, - }, - ): - mod = tvm.relay.build( - ir_mod, - params=params, - target=target_kind, - executor=backend.Executor("aot", {"interface-api": "packed", "unpacked-api": False}), - ) - temp_dir = tvm.contrib.utils.TempDirectory() - test_so_path = temp_dir / "test.so" - mod.export_library(test_so_path, cc="gcc", options=["-std=c11", "-g3", "-O0"]) - loaded_mod = tvm.runtime.load_module(test_so_path) - runner = tvm.runtime.executor.AotModule(loaded_mod["default"](tvm.cpu(0))) - runner.set_input(**inputs) - - assert runner.get_input_name(0) == "data" - shape_dict, dtype_dict = runner.get_input_info() - assert shape_dict == {"data": (1, 3, 64, 64)} - assert dtype_dict == {"data": "uint8"} - - runner.run() - assert (runner.get_output(0).numpy() == list(ref_outputs.values())[0]).all() - - -@pytest.mark.parametrize("enable_usmp", [True, False]) -@pytest.mark.parametrize("target_kind", ["c", "llvm"]) -def 
test_mobilenet(enable_usmp: bool, target_kind: str): - """Full network test with Mobilenet""" - ir_mod, params = testing.mobilenet.get_workload(batch_size=1) - data_shape = [int(x) for x in ir_mod["main"].checked_type.arg_types[0].shape] - data = np.random.uniform(size=data_shape).astype("float32") - inputs = {"data": data} - ref_outputs = generate_ref_data(ir_mod, inputs, params) - - with tvm.transform.PassContext( - opt_level=3, config={"tir.disable_vectorize": True, "tir.usmp.enable": enable_usmp} - ): - mod = tvm.relay.build( - ir_mod, - params=params, - target=target_kind, - executor=backend.Executor("aot", {"interface-api": "packed"}), - ) - - temp_dir = tvm.contrib.utils.TempDirectory() - test_so_path = temp_dir / "test.so" - mod.export_library(test_so_path, cc="c++", options=["-std=gnu++17", "-g3", "-O0"]) - loaded_mod = tvm.runtime.load_module(test_so_path) - runner = tvm.runtime.executor.AotModule(loaded_mod["default"](tvm.cpu(0))) - runner.set_input(**inputs) - runner.run() - assert (runner.get_output(0).asnumpy() == list(ref_outputs.values())[0]).all() - - -def test_module_list(): - """Checks the correct list of module names is generated""" - input_x = tvm.relay.var("x", tvm.relay.TensorType([1], dtype="float32")) - expr = tvm.relay.add(input_x, tvm.relay.Constant(tvm.nd.array(np.array([1], dtype="float32")))) - mod = tvm.relay.build( - tvm.IRModule.from_expr(tvm.relay.Function([input_x], expr)), - target="c", - executor=tvm.relay.backend.Executor("aot", {"interface-api": "packed"}), - mod_name="unusual_module_name_fred", - ) - temp_dir = tvm.contrib.utils.TempDirectory() - test_so_path = temp_dir / "test.so" - mod.export_library(test_so_path, cc="gcc", options=["-std=c11"]) - loaded_mod = tvm.runtime.load_module(test_so_path) - list_module_names = loaded_mod.get_function("list_module_names") - names_expected = ["unusual_module_name_fred"] - assert list(sorted(names_expected)) == list(sorted(list_module_names())) - - -def test_create_executor(): - x = tvm.relay.var("x", tvm.relay.TensorType([1], dtype="float32")) - expr = tvm.relay.add(x, tvm.relay.Constant(tvm.nd.array(np.array([1], dtype="float32")))) - actual = relay.create_executor( - "aot", mod=tvm.IRModule.from_expr(tvm.relay.Function([x], expr)), target="c" - ).evaluate()(np.array([2], dtype="float32")) - - np.isfinite(np.array([3], dtype="float32")) - - np.testing.assert_allclose(actual.numpy(), np.array([3], dtype="float32")) - - -def test_pass_wrong_device_arg(): - """Ensure an error is generated if the incorrect number of devices are passed""" - x = tvm.relay.var("x", tvm.relay.TensorType([1], dtype="float32")) - expr = tvm.relay.add(x, tvm.relay.Constant(tvm.nd.array(np.array([1], dtype="float32")))) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.relay.build( - tvm.IRModule.from_expr(tvm.relay.Function([x], expr)), - target="c", - executor=backend.Executor("aot", {"interface-api": "packed"}), - ) - - temp_dir = tvm.contrib.utils.TempDirectory() - test_so_path = temp_dir / "test.so" - mod.export_library(test_so_path, cc="gcc", options=["-std=c11", "-g3", "-O0"]) - loaded_mod = tvm.runtime.load_module(test_so_path) - - with pytest.raises(tvm.TVMError) as error: - tvm.runtime.executor.AotModule(loaded_mod["default"](tvm.cpu(0), tvm.cpu(0))) - - assert ( - "Check failed: devices_.size() == 1 (2 vs. 1) : Expect exactly 1 device passed." - in str(error.exception) - ) - # TODO write asserts for # and type of device. 
- - -@pytest.mark.parametrize("target_kind", ["c", "llvm"]) -@pytest.mark.parametrize("input_name", ["input:0", "input@0", "input_0"]) -def test_aot_input_name_with_special_character(target_kind: str, input_name: str): - """Test name transforms in AOT for input names with special characters.""" - dtype = "float32" - input_1 = relay.var(input_name, shape=(10, 5), dtype=dtype) - weight = relay.var("weight", shape=(1, 5), dtype=dtype) - output = relay.add(input_1, weight) - func = relay.Function([input_1, weight], output) - - input_data = np.random.rand(10, 5).astype(dtype) - weight_data = np.random.rand(1, 5).astype(dtype) - expected_output = input_data + weight_data - params = {"weight": weight_data} - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.relay.build( - tvm.IRModule.from_expr(func), - target=target_kind, - params=params, - executor=tvm.relay.backend.Executor("aot", {"interface-api": "packed"}), - ) - temp_dir = tvm.contrib.utils.TempDirectory() - test_so_path = temp_dir / "test.so" - mod.export_library(test_so_path, cc="c++", options=["-std=gnu++17", "-g3", "-O0"]) - # test both original name and transformed name - for name in ["input_0", input_name]: - loaded_mod = tvm.runtime.load_module(test_so_path) - runner = tvm.runtime.executor.AotModule(loaded_mod["default"](tvm.cpu(0))) - inputs = {name: input_data} - runner.set_input(**inputs) - - input_ind = runner.get_input_index(name) - assert (runner.get_input(input_ind).asnumpy() == input_data).all() - - runner.run() - assert (runner.get_output(0).asnumpy() == expected_output).all() - - -@pytest.mark.parametrize("target_kind", ["c", "llvm"]) -def test_aot_incorrect_input_name(target_kind: str): - """Test passing incorrect input name.""" - dtype = "float32" - correct_input_name = "input" - incorrect_input_name = "input1" - input1 = relay.var(correct_input_name, shape=(10, 5), dtype=dtype) - weight = relay.var("weight", shape=(1, 5), dtype=dtype) - output = relay.add(input1, weight) - func = relay.Function([input1, weight], output) - - input_data = np.random.rand(10, 5).astype(dtype) - weight_data = np.random.rand(1, 5).astype(dtype) - params = {"weight": weight_data} - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - mod = tvm.relay.build( - tvm.IRModule.from_expr(func), - target=target_kind, - params=params, - executor=tvm.relay.backend.Executor("aot", {"interface-api": "packed"}), - ) - temp_dir = tvm.contrib.utils.TempDirectory() - test_so_path = temp_dir / "test.so" - mod.export_library(test_so_path, cc="c++", options=["-std=gnu++17", "-g3", "-O0"]) - - loaded_mod = tvm.runtime.load_module(test_so_path) - runner = tvm.runtime.executor.AotModule(loaded_mod["default"](tvm.cpu(0))) - inputs = {incorrect_input_name: input_data} - - error_regex = r"Invalid input name." - with pytest.raises(tvm.TVMError, match=error_regex): - runner.set_input(**inputs) - - with pytest.raises(tvm.TVMError, match=error_regex): - runner.get_input_index(incorrect_input_name) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py deleted file mode 100644 index 1c0f354d31eb..000000000000 --- a/tests/python/relay/aot/test_crt_aot.py +++ /dev/null @@ -1,1106 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""AOT with C Runtime Tests""" - -import os -import pathlib -import re -import tarfile -from collections import OrderedDict - -import numpy as np -import pytest -import tvm -from tvm import TVMError, relay -from tvm.contrib import utils -from tvm.ir.instrument import pass_instrument -from tvm.ir.module import IRModule -from tvm.micro import export_model_library_format -from tvm.micro import model_library_format as mlf -from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER, parametrize_aot_options -from tvm.micro.testing.utils import get_conv2d_relay_module -from tvm.relay import testing, transform -from tvm.relay.backend import Executor, Runtime -from tvm.relay.op.annotation import compiler_begin, compiler_end -from tvm.relay.testing import byoc -from tvm.testing.aot import ( - AOTTestModel, - compile_and_run, - compile_models, - create_relay_module_and_inputs_from_tflite_file, - generate_ref_data, -) - - -def test_error_c_interface_with_packed_api(): - """Checks that an error occurs when using the packed API in combination with C interface""" - interface_api = "c" - use_unpacked_api = False - test_runner = AOT_DEFAULT_RUNNER - - two = relay.add(relay.const(1), relay.const(1)) - func = relay.Function([], two) - - with pytest.raises( - tvm.TVMError, - match=re.escape( - 'Either need interface_api == "packed" (got: c) or ' - "unpacked-api == true (got: 0) when targeting " - "c runtime" - ), - ): - compile_and_run( - AOTTestModel( - module=IRModule.from_expr(func), inputs={}, outputs=generate_ref_data(func, {}) - ), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@parametrize_aot_options -def test_conv_with_params(interface_api, use_unpacked_api, test_runner): - """Tests compilation of convolution with parameters""" - mod = get_conv2d_relay_module() - main_func = mod["main"] - shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params} - type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params} - - weight_data = np.ones(shape_dict["weight"]).astype(type_dict["weight"]) - input_data = np.ones(shape_dict["data"]).astype(type_dict["data"]) - - params = {"weight": weight_data} - inputs = {"data": input_data} - output_list = generate_ref_data(mod, inputs, params) - - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), - test_runner, - interface_api, - use_unpacked_api, - print_output_on_mismatch=True, - ) - - -@parametrize_aot_options -def test_add_with_params(interface_api, use_unpacked_api, test_runner): - """Tests compilation of add with parameters""" - input_x = relay.var("x", shape=(1, 10)) - input_y = relay.var("y", shape=(1, 10)) - input_z = relay.add(input_x, input_y) - func = relay.Function([input_x, input_y], input_z) - - input_x_data = np.ones((1, 10)).astype("float32") - input_y_data = np.random.uniform(size=(1, 10)).astype("float32") - - params = {"x": input_x_data} - inputs = {"y": input_y_data} - output_list 
= generate_ref_data(func, inputs, params) - - compile_and_run( - AOTTestModel( - module=IRModule.from_expr(func), - inputs=inputs, - outputs=output_list, - params=params, - ), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@parametrize_aot_options -@pytest.mark.parametrize("groups,weight_shape", [(1, 32), (32, 1)]) -def test_conv2d(interface_api, use_unpacked_api, test_runner, groups, weight_shape): - """Test a subgraph with a single conv2d operator.""" - dtype = "float32" - ishape = (1, 32, 14, 14) - wshape = (32, weight_shape, 3, 3) - - data0 = relay.var("data", shape=ishape, dtype=dtype) - weight0 = relay.var("weight", shape=wshape, dtype=dtype) - out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=groups) - main_f = relay.Function([data0, weight0], out) - mod = tvm.IRModule() - mod["main"] = main_f - mod = transform.InferType()(mod) - - i_data = np.random.uniform(0, 1, ishape).astype(dtype) - w1_data = np.random.uniform(0, 1, wshape).astype(dtype) - - inputs = OrderedDict([("data", i_data), ("weight", w1_data)]) - - output_list = generate_ref_data(mod, inputs) - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - ) - - -def test_packed_global_variables(): - """Check packed global variables in codegen output.""" - dtype = "float32" - ishape = (1, 32, 14, 14) - wshape = (32, 32, 3, 3) - interface_api = "packed" - use_unpacked_api = False - - data0 = relay.var("data", shape=ishape, dtype=dtype) - weight0 = relay.var("weight", shape=wshape, dtype=dtype) - out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=1) - main_f = relay.Function([data0, weight0], out) - mod = tvm.IRModule() - mod["main"] = main_f - mod = transform.InferType()(mod) - - i_data = np.random.uniform(0, 1, ishape).astype(dtype) - w1_data = np.random.uniform(0, 1, wshape).astype(dtype) - - inputs = OrderedDict([("data", i_data), ("weight", w1_data)]) - - output_list = generate_ref_data(mod, inputs) - compiled_models_list = compile_models( - models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - workspace_byte_alignment=8, - enable_op_fusion=True, - pass_config=AOT_DEFAULT_RUNNER.pass_config, - use_runtime_executor=True, - target=tvm.target.Target("c"), - ) - compiled_model = compiled_models_list[0] - - tmp_path = utils.tempdir() - base_path = tmp_path.temp_dir - - model = compiled_model.model - tar_file = os.path.join(base_path, f"{model.name}.tar") - export_model_library_format(compiled_model.executor_factory, tar_file) - t = tarfile.open(tar_file) - t.extractall(base_path) - - file_list = [] - for path in (pathlib.Path(base_path) / "codegen" / "host" / "src").iterdir(): - if path.is_file(): - file_list.append(path) - assert len(file_list) > 0 - - for path in file_list: - with open(path, "r") as lib_f: - lib1 = lib_f.readlines() - - tvmgen_names = [] - tvmgen_funcs = [] - for line in lib1: - for item in line.split(" "): - # Find all names starting with tvmgen_default - if item.startswith("tvmgen_default"): - # Collect any name starting with tvmgen_default - tvmgen_names.append(item) - # Collect all functions starting with tvmgen_default - tvmgen_funcs += re.findall(r"(?<=).*(?=\()", item) - - # Check if any function name has a packed variable name in all - # items that start with tvmgen_default - for func in tvmgen_funcs: - assert f"{func}_packed" not in tvmgen_names - - -def 
test_io_size_definition(): - """Check network IO size definitions in the codegen output.""" - dtype = "float32" - ishape = (1, 32, 14, 14) - wshape = (32, 32, 3, 3) - interface_api = "c" - use_unpacked_api = True - - data0 = relay.var("data", shape=ishape, dtype=dtype) - weight0 = relay.var("weight", shape=wshape, dtype=dtype) - out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=1) - main_f = relay.Function([data0, weight0], out) - mod = tvm.IRModule() - mod["main"] = main_f - mod = transform.InferType()(mod) - - i_data = np.random.uniform(0, 1, ishape).astype(dtype) - w_data = np.random.uniform(0, 1, wshape).astype(dtype) - - inputs = OrderedDict([("data", i_data), ("weight", w_data)]) - - output_list = generate_ref_data(mod, inputs) - compiled_models_list = compile_models( - models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - workspace_byte_alignment=8, - enable_op_fusion=True, - pass_config=AOT_DEFAULT_RUNNER.pass_config, - use_runtime_executor=True, - target=tvm.target.Target("c"), - ) - dtype_itemsize = np.dtype(dtype).itemsize - ref_input_size = i_data.size * dtype_itemsize - ref_weight_size = w_data.size * dtype_itemsize - ref_output_size = output_list["output"].size * dtype_itemsize - compiled_model = compiled_models_list[0] - - tmp_path = utils.tempdir() - base_path = tmp_path.temp_dir - - model = compiled_model.model - tar_file = os.path.join(base_path, f"{model.name}.tar") - export_model_library_format(compiled_model.executor_factory, tar_file) - t = tarfile.open(tar_file) - t.extractall(base_path) - - header_path = f"{base_path}/codegen/host/include/tvmgen_{model.name}.h" - with open(header_path, "r") as header: - contents = header.readlines() - contents = "".join(map(str, contents)) - assert contents.count("_SIZE") == 4 - assert f"TVMGEN_DEFAULT_DATA_SIZE {ref_input_size}" in contents - assert f"TVMGEN_DEFAULT_WEIGHT_SIZE {ref_weight_size}" in contents - assert f"TVMGEN_DEFAULT_OUTPUT_SIZE {ref_output_size}" in contents - - -@parametrize_aot_options -def test_concatenate(interface_api, use_unpacked_api, test_runner): - """Tests compilation of concatenate""" - dtype = "float32" - input_x = relay.var("x", shape=(10, 5), dtype=dtype) - input_y = relay.var("y", shape=(10, 5), dtype=dtype) - input_z = relay.var("z", shape=(), dtype=dtype) - concat_inputs = relay.concatenate((input_x, input_y), axis=1) - func_output = relay.add(input_z, concat_inputs) - # Check result. 
- func = relay.Function([input_x, input_y, input_z], func_output) - x_data = np.random.rand(10, 5).astype(dtype) - y_data = np.random.rand(10, 5).astype(dtype) - t_data = np.random.uniform(size=()).astype(dtype) - inputs = OrderedDict([("x", x_data), ("y", y_data), ("z", t_data)]) - - output_list = generate_ref_data(func, inputs) - compile_and_run( - AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@parametrize_aot_options -def test_nested_tuples(interface_api, use_unpacked_api, test_runner): - """Tests compilation of functions with nested tuple outputs""" - input_x = relay.var("x", shape=(10,)) - output_1 = input_x + relay.const(1.0) - output_2 = output_1 + relay.const(1.0) - output_3 = output_2 + relay.const(1.0) - output_4 = output_3 + relay.const(1.0) - full_output = relay.Tuple( - [output_1, relay.Tuple([relay.Tuple([output_2, output_3]), output_4])] - ) - func = relay.Function([input_x], full_output) - - x_data = np.random.uniform(size=(10,)).astype(np.float32) - inputs = {"x": x_data} - output_list = generate_ref_data(func, inputs) - - compile_and_run( - AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@parametrize_aot_options -def test_tuple_getitem(interface_api, use_unpacked_api, test_runner): - func = relay.Function([], relay.TupleGetItem(relay.Tuple([relay.const(1), relay.const(2)]), 0)) - output_list = generate_ref_data(func, {}) - - compile_and_run( - AOTTestModel(module=IRModule.from_expr(func), inputs={}, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@parametrize_aot_options -def test_id(interface_api, use_unpacked_api, test_runner): - x = relay.var("x", "float32") - ident = relay.Function([x], x) - one = np.array(1.0, "float32") - inputs = {"x": one} - output_list = generate_ref_data(ident, inputs) - - compile_and_run( - AOTTestModel(module=IRModule.from_expr(ident), inputs=inputs, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@parametrize_aot_options -def test_add_const(interface_api, use_unpacked_api, test_runner): - two = relay.add(relay.const(1), relay.const(1)) - func = relay.Function([], two) - output_list = generate_ref_data(func, {}) - - compile_and_run( - AOTTestModel(module=IRModule.from_expr(func), inputs={}, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@parametrize_aot_options -def test_multiply(interface_api, use_unpacked_api, test_runner): - """Tests compilation of multiply""" - x = relay.var("x", shape=(10, 10)) - y = relay.var("y", shape=(1, 10)) - func = relay.Function([x, y], relay.multiply(x, y)) - x_data = np.random.rand(10, 10).astype("float32") - y_data = np.random.rand(1, 10).astype("float32") - - inputs = OrderedDict([("x", x_data), ("y", y_data)]) - output_list = generate_ref_data(func, inputs) - - compile_and_run( - AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@parametrize_aot_options -def test_subtract(interface_api, use_unpacked_api, test_runner): - i = relay.var("i", shape=[], dtype="int32") - sub = relay.subtract(i, relay.const(1, dtype="int32")) - func = relay.Function([i], sub, ret_type=relay.TensorType([], "int32")) - i_data = np.array(1, dtype="int32") - inputs = {"i": i_data} - output_list = generate_ref_data(func, inputs) - compile_and_run( - 
AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@parametrize_aot_options -def test_tuple_output(interface_api, use_unpacked_api, test_runner): - """Tests getting items from tuples""" - x = relay.var("x", shape=(6, 9)) - y = relay.split(x, 3).astuple() - a = relay.TupleGetItem(y, 0) - b = relay.TupleGetItem(y, 1) - out = relay.Tuple([a, b]) - func = relay.Function([x], out) - x_data = np.random.rand(6, 9).astype("float32") - inputs = {"x": x_data} - output_list = generate_ref_data(func, inputs) - compile_and_run( - AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@pytest.mark.parametrize( - ["debug_calculated_workspaces", "workspace_byte_alignment"], [(True, 1), (True, 16), (False, 1)] -) -def test_mobilenet(debug_calculated_workspaces, workspace_byte_alignment): - """Full network test with Mobilenet""" - use_unpacked_api = True - interface_api = "c" - test_runner = AOT_DEFAULT_RUNNER - - # TODO(@Mousius) - Enable memory planning to take into account debug information - debugging_memory_overhead = 1024 * 1024 - - mod, params = testing.mobilenet.get_workload(batch_size=1) - data_shape = [int(x) for x in mod["main"].checked_type.arg_types[0].shape] - data = np.random.uniform(size=data_shape).astype("float32") - inputs = {"data": data} - output_list = generate_ref_data(mod, inputs, params) - compile_and_run( - AOTTestModel( - module=mod, - inputs=inputs, - outputs=output_list, - params=params, - extra_memory_in_bytes=debugging_memory_overhead, - ), - test_runner, - interface_api, - use_unpacked_api, - workspace_byte_alignment=workspace_byte_alignment, - debug_calculated_workspaces=debug_calculated_workspaces, - ) - - -@pytest.mark.parametrize("merge_compiler_regions", [False, True]) -def test_byoc_microtvm(merge_compiler_regions): - """ - This is a simple test to check BYOC capabilities of AOT - with and without merging compiler regions to test for https://github.com/apache/tvm/issues/9036 - """ - use_unpacked_api = False - interface_api = "packed" - test_runner = AOT_DEFAULT_RUNNER - - input_x = relay.var("x", shape=(10, 10)) - input_w0 = relay.var("w0", shape=(10, 10)) - input_w1 = relay.var("w1", shape=(10, 10)) - - # z0 = x + w0 - marked_input_x = compiler_begin(input_x, "ccompiler") - marked_input_w0 = compiler_begin(input_w0, "ccompiler") - add_x_and_w0 = relay.add(marked_input_x, marked_input_w0) - end_inner_add = compiler_end(add_x_and_w0, "ccompiler") - - # z1 = z0 + w1 - marked_inner_add = compiler_begin(end_inner_add, "ccompiler") - marked_w1 = compiler_begin(input_w1, "ccompiler") - add_nested_and_w1 = relay.add(marked_inner_add, marked_w1) - end_outer_add = compiler_end(add_nested_and_w1, "ccompiler") - - # z2 = z0 + z1 - final_add = relay.add(end_inner_add, end_outer_add) - - relay_func = relay.Function([input_x, input_w0, input_w1], final_add) - mod = tvm.IRModule() - mod["main"] = relay_func - - if merge_compiler_regions: - mod = transform.MergeCompilerRegions()(mod) - - mod = transform.PartitionGraph("mod_name")(mod) - mod = transform.InferType()(mod) - - x_data = [("x", np.random.rand(10, 10).astype("float32"))] - w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32")) for i in range(2)] - - map_inputs = OrderedDict(x_data + w_data) - output_list = generate_ref_data(mod, map_inputs) - compile_and_run( - AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, 
outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@pytest.mark.parametrize("merge_compiler_regions", [False, True]) -def test_byoc_microtvm_multiple_subgraphs(merge_compiler_regions): - """This is a test case to check BYOC capabilities of AOT with multiple sub graphs""" - use_unpacked_api = False - interface_api = "packed" - test_runner = AOT_DEFAULT_RUNNER - - input_x = relay.var("x", shape=(10, 10)) - input_w0 = relay.var("w0", shape=(10, 10)) - input_w1 = relay.var("w1", shape=(10, 10)) - input_w2 = relay.var("w2", shape=(10, 10)) - input_w3 = relay.var("w3", shape=(10, 10)) - input_w4 = relay.var("w4", shape=(10, 10)) - input_w5 = relay.var("w5", shape=(10, 10)) - input_w6 = relay.var("w6", shape=(10, 10)) - input_w7 = relay.var("w7", shape=(10, 10)) - - # C compiler - ccompiler_add_1 = relay.add(input_x, input_w0) - ccompiler_sub_1 = relay.subtract(ccompiler_add_1, input_w1) - ccompiler_mul_1 = relay.multiply(ccompiler_sub_1, input_w2) - - ccompiler_add_2 = relay.add(input_x, input_w3) - ccompiler_sub_2 = relay.subtract(ccompiler_add_2, input_w4) - ccompiler_mul_2 = relay.multiply(ccompiler_sub_2, input_w5) - - # Other parts on TVM - tvm_add = relay.add(input_x, input_w6) - tvm_sub = relay.subtract(tvm_add, input_w7) - - concat_outputs = relay.concatenate((ccompiler_mul_1, ccompiler_mul_2, tvm_sub), axis=0) - relay_func = relay.Function( - [input_x, input_w0, input_w1, input_w2, input_w3, input_w4, input_w5, input_w6, input_w7], - concat_outputs, - ) - mod = tvm.IRModule() - ann = byoc.CcompilerAnnotator() - mod["main"] = ann.visit(relay_func) - - if merge_compiler_regions: - mod = transform.MergeCompilerRegions()(mod) - - mod = tvm.relay.transform.PartitionGraph("mod_name")(mod) - mod = tvm.relay.transform.InferType()(mod) - - x_data = np.random.rand(10, 10).astype("float32") - w_data = [] - for _ in range(8): - w_data.append(np.random.rand(10, 10).astype("float32")) - - map_inputs = OrderedDict([("x", x_data)] + [("w{}".format(i), w_data[i]) for i in range(8)]) - output_list = generate_ref_data(mod, map_inputs) - input_list = [map_inputs["x"]] - input_list.extend([map_inputs["w{}".format(i)] for i in range(8)]) - compile_and_run( - AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@parametrize_aot_options -def test_add_name_mangling_with_params(interface_api, use_unpacked_api, test_runner): - """Checks name mangling works with parameters""" - input_x = relay.var("x", shape=(1, 10)) - input_y = relay.var("y", shape=(1, 10)) - func_add = relay.add(input_x, input_y) - relay_func = relay.Function([input_x, input_y], func_add) - - x_in = np.ones((1, 10)).astype("float32") - y_in = np.random.uniform(size=(1, 10)).astype("float32") - - params = {"x": x_in} - inputs = {"y": y_in} - output_list = generate_ref_data(relay_func, inputs, params) - - compile_and_run( - AOTTestModel( - name="my_mod", - module=relay_func, - inputs=inputs, - outputs=output_list, - params=params, - ), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@parametrize_aot_options -def test_multiple_models(interface_api, use_unpacked_api, test_runner): - """Compiles multiple models to ensure both can be compiled into one output""" - # Identity model without params - x = relay.var("x", "float32") - mod1 = relay.Function([x], x) - one = np.array(1.0, "float32") - inputs1 = {"x": one} - output_list1 = generate_ref_data(mod1, inputs1) - params1 = None - - # Convolution model - mod2 = 
get_conv2d_relay_module() - main_func = mod2["main"] - shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params} - type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params} - - weight_data = np.ones(shape_dict["weight"]).astype(type_dict["weight"]) - input_data = np.ones(shape_dict["data"]).astype(type_dict["data"]) - - params2 = {"weight": weight_data} - inputs2 = {"data": input_data} - output_list2 = generate_ref_data(mod2, inputs2, params2) - - compile_and_run( - [ - AOTTestModel( - name="mod1", - module=mod1, - inputs=inputs1, - outputs=output_list1, - params=params1, - ), - AOTTestModel( - name="mod2", - module=mod2, - inputs=inputs2, - outputs=output_list2, - params=params2, - ), - ], - test_runner, - interface_api, - use_unpacked_api, - ) - - -def test_quant_mobilenet_tfl(): - """Since in AOT we pass directly the output buffer from the user, - in quantized networks sharing the output buffers is not possible. - This is because the output data type is int8 and the intermediate - buffer are int32 or int16. We use mobilenet quantized to stress this - situation and verify that the output buffer sharing is disabled in AOT.""" - pytest.importorskip("tflite") - - import tvm.relay.testing.tf as tf_testing # pylint: disable=import-outside-toplevel - - use_unpacked_api = True - interface_api = "c" - test_runner = AOT_DEFAULT_RUNNER - - tflite_model_file = tf_testing.get_workload_official( - "https://storage.googleapis.com/download.tensorflow.org/" - "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz", - "mobilenet_v1_1.0_224_quant.tflite", - ) - mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file) - output_list = generate_ref_data(mod, inputs, params) - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@parametrize_aot_options -def test_transpose(interface_api, use_unpacked_api, test_runner): - """Test that non-inpleaceable operations (e.g., transpose) do not happen in-place.""" - - dtype = "float32" - input_x = relay.var("x", shape=(10, 5), dtype=dtype) - input_y = relay.var("y", shape=(10, 5), dtype=dtype) - input_z = relay.var("z", shape=(), dtype=dtype) - first_add = relay.add(input_x, input_y) - transpose_add = relay.transpose(first_add) - final_add = relay.add(transpose_add, input_z) - # Check result. 
- relay_func = relay.Function([input_x, input_y, input_z], final_add) - x_data = np.random.rand(10, 5).astype(dtype) - y_data = np.random.rand(10, 5).astype(dtype) - t_data = np.random.uniform(size=()).astype(dtype) - - inputs = {"x": x_data, "y": y_data, "z": t_data} - output_list = generate_ref_data(relay_func, inputs) - compile_and_run( - AOTTestModel(module=IRModule.from_expr(relay_func), inputs=inputs, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - enable_op_fusion=False, - ) - - -def test_name_sanitiser(): - """Test that input tensors with special characters in the name don't break compilation""" - - interface_api = "c" - use_unpacked_api = True - test_runner = AOT_DEFAULT_RUNNER - - func = relay.var("input-x::2", "float32") - ident = relay.Function([func], func) - one = np.array(1.0, "float32") - inputs = {"input-x::2": one} - output_list = generate_ref_data(ident, inputs) - - compile_and_run( - AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - enable_op_fusion=False, - ) - - -def test_name_sanitiser_name_clash(): - """Test that 2 input tensors with names that clash once sanitized, generates an error""" - - interface_api = "c" - use_unpacked_api = True - test_runner = AOT_DEFAULT_RUNNER - - dtype = "float32" - input_non_clashing = relay.var("input::-1", shape=(10, 5), dtype=dtype) - # Next 2 input tensor names will clash once sanitized. - input_clashing_1 = relay.var("input::-2", shape=(10, 5), dtype=dtype) - input_clashing_2 = relay.var("input:--2", shape=(), dtype=dtype) - inner_add = relay.add(input_non_clashing, input_clashing_1) - transpose_add = relay.transpose(inner_add) - final_add = relay.add(transpose_add, input_clashing_2) - # Check result. - func = relay.Function([input_non_clashing, input_clashing_1, input_clashing_2], final_add) - x_data = np.random.rand(10, 5).astype(dtype) - y_data = np.random.rand(10, 5).astype(dtype) - t_data = np.random.uniform(size=()).astype(dtype) - - inputs = {"input::-1": x_data, "input::-2": y_data, "input:--2": t_data} - output_list = generate_ref_data(func, inputs) - - with pytest.raises(TVMError, match="Sanitized input tensor name clash"): - compile_and_run( - AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - enable_op_fusion=False, - ) - - -def test_aot_codegen_backend_alloc_workspace_calls(): - """This test checks whether AoT lowering creates TVMBackendAllocWorkspace calls""" - - # The %data and %weight shapes in the following primitive Relay should create - # small tensors that would get lowered to stack allocations in the CPU PrimFuncs. 
- # However, the AoT executor codegen should retain them as TVMBAW calls - # pylint: disable=line-too-long - relay_mod = tvm.relay.fromtext( - """ - #[version = "0.0.5"] - def @main(%data: Tensor[(1, 4, 4, 4), float32], %weight: Tensor[(4, 4, 3, 3), float32], src_layout="OIHW", dst_layout="OIHW4i4o") -> Tensor[(1, 4, 4, 4), float32] { - %0 = fn (%p02: Tensor[(1, 4, 4, 4), float32], Primitive=1, hash="9332b3872fb5292c", src_layout="NCHW", dst_layout="NCHW4c") -> Tensor[(1, 1, 4, 4, 4), float32] { - layout_transform(%p02, src_layout="NCHW", dst_layout="NCHW4c") /* ty=Tensor[(1, 1, 4, 4, 4), float32] */ - }; - %1 = fn (%p03: Tensor[(4, 4, 3, 3), float32], Primitive=1, hash="9f0b2b8a24a4dab3", src_layout="OIHW", dst_layout="OIHW4i4o") -> Tensor[(1, 1, 3, 3, 4, 4), float32] { - layout_transform(%p03, src_layout="OIHW", dst_layout="OIHW4i4o") /* ty=Tensor[(1, 1, 3, 3, 4, 4), float32] */ - }; - %2 = %0(%data) /* ty=Tensor[(1, 1, 4, 4, 4), float32] */; - %3 = %1(%weight) /* ty=Tensor[(1, 1, 3, 3, 4, 4), float32] */; - %4 = fn (%p01: Tensor[(1, 1, 4, 4, 4), float32], %p1: Tensor[(1, 1, 3, 3, 4, 4), float32], out_layout="NCHW4c", kernel_layout="OIHW4i4o", Primitive=1, data_layout="NCHW4c") -> Tensor[(1, 1, 4, 4, 4), float32] { - nn.contrib_conv2d_NCHWc(%p01, %p1, padding=[1, 1, 1, 1], channels=4, kernel_size=[3, 3], data_layout="NCHW4c", kernel_layout="OIHW4i4o", out_layout="NCHW4c") /* ty=Tensor[(1, 1, 4, 4, 4), float32] */ - }; - %5 = %4(%2, %3) /* ty=Tensor[(1, 1, 4, 4, 4), float32] */; - %6 = fn (%p0: Tensor[(1, 1, 4, 4, 4), float32], Primitive=1, src_layout="NCHW4c", dst_layout="NCHW") -> Tensor[(1, 4, 4, 4), float32] { - layout_transform(%p0, src_layout="NCHW4c", dst_layout="NCHW") /* ty=Tensor[(1, 4, 4, 4), float32] */ - }; - %6(%5) /* ty=Tensor[(1, 4, 4, 4), float32] */ - } - """ - ) - # pylint: enable=line-too-long - - compiled_test_mods = compile_models( - models=AOTTestModel(module=relay_mod, inputs=None, outputs=None), - interface_api="c", - use_unpacked_api=True, - pass_config={"tir.usmp.enable": False}, - ) - source = compiled_test_mods[0].executor_factory.lib.imported_modules[0].get_source() - # There should be three allocates created for three primitive relay function - # calls in the main for the above relay snippet. 
- assert source.count("TVMBackendAllocWorkspace") == 3 - - -@pytest.mark.parametrize("constants_byte_alignment", [8, 16, 32]) -def test_constants_alignment(constants_byte_alignment): - """Test that constants_byte_alignment correctly sets constants byte alignment""" - - use_unpacked_api = True - interface_api = "c" - - mod, params = testing.mobilenet.get_workload(batch_size=1) - data_shape = [int(x) for x in mod["main"].checked_type.arg_types[0].shape] - data = np.random.uniform(size=data_shape).astype("float32") - inputs = {"data": data} - output_list = generate_ref_data(mod, inputs, params) - target = f"c -constants-byte-alignment={constants_byte_alignment}" - compiled_test_mods = compile_models( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), - interface_api, - use_unpacked_api, - target=tvm.target.Target(target, host=target), - pass_config={"tir.usmp.enable": False}, - ) - source = compiled_test_mods[0].executor_factory.lib.imported_modules[0].get_source() - assert f'__attribute__((section(".rodata.tvm"), aligned({constants_byte_alignment})))' in source - - -def test_output_tensor_names(): - """Test that the output names generated match those in the model""" - pytest.importorskip("tflite") - - # pylint: disable=import-outside-toplevel - import tensorflow as tf - import tflite.Model - - # pylint: enable=import-outside-toplevel - - ifm_shape = (1, 299, 299, 3) - padding = "VALID" - strides = (1, 1) - dilation = (1, 1) - kernel_shape = (3, 2) - - def create_tflite_graph_two_outs(): - """Create a model with 2 output tensors""" - - class Model(tf.Module): - """Simple TFLite test model""" - - @tf.function - def tf_function(self, tf_input_x): - """Single TFLite function with two convolutions""" - tf_strides = [1, strides[0], strides[1], 1] - filter_shape = [kernel_shape[0], kernel_shape[1], 3, 3] - filter1 = tf.constant( - np.arange(np.prod(filter_shape)).reshape(filter_shape), - dtype=tf.float32, - ) - first_conv2d = tf.nn.conv2d( - tf_input_x, - filters=filter1, - strides=tf_strides, - padding=padding, - dilations=dilation, - ) - first_conv2d = tf.nn.relu(first_conv2d) - - filter2 = tf.constant( - 1000 + np.arange(np.prod(filter_shape)).reshape(filter_shape), - dtype=tf.float32, - ) - second_conv2d = tf.nn.conv2d( - tf_input_x, - filters=filter2, - strides=strides, - padding=padding, - data_format="NHWC", - dilations=dilation, - ) - second_conv2d = tf.nn.relu(second_conv2d) - return first_conv2d, second_conv2d - - model = Model() - concrete_func = model.tf_function.get_concrete_function( - tf.TensorSpec(ifm_shape, dtype=tf.float32) - ) - - # Convert the model - def representative_dataset(): - for _ in range(100): - data = np.random.rand(*tuple(ifm_shape)) - yield [data.astype(np.float32)] - - converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.representative_dataset = representative_dataset - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] - converter.inference_input_type = tf.int8 - converter.inference_output_type = tf.int8 - tflite_model = converter.convert() - return tflite_model - - tflite_graph = create_tflite_graph_two_outs() - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) - mod, params = relay.frontend.from_tflite( - tflite_model, - shape_dict={"input": ifm_shape}, - dtype_dict={"input": "int8"}, - ) - - use_unpacked_api = True - interface_api = "c" - test_runner = AOT_DEFAULT_RUNNER - - in_min, in_max = (-128, 127) 
- data = np.random.randint(in_min, high=in_max, size=ifm_shape, dtype="int8") - input_name = mod["main"].params[0].name_hint - inputs = {input_name: data} - output_list = generate_ref_data(mod, inputs, params) - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), - test_runner, - interface_api, - use_unpacked_api, - ) - - compiled_test_mods = compile_models( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), - interface_api, - use_unpacked_api, - ) - - # Check that the names of the output tensors occur in the source code - source = compiled_test_mods[0].executor_factory.lib.get_source() - for output_name in output_list.keys(): - assert output_name in source - - -@pytest.mark.parametrize( - "workspace_byte_alignment,main_workspace_size", - [ - (8, 14880), - (16, 14880), - (256, 15616), - ], -) -def test_workspace_calculation(workspace_byte_alignment, main_workspace_size): - """Checks calculated workspace against known values""" - mod, params = tvm.relay.testing.synthetic.get_workload() - target = "c" - runtime = Runtime("crt") - executor = Executor( - "aot", - { - "workspace-byte-alignment": workspace_byte_alignment, - }, - ) - with tvm.transform.PassContext( - opt_level=3, - config={ - "tir.disable_vectorize": True, - "tir.usmp.enable": False, - }, - ): - lib = tvm.relay.build(mod, target, executor=executor, runtime=runtime, params=params) - - mlf_memory_map = mlf._build_function_memory_map(lib.function_metadata) - assert mlf_memory_map["main"][0]["workspace_size_bytes"] == main_workspace_size - - -@tvm.testing.requires_package("tflite") -@tvm.testing.requires_cmsisnn -def test_workspace_calculation_cmsis_nn(): - """This tests cmsis_nn codegen for workspace calculation. - This is tested specially because cmsis-nn codegen creates - multiple PrimFuncs per offloaded relay function in a non - -hierarchical manner.""" - pytest.importorskip("tflite") - - # pylint: disable=import-outside-toplevel - from tvm.contrib.download import download_testdata - from tvm.relay.op.contrib import cmsisnn - - # pylint: enable=import-outside-toplevel - - target = "c" - runtime = Runtime("crt") - executor = Executor( - "aot", - { - "workspace-byte-alignment": 16, - "interface-api": "c", - "unpacked-api": True, - }, - ) - - base_url = ( - "https://github.com/ARM-software/ML-zoo/raw/" - "48a22ee22325d15d2371a6df24eb7d67e21dcc97" - "/models/keyword_spotting/cnn_small/tflite_int8" - ) - file_to_download = "cnn_s_quantized.tflite" - file_saved = "cnn_s_quantized_15Dec2021.tflite" - model_file = download_testdata("{}/{}".format(base_url, file_to_download), file_saved) - mod, _, params = create_relay_module_and_inputs_from_tflite_file(model_file) - mod = cmsisnn.partition_for_cmsisnn(mod, params) - with tvm.transform.PassContext( - opt_level=3, - config={ - "tir.disable_vectorize": True, - }, - ): - lib = tvm.relay.build(mod, target, executor=executor, runtime=runtime, params=params) - mlf_memory_map = mlf._build_function_memory_map(lib.function_metadata) - assert mlf_memory_map["main"][0]["workspace_size_bytes"] == 14256 - - -def test_aot_codegen_checks_returns(): - """This test checks whether AoT lowering creates calls that check the return value correctly""" - input_x = relay.var("x", shape=(1, 10)) - input_y = relay.var("y", shape=(1, 10)) - func_add = relay.add(input_x, input_y) - func = relay.Function([input_x, input_y], func_add) - - compiled_test_mods = compile_models( - models=AOTTestModel(module=IRModule.from_expr(func), 
inputs=None, outputs=None), - interface_api="c", - use_unpacked_api=True, - ) - source = compiled_test_mods[0].executor_factory.lib.imported_modules[0].get_source() - - main_ir_module = compiled_test_mods[0].executor_factory.lowered_ir_mods.items()[0][1] - main_func = main_ir_module["__tvm_main__"] - - # Check operator call is wrapped properly - body = main_func.body.value - assert ( - repr(body) - == 'T.tvm_check_return(0, -1, T.call_extern("int32", "tvmgen_default_fused_add",' - + " x_buffer_var, y_buffer_var, output_buffer_var))" - ) - # TODO(Mousius) - Create a better place for C codegen tests - assert ( - "if (tvmgen_default_fused_add(x_buffer_var, y_buffer_var, output_buffer_var) != 0 ) return -1;" # pylint: disable=line-too-long - in source - ) - - -def test_aot_uses_anf(): - """Checks that A-Normal Form is being used in the AOT lowering pipeline.""" - input_x = relay.var("x", shape=(1, 10, 10, 10)) - input_y = relay.var("y", shape=(1, 10, 10, 10)) - func_add = relay.add(input_x, input_y) - func = relay.Function([input_x, input_y], func_add) - - @pass_instrument - class CheckANFRuns: - def __init__(self): - self.did_run_anf = False - - def run_before_pass(self, _, info): - if info.name == "ToANormalForm": - self.did_run_anf = True - if info.name == "LowerTE": - assert self.did_run_anf, "ToANormalForm pass should run before LowerTE." - - check_run_anf = CheckANFRuns() - - model = AOTTestModel(module=IRModule.from_expr(func), inputs=None, outputs=None) - runtime = Runtime("crt") - executor = Executor( - "aot", - { - "workspace-byte-alignment": 8, - "interface-api": "c", - "unpacked-api": True, - }, - ) - config = {"tir.disable_vectorize": True} - - with tvm.transform.PassContext(opt_level=3, config=config, instruments=[check_run_anf]): - tvm.relay.build( - model.module, - tvm.target.Target("c"), - executor=executor, - runtime=runtime, - workspace_memory_pools=None, - params=model.params, - mod_name=model.name, - ) - - assert check_run_anf.did_run_anf, "Expected ToANormalForm pass to have run." - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/aot/test_crt_aot_usmp.py b/tests/python/relay/aot/test_crt_aot_usmp.py deleted file mode 100644 index 130c26b6f8ff..000000000000 --- a/tests/python/relay/aot/test_crt_aot_usmp.py +++ /dev/null @@ -1,946 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-""" This file contains test that use USMP + AoT using C runtime APIs""" - -from collections import OrderedDict -import re - -import random -import numpy as np -import pytest - -import tvm -from tvm import relay -from tvm.relay import testing # pylint: disable=W0611 -from tvm.relay import transform -from tvm.relay.op.annotation import compiler_begin, compiler_end -from tvm.relay.backend import Executor, Runtime -from tvm import ( - WorkspaceMemoryPools, - ConstantMemoryPools, - WorkspacePoolInfo, - ConstantPoolInfo, - PoolInfoProperties, -) -from tvm.micro import model_library_format as mlf -from tvm.micro.testing.aot_test_utils import parametrize_aot_options -from tvm.testing.aot import ( - AOTTestModel, - AOTTestRunner, - generate_ref_data, - compile_and_run, - compile_models, - run_and_check, - create_relay_module_and_inputs_from_tflite_file, -) -from tvm.testing.usmp import is_tvm_backendallocworkspace_calls - - -def _check_for_no_tvm_backendallocworkspace_calls(mod: tvm.runtime.module): - assert ( - is_tvm_backendallocworkspace_calls(mod) is False - ), "This is failing because USMP was unable to plan for every tir.allocate node." - - -# U1 test case -@parametrize_aot_options -def test_synthetic(interface_api, use_unpacked_api, test_runner): - """ - Simple U1 usecase test - """ - mod, params = tvm.relay.testing.synthetic.get_workload() - main_func = mod["main"] - shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params} - type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params} - - input_data = np.ones(shape_dict["data"]).astype(type_dict["data"]) - params = {} - for name, _ in shape_dict.items(): - if name != "data": - params[name] = np.ones(shape_dict[name]).astype(type_dict[name]) - - inputs = {"data": input_data} - output_list = generate_ref_data(mod, inputs, params) - config = ( - { - "tir.disable_vectorize": True, - "tir.disable_storage_rewrite": True, - "tir.usmp.enable": True, - "tir.usmp.algorithm": "greedy_by_conflicts", - }, - ) - - test_runner = AOTTestRunner( - makefile=test_runner.makefile, - prologue=test_runner.prologue, - epilogue=test_runner.epilogue, - includes=test_runner.includes, - parameters=test_runner.parameters, - pass_config={**test_runner.pass_config}, - ) - test_runner.pass_config.update(*config) - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), - test_runner, - interface_api, - use_unpacked_api, - ) - - -@pytest.mark.parametrize( - "workspace_byte_alignment,constant_byte_alignment," - "main_workspace_size,main_constant_size,usmp_algo", - [ - (8, 8, 14208, 948, "greedy_by_conflicts"), - (16, 8, 14208, 948, "greedy_by_conflicts"), - (256, 8, 14720, 948, "greedy_by_conflicts"), - (8, 16, 14208, 956, "greedy_by_conflicts"), - (16, 16, 14208, 956, "greedy_by_conflicts"), - (256, 16, 14720, 956, "greedy_by_conflicts"), - (8, 256, 14208, 1804, "greedy_by_conflicts"), - (16, 256, 14208, 1804, "greedy_by_conflicts"), - (256, 256, 14720, 1804, "greedy_by_conflicts"), - (8, 8, 18576, 948, "greedy_by_size"), - (16, 8, 18576, 948, "greedy_by_size"), - (256, 8, 19392, 948, "greedy_by_size"), - (8, 16, 18576, 956, "greedy_by_size"), - (16, 16, 18576, 956, "greedy_by_size"), - (256, 16, 19392, 956, "greedy_by_size"), - (8, 256, 18576, 1804, "greedy_by_size"), - (16, 256, 18576, 1804, "greedy_by_size"), - (256, 256, 19392, 1804, "greedy_by_size"), - (8, 8, 11424, 948, "hill_climb"), - (16, 8, 11424, 948, "hill_climb"), - (256, 8, 11920, 948, "hill_climb"), - (8, 16, 11424, 956, 
"hill_climb"), - (16, 16, 11424, 956, "hill_climb"), - (256, 16, 11920, 956, "hill_climb"), - (8, 256, 11424, 1804, "hill_climb"), - (16, 256, 11424, 1804, "hill_climb"), - (256, 256, 11920, 1804, "hill_climb"), - ], -) -def test_memory_planning( - workspace_byte_alignment, - constant_byte_alignment, - main_workspace_size, - main_constant_size, - usmp_algo, -): - """Checks calculated workspace against known values""" - random.seed(0) - mod, params = tvm.relay.testing.synthetic.get_workload() - target = "c" - runtime = Runtime("crt") - executor = Executor( - "aot", - { - "workspace-byte-alignment": workspace_byte_alignment, - "constant-byte-alignment": constant_byte_alignment, - }, - ) - with tvm.transform.PassContext( - opt_level=3, - config={ - "tir.disable_vectorize": True, - "tir.disable_storage_rewrite": True, - "tir.usmp.enable": True, - "tir.usmp.algorithm": usmp_algo, - }, - ): - lib = tvm.relay.build(mod, target, executor=executor, runtime=runtime, params=params) - # The workspace_size dictionary will have an entry for both the 'primitive' and 'host' - # targets, though both are identical. - assert ( - sum(lib.function_metadata["__tvm_main__"].workspace_sizes.values()) == main_workspace_size - ) - assert sum(lib.function_metadata["__tvm_main__"].constant_sizes.values()) == main_constant_size - - -@parametrize_aot_options -@pytest.mark.parametrize("groups,weight_shape", [(1, 32), (32, 1)]) -def test_conv2d(interface_api, use_unpacked_api, test_runner, groups, weight_shape): - """Test a subgraph with a single conv2d operator.""" - dtype = "float32" - ishape = (1, 32, 14, 14) - wshape = (32, weight_shape, 3, 3) - pass_config = {"tir.usmp.enable": True} - test_runner = AOTTestRunner( - makefile=test_runner.makefile, - prologue=test_runner.prologue, - epilogue=test_runner.epilogue, - includes=test_runner.includes, - parameters=test_runner.parameters, - pass_config=pass_config, - ) - - data0 = relay.var("data", shape=ishape, dtype=dtype) - weight0 = relay.var("weight", shape=wshape, dtype=dtype) - out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=groups) - main_f = relay.Function([data0, weight0], out) - mod = tvm.IRModule() - mod["main"] = main_f - mod = transform.InferType()(mod) - - i_data = np.random.uniform(0, 1, ishape).astype(dtype) - w1_data = np.random.uniform(0, 1, wshape).astype(dtype) - - inputs = OrderedDict([("data", i_data), ("weight", w1_data)]) - - output_list = generate_ref_data(mod, inputs) - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - test_runner, - interface_api, - use_unpacked_api, - ) - compiled_test_mods = compile_models( - models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - pass_config=test_runner.pass_config, - ) - - for compiled_model in compiled_test_mods: - _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib) - - run_and_check( - models=compiled_test_mods, - runner=test_runner, - interface_api=interface_api, - ) - - -@pytest.mark.parametrize("merge_compiler_regions", [False, True]) -def test_byoc_microtvm(merge_compiler_regions): - """ - This is a simple test to check BYOC capabilities of AOT - with and without merging compiler regions to test for https://github.com/apache/tvm/issues/9036 - """ - use_unpacked_api = False - interface_api = "packed" - test_runner = AOTTestRunner(pass_config={"tir.usmp.enable": True}) - - input_x = relay.var("x", shape=(10, 10)) - 
input_w0 = relay.var("w0", shape=(10, 10)) - input_w1 = relay.var("w1", shape=(10, 10)) - - # z0 = x + w0 - marked_input_x = compiler_begin(input_x, "ccompiler") - marked_input_w0 = compiler_begin(input_w0, "ccompiler") - add_x_and_w0 = relay.add(marked_input_x, marked_input_w0) - end_inner_add = compiler_end(add_x_and_w0, "ccompiler") - - # z1 = z0 + w1 - marked_inner_add = compiler_begin(end_inner_add, "ccompiler") - marked_w1 = compiler_begin(input_w1, "ccompiler") - add_nested_and_w1 = relay.add(marked_inner_add, marked_w1) - end_outer_add = compiler_end(add_nested_and_w1, "ccompiler") - - # z2 = z0 + z1 - final_add = relay.add(end_inner_add, end_outer_add) - - relay_func = relay.Function([input_x, input_w0, input_w1], final_add) - mod = tvm.IRModule() - mod["main"] = relay_func - - if merge_compiler_regions: - mod = transform.MergeCompilerRegions()(mod) - - mod = transform.PartitionGraph("mod_name")(mod) - mod = transform.InferType()(mod) - - x_data = [("x", np.random.rand(10, 10).astype("float32"))] - w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32")) for i in range(2)] - - map_inputs = OrderedDict(x_data + w_data) - output_list = generate_ref_data(mod, map_inputs) - - compiled_test_mods = compile_models( - AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - pass_config=test_runner.pass_config, - ) - - for compiled_model in compiled_test_mods: - _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib) - - run_and_check( - models=compiled_test_mods, - runner=test_runner, - interface_api=interface_api, - ) - - -MOBILENET_V1_URL = ( - "https://storage.googleapis.com/download.tensorflow.org/models/" - + "mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz", - "mobilenet_v1_1.0_224_quant.tflite", -) -MOBILENET_V2_URL = ( - "https://storage.googleapis.com/download.tensorflow.org/models/" - + "tflite_11_05_08/mobilenet_v2_1.0_224_quant.tgz", - "mobilenet_v2_1.0_224_quant.tflite", -) - - -@pytest.mark.parametrize( - "model_url, usmp_algo, workspace_size, constant_size", - [ - (MOBILENET_V1_URL, "greedy_by_size", 4845696, 8468008), - (MOBILENET_V1_URL, "greedy_by_conflicts", 4444288, 8468008), - (MOBILENET_V1_URL, "hill_climb", 3240064, 8468008), - ], -) -def test_tflite_model_u1_usecase(model_url, usmp_algo, workspace_size, constant_size): - """ - This checks for ML models and the memory used by them - when using USMP with different algorithms - """ - pytest.importorskip("tflite") - - import tvm.relay.testing.tf as tf_testing # pylint: disable=import-outside-toplevel - - use_unpacked_api = True - interface_api = "c" - test_runner = AOTTestRunner( - pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo} - ) - - tflite_model_file = tf_testing.get_workload_official( - model_url[0], - model_url[1], - ) - mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file) - output_list = generate_ref_data(mod, inputs, params) - - compiled_test_mods = compile_models( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - pass_config=test_runner.pass_config, - ) - - for compiled_model in compiled_test_mods: - _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib) - - # Checking the workspace size reported in model library format - mlf_memory_map = mlf._build_function_memory_map( - 
compiled_test_mods[0].executor_factory.function_metadata - ) - assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size - assert mlf_memory_map["main"][0]["constants_size_bytes"] == constant_size - # That should match to workspace size that will be codegen'd to the entry point. - allocated_pool_info_size = sum( - [ - _.allocated_size - for _ in list( - dict( - compiled_test_mods[0].executor_factory.executor_codegen_metadata.pool_inputs - ).values() - ) - ] - ) - assert allocated_pool_info_size == workspace_size + constant_size - - run_and_check( - models=compiled_test_mods, - runner=test_runner, - interface_api=interface_api, - ) - - -def _get_workspace_size_define_macro(pool_name: str, model_name="default") -> str: - """This function converts pool names to compiler generated - pool size macros""" - - prefix = "TVMGEN_" + model_name.upper() + "_" - postfix = "_WORKSPACE_POOL_SIZE" - return prefix + pool_name.upper() + postfix - - -def _get_constant_size_define_macro(pool_name: str, model_name="default") -> str: - """This function converts pool names to compiler generated - pool size macros""" - - prefix = "TVMGEN_" + model_name.upper() + "_" - postfix = "_CONSTANT_POOL_SIZE" - return prefix + pool_name.upper() + postfix - - -def _get_constant_data_define_macro(pool_name: str, model_name="default") -> str: - """This function converts pool names to compiler generated - pool data macros""" - - prefix = "TVMGEN_" + model_name.upper() + "_" - postfix = "_CONSTANT_POOL_DATA" - return prefix + pool_name.upper() + postfix - - -def _add_module_prefix(suffix: str, model_name="default") -> str: - """A helper function create struct types""" - return "tvmgen_" + model_name + "_" + suffix - - -@pytest.mark.parametrize( - "model_url, usmp_algo", - [ - (MOBILENET_V1_URL, "greedy_by_size"), - ], -) -def test_tflite_model_u3_usecase_single_external_pool(model_url, usmp_algo): - """This checks for inference with USMP using external pool placed in the application""" - pytest.importorskip("tflite") - - import tvm.relay.testing.tf as tf_testing # pylint: disable=import-outside-toplevel - - use_unpacked_api = True - interface_api = "c" - - pool_name = "my_memory_pool" - target = tvm.target.Target("c") - workspace_memory_pools = WorkspaceMemoryPools([WorkspacePoolInfo(pool_name, [target])]) - test_runner = AOTTestRunner( - pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}, - prologue=f""" - __attribute__((section(".data.tvm"), aligned(16))) - static uint8_t {pool_name}[{_get_workspace_size_define_macro(pool_name)}]; - """, - ) - - tflite_model_file = tf_testing.get_workload_official( - model_url[0], - model_url[1], - ) - mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file) - output_list = generate_ref_data(mod, inputs, params) - - compiled_test_mods = compile_models( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - pass_config=test_runner.pass_config, - workspace_memory_pools=workspace_memory_pools, - target=target, - ) - - for compiled_model in compiled_test_mods: - _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib) - - run_and_check( - models=compiled_test_mods, - runner=test_runner, - interface_api=interface_api, - ) - - -@pytest.mark.parametrize( - "usmp_algo", - [("greedy_by_size"), ("hill_climb")], -) -def test_tflite_model_u3_usecase_conv2d_var_cons(usmp_algo): - """This checks for inference using 
workspace and constant pools placed in the application""" - - mod = tvm.relay.fromtext( - """\ - #[version = "0.0.5"] - def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), int8]) { - %1 = nn.conv2d( - %data, - %weight, - padding=[2, 2], - channels=3, - kernel_size=[5, 5], - data_layout="NCHW", - kernel_layout="OIHW", - out_dtype="int32"); - %2 = cast(nn.max_pool2d(%1, pool_size=[3, 3]), dtype="int8"); - %3 = nn.conv2d( - %2, - %weight, - padding=[2, 2], - channels=3, - kernel_size=[5, 5], - data_layout="NCHW", - kernel_layout="OIHW", - out_dtype="int32"); - %4 = nn.max_pool2d(%3, pool_size=[3, 3]); - %4 - } - """ - ) - - main_func = mod["main"] - shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params} - type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params} - - weight_data = np.random.randint(1, 255, shape_dict["weight"]).astype(type_dict["weight"]) - input_data = np.ones(shape_dict["data"]).astype(type_dict["data"]) - params = {"weight": weight_data} - inputs = {"data": input_data} - - use_unpacked_api = True - interface_api = "c" - - target = tvm.target.Target("c") - workspace_mem_pools = WorkspaceMemoryPools( - [ - WorkspacePoolInfo( - "my_memory_pool_1", [target], PoolInfoProperties(size_hint_bytes=8500000) - ), - ] - ) - - constant_mem_pools = ConstantMemoryPools( - [ - ConstantPoolInfo("my_const_pool_1", [target], []), - ] - ) - - test_runner = AOTTestRunner( - pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}, - prologue=f""" - __attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES))) - static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}]; - __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES))) - static uint8_t my_const_pool_1[{_get_constant_size_define_macro("my_const_pool_1")}] = {{ {_get_constant_data_define_macro("my_const_pool_1")} }}; - """, - ) - - output_list = generate_ref_data(mod, inputs, params) - - compiled_test_mods = compile_models( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - pass_config=test_runner.pass_config, - workspace_memory_pools=workspace_mem_pools, - constant_memory_pools=constant_mem_pools, - target=target, - ) - - for compiled_model in compiled_test_mods: - _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib) - - run_and_check( - models=compiled_test_mods, - runner=test_runner, - interface_api=interface_api, - ) - - -@pytest.mark.parametrize( - "model_url, usmp_algo", - [ - (MOBILENET_V1_URL, "greedy_by_size"), - ], -) -def test_tflite_model_u3_usecase_var_cons_ext_pools(model_url, usmp_algo): - """This checks for inference using one external workspace and one external constant - pools placed in the application""" - pytest.importorskip("tflite") - - import tvm.relay.testing.tf as tf_testing # pylint: disable=import-outside-toplevel - - use_unpacked_api = True - interface_api = "c" - - target = tvm.target.Target("c") - workspace_mem_pools = WorkspaceMemoryPools( - [ - WorkspacePoolInfo( - "my_memory_pool_1", [target], PoolInfoProperties(size_hint_bytes=8500000) - ), - ] - ) - - constant_mem_pools = ConstantMemoryPools( - [ - ConstantPoolInfo("my_const_pool_1", [target], []), - ] - ) - - test_runner = AOTTestRunner( - pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}, - prologue=f""" - 
__attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES))) - static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}]; - __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES))) - static uint8_t my_const_pool_1[{_get_constant_size_define_macro("my_const_pool_1")}] = {{ {_get_constant_data_define_macro("my_const_pool_1")} }}; - """, - ) - - tflite_model_file = tf_testing.get_workload_official( - model_url[0], - model_url[1], - ) - mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file) - output_list = generate_ref_data(mod, inputs, params) - - compiled_test_mods = compile_models( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - pass_config=test_runner.pass_config, - workspace_memory_pools=workspace_mem_pools, - constant_memory_pools=constant_mem_pools, - target=target, - ) - - for compiled_model in compiled_test_mods: - _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib) - - run_and_check( - models=compiled_test_mods, - runner=test_runner, - interface_api=interface_api, - ) - - -@pytest.mark.parametrize( - "model_url, usmp_algo", - [ - (MOBILENET_V1_URL, "greedy_by_size"), - ], -) -def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo): - """This checks for inference using two external pools placed in the application""" - pytest.importorskip("tflite") - - import tvm.relay.testing.tf as tf_testing # pylint: disable=import-outside-toplevel - - use_unpacked_api = True - interface_api = "c" - - target = tvm.target.Target("c") - workspace_memory_pools = WorkspaceMemoryPools( - [ - WorkspacePoolInfo( - "my_memory_pool_1", [target], PoolInfoProperties(size_hint_bytes=2500000) - ), - WorkspacePoolInfo("my_memory_pool_2", [target]), - ] - ) - test_runner = AOTTestRunner( - pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}, - prologue=f""" - __attribute__((section(".data.tvm"), aligned(16))) - static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}]; - __attribute__((section(".data.tvm"), aligned(16))) - static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}]; - """, - ) - - tflite_model_file = tf_testing.get_workload_official( - model_url[0], - model_url[1], - ) - mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file) - output_list = generate_ref_data(mod, inputs, params) - - compiled_test_mods = compile_models( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - pass_config=test_runner.pass_config, - workspace_memory_pools=workspace_memory_pools, - target=target, - ) - - for compiled_model in compiled_test_mods: - _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib) - - run_and_check( - models=compiled_test_mods, - runner=test_runner, - interface_api=interface_api, - ) - - -@pytest.mark.parametrize( - "model_urls, usmp_algo", - [ - ((MOBILENET_V1_URL, MOBILENET_V2_URL), "greedy_by_size"), - ], -) -def test_two_models_with_a_single_external_pool(model_urls, usmp_algo): - """This checks for inference using a single large enough common pool""" - pytest.importorskip("tflite") - - import tvm.relay.testing.tf as tf_testing # pylint: disable=import-outside-toplevel - - use_unpacked_api = True - 
interface_api = "c" - - target = tvm.target.Target("c") - workspace_memory_pools = WorkspaceMemoryPools([WorkspacePoolInfo("my_memory_pool", [target])]) - test_runner = AOTTestRunner( - pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}, - prologue=f""" - #define MAX(A, B) ((A > B) ? A : B) - __attribute__((section(".data.tvm"), aligned(16))) - static uint8_t my_memory_pool[MAX({_get_workspace_size_define_macro("my_memory_pool", "mod1")},{_get_workspace_size_define_macro("my_memory_pool", "mod2")})]; - """, - ) - - tflite_model_file1 = tf_testing.get_workload_official( - model_urls[0][0], - model_urls[0][1], - ) - mod1, inputs1, params1 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file1) - output_list1 = generate_ref_data(mod1, inputs1, params1) - - tflite_model_file2 = tf_testing.get_workload_official( - model_urls[1][0], - model_urls[1][1], - ) - mod2, inputs2, params2 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file2) - output_list2 = generate_ref_data(mod2, inputs2, params2) - - compiled_test_mods = compile_models( - [ - AOTTestModel( - name="mod1", module=mod1, inputs=inputs1, outputs=output_list1, params=params1 - ), - AOTTestModel( - name="mod2", module=mod2, inputs=inputs2, outputs=output_list2, params=params2 - ), - ], - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - pass_config=test_runner.pass_config, - workspace_memory_pools=workspace_memory_pools, - target=target, - ) - - for compiled_model in compiled_test_mods: - _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib) - - run_and_check( - models=compiled_test_mods, - runner=test_runner, - interface_api=interface_api, - ) - - -@pytest.mark.parametrize( - "model_url, usmp_algo", - [ - (MOBILENET_V1_URL, "greedy_by_size"), - ], -) -def test_tflite_model_u4_usecase_single_external_pool(model_url, usmp_algo): - """This checks for inference with USMP using external pool placed in the application""" - pytest.importorskip("tflite") - - import tvm.relay.testing.tf as tf_testing # pylint: disable=import-outside-toplevel - - use_unpacked_api = True - interface_api = "c" - - pool_name = "my_memory_pool" - target = tvm.target.Target("c") - workspace_memory_pools = WorkspaceMemoryPools([WorkspacePoolInfo(pool_name, [target])]) - - tflite_model_file = tf_testing.get_workload_official( - model_url[0], - model_url[1], - ) - mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file) - output_list = generate_ref_data(mod, inputs, params) - - input_name, input_data = list(inputs.items())[0] - input_size_bytes = input_data.size * input_data.itemsize - test_runner = AOTTestRunner( - pass_config={ - "tir.usmp.enable": True, - "tir.usmp.algorithm": usmp_algo, - "tir.usmp.use_workspace_io": True, - }, - prologue=f""" - #include - __attribute__((section(".data.tvm"), aligned(16))) - static uint8_t {pool_name}[{_get_workspace_size_define_macro(pool_name)}]; - struct {_add_module_prefix("workspace_pools")} {_add_module_prefix("workspace_pools")} = {{ - .{pool_name} = {pool_name} - }}; - struct {_add_module_prefix("inputs")} {_add_module_prefix("inputs")} = {_add_module_prefix("map_inputs")}(&{_add_module_prefix("workspace_pools")}); - memcpy({_add_module_prefix("inputs")}.{input_name}, tvmgen_default_input_data_input, {input_size_bytes}); - struct {_add_module_prefix("outputs")} {_add_module_prefix("outputs")} = {_add_module_prefix("map_outputs")}(&{_add_module_prefix("workspace_pools")}); - """, - ) - - 
compiled_test_mods = compile_models( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - pass_config=test_runner.pass_config, - workspace_memory_pools=workspace_memory_pools, - target=target, - ) - - for compiled_model in compiled_test_mods: - _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib) - - run_and_check( - models=compiled_test_mods, - runner=test_runner, - interface_api=interface_api, - use_workspace_io=True, - ) - - -@pytest.mark.parametrize( - "model_url, usmp_algo", - [ - (MOBILENET_V1_URL, "greedy_by_size"), - ], -) -def test_tflite_model_u4_usecase_two_external_pools(model_url, usmp_algo): - """This checks for inference with USMP using external pool placed in the application""" - pytest.importorskip("tflite") - - import tvm.relay.testing.tf as tf_testing # pylint: disable=import-outside-toplevel - - use_unpacked_api = True - interface_api = "c" - - target = tvm.target.Target("c") - workspace_memory_pools = WorkspaceMemoryPools( - [ - WorkspacePoolInfo( - "my_memory_pool_1", [target], PoolInfoProperties(size_hint_bytes=2500000) - ), - WorkspacePoolInfo("my_memory_pool_2", [target]), - ] - ) - - tflite_model_file = tf_testing.get_workload_official( - model_url[0], - model_url[1], - ) - mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file) - output_list = generate_ref_data(mod, inputs, params) - - input_name, input_data = list(inputs.items())[0] - input_size_bytes = input_data.size * input_data.itemsize - test_runner = AOTTestRunner( - pass_config={ - "tir.usmp.enable": True, - "tir.usmp.algorithm": usmp_algo, - "tir.usmp.use_workspace_io": True, - }, - prologue=f""" - #include - __attribute__((section(".data.tvm"), aligned(16))) - static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}]; - __attribute__((section(".data.tvm"), aligned(16))) - static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}]; - struct {_add_module_prefix("workspace_pools")} {_add_module_prefix("workspace_pools")} = {{ - .my_memory_pool_1 = my_memory_pool_1, - .my_memory_pool_2 = my_memory_pool_2, - }}; - struct {_add_module_prefix("inputs")} {_add_module_prefix("inputs")} = {_add_module_prefix("map_inputs")}(&{_add_module_prefix("workspace_pools")}); - memcpy({_add_module_prefix("inputs")}.{input_name}, tvmgen_default_input_data_input, {input_size_bytes}); - struct {_add_module_prefix("outputs")} {_add_module_prefix("outputs")} = {_add_module_prefix("map_outputs")}(&{_add_module_prefix("workspace_pools")}); - """, - ) - - compiled_test_mods = compile_models( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - pass_config=test_runner.pass_config, - workspace_memory_pools=workspace_memory_pools, - target=target, - ) - - for compiled_model in compiled_test_mods: - _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib) - - run_and_check( - models=compiled_test_mods, - runner=test_runner, - interface_api=interface_api, - use_workspace_io=True, - ) - - -def test_incompatible_interface_api_errors(): - """Ensures an error is thrown if not using the C interface API""" - mod, params = tvm.relay.testing.synthetic.get_workload() - target = "c" - runtime = Runtime("crt") - executor = Executor( - "aot", - { - "interface-api": "packed", - }, - ) - - with pytest.raises( - 
tvm.TVMError, - match=re.escape( - "tir.usmp.use_workspace_io option is only compatible with interface_api c.\n" - "Please use interface_api c to be able to enable tir.usmp.use_workspace_io" - ), - ): - with tvm.transform.PassContext( - opt_level=3, - config={"tir.usmp.enable": True, "tir.usmp.use_workspace_io": True}, - ): - tvm.relay.build(mod, target, executor=executor, runtime=runtime, params=params) - - -@parametrize_aot_options -def test_usmp_enabled_by_default_for_crt(interface_api, use_unpacked_api, test_runner): - """This test checks whether USMP is enabled by default - for cortex-M targets. - """ - dtype = "float32" - ishape = (1, 32, 14, 14) - wshape = (32, 32, 3, 3) - - data0 = relay.var("data", shape=ishape, dtype=dtype) - weight0 = relay.var("weight", shape=wshape, dtype=dtype) - out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=1) - main_f = relay.Function([data0, weight0], out) - mod = tvm.IRModule() - mod["main"] = main_f - mod = transform.InferType()(mod) - - i_data = np.random.uniform(0, 1, ishape).astype(dtype) - w1_data = np.random.uniform(0, 1, wshape).astype(dtype) - - inputs = OrderedDict([("data", i_data), ("weight", w1_data)]) - output_list = generate_ref_data(mod, inputs) - - compiled_test_mods = compile_models( - models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - pass_config=test_runner.pass_config, - target=tvm.target.target.micro("host"), - ) - - for compiled_model in compiled_test_mods: - _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/aot/test_crt_forward_declarations.py b/tests/python/relay/aot/test_crt_forward_declarations.py deleted file mode 100644 index 99e2f0c92300..000000000000 --- a/tests/python/relay/aot/test_crt_forward_declarations.py +++ /dev/null @@ -1,251 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""test forward function declarations codegen by CodegenCHost.""" - -from collections import OrderedDict -import pytest -import numpy as np - -import tvm.testing -from tvm import relay -from tvm.contrib.download import download_testdata -from tvm.relay.op.contrib import cmsisnn -from tvm.testing.aot import AOTTestModel, compile_models, generate_ref_data -from tvm.micro.testing.aot_test_utils import ( - AOT_CORSTONE300_RUNNER, - AOT_USMP_CORSTONE300_RUNNER, - parametrize_aot_options, - AOTTestRunner, -) - - -def _change_ndarray_layout(arr, src_layout, dst_layout): - """Makes a copy of an ndarray, reshaping it to a new data layout. - - Parameter - --------- - arr : numpy.ndarray - The ndarray to be reformatted. 
- - src_layout : str - The current layout of the Relay constant. Must be alphabetic (e.g. NHWC - or OIHW, but not NCHW2c). - - dst_layout : str - The desired layout of the new Relay constant. Must be alphabetic (e.g. NHWC - or OIHW, but not NCHW2c). - - Returns - ------- - dst_array : numpy.ndarray - A copy of the ndarray with the new layout. - """ - assert src_layout.isalpha() and dst_layout.isalpha() - axis_order = [src_layout.index(c) for c in dst_layout] - return np.transpose(arr, axis_order) - - -@tvm.testing.requires_package("tflite") -@tvm.testing.requires_cmsisnn -@pytest.mark.parametrize("test_runner", [AOT_CORSTONE300_RUNNER, AOT_USMP_CORSTONE300_RUNNER]) -def test_external_calls(test_runner): - """Download a small network and partition for CMSIS-NN to test forward declarations for external - calls outside of __tvm_main__.""" - # download the model - base_url = ( - "https://github.com/ARM-software/ML-zoo/raw/" - "48a22ee22325d15d2371a6df24eb7d67e21dcc97" - "/models/keyword_spotting/cnn_small/tflite_int8" - ) - file_to_download = "cnn_s_quantized.tflite" - file_saved = "cnn_s_quantized_15Dec2021.tflite" - model_file = download_testdata("{}/{}".format(base_url, file_to_download), file_saved) - - # convert the tflite network into relay model - # pylint: disable=import-outside-toplevel - from tvm.relay.testing.tflite import TFLiteModel - - input_shape = (1, 490) - dtype = "int8" - tfl_model = TFLiteModel(dtype) - tfl_model.load_from_file(model_file, [input_shape]) - relay_mod, relay_params = tfl_model.convert_to_relay() - cmsisnn_mod = cmsisnn.partition_for_cmsisnn(relay_mod, relay_params) - - # obtain the executor factory post relay compilation. - input_map, output_map, output_tolerance = tfl_model.generate_reference_data() - interface_api = "c" - use_unpacked_api = True - compiled_models = compile_models( - AOTTestModel( - module=cmsisnn_mod, - inputs=input_map, - outputs=output_map, - params=None, - output_tolerance=output_tolerance, - ), - interface_api, - use_unpacked_api, - pass_config=test_runner.pass_config, - ) - - # Validate frequency of function appearances in the Host C file after forward declarations. - lib_mod = compiled_models[0].executor_factory.lib.imported_modules[0] - main_source = lib_mod.get_source() - assert ( - main_source.count("TVMBackendAllocWorkspace") == 3 - or main_source.count("TVMBackendAllocWorkspace") == 0 - ) - assert main_source.count("tvmgen_default_fused_reshape") == 3 - assert main_source.count("tvmgen_default_cmsis_nn_main") == 12 - cmsisnn_source = lib_mod.imported_modules[0].get_source() - assert cmsisnn_source.count("arm_convolve_wrapper") == 1 - assert cmsisnn_source.count("arm_fully_connected") == 3 - assert cmsisnn_source.count("arm_softmax") == 1 - - -@parametrize_aot_options -def test_internal_calls(interface_api, use_unpacked_api, test_runner): - """Test for all internal function calls. 
No forward declarations are expected here.""" - dtype = "float32" - groups = 32 - weight_shape = 1 - ishape = (1, 32, 14, 14) - wshape = (32, weight_shape, 3, 3) - pass_config = {"tir.usmp.enable": True} - test_runner = AOTTestRunner( - makefile=test_runner.makefile, - prologue=test_runner.prologue, - epilogue=test_runner.epilogue, - includes=test_runner.includes, - parameters=test_runner.parameters, - pass_config=pass_config, - ) - - data0 = relay.var("data", shape=ishape, dtype=dtype) - weight0 = relay.var("weight", shape=wshape, dtype=dtype) - out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=groups) - main_f = relay.Function([data0, weight0], out) - mod = tvm.IRModule() - mod["main"] = main_f - mod = tvm.relay.transform.InferType()(mod) - - i_data = np.random.uniform(0, 1, ishape).astype(dtype) - w1_data = np.random.uniform(0, 1, wshape).astype(dtype) - - inputs = OrderedDict([("data", i_data), ("weight", w1_data)]) - - output_list = generate_ref_data(mod, inputs) - compiled_models = compile_models( - models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - interface_api=interface_api, - use_unpacked_api=use_unpacked_api, - pass_config=test_runner.pass_config, - ) - - lib_mod = compiled_models[0].executor_factory.lib.imported_modules[0] - main_source = lib_mod.get_source() - assert main_source.count("int32_t tvmgen_default_fused_nn_contrib_depthwise_conv2d_NCHWc") == 2 - assert main_source.count("int32_t tvmgen_default_fused_layout_transform") == 6 - - -@tvm.testing.requires_corstone300 -def test_tensorized_calls(): - """Test a subgraph with a mix of internal and tensorized calls.""" - data_shape, kernel_size, num_filter, groups, strides, padding, dilation = ( - (1, 32, 32, 16), - (3, 3), - 16, - 1, - 1, - (0, 2, 2, 0), - 1, - ) - in_dtype = "int8" - data_layout = "NHWC" - kernel_layout = "HWOI" - ref_kernel_layout = "HWIO" - out_layout = "NHWC" - schedule_name = "conv2d_nhwc_dsp.arm_cpu" - - ref_input_data = np.random.randint(low=-128, high=127, size=data_shape, dtype=in_dtype) - ref_input_var = relay.var("input", relay.TensorType(data_shape, in_dtype)) # NHWC layout - kernel_shape = (*kernel_size, data_shape[-1] // groups, num_filter) # HWIO layout - ref_kernel_data = np.random.randint(low=-10, high=10, size=kernel_shape, dtype=in_dtype) - - ref_relay_op = relay.op.nn.conv2d( - ref_input_var, - relay.const(_change_ndarray_layout(ref_kernel_data, "HWIO", ref_kernel_layout)), - kernel_size=kernel_size, - strides=strides, - padding=padding, - groups=groups, - dilation=(dilation, dilation), - data_layout="NHWC", - kernel_layout=ref_kernel_layout, - out_dtype="int32", - out_layout="NHWC", - ) - ref_module = tvm.IRModule.from_expr(relay.Function([ref_input_var], ref_relay_op)) - ref_outputs = generate_ref_data(ref_module, {"input": ref_input_data}) - - # Reshape output dictionary to match out_layout - assert len(ref_outputs) == 1 - output_tensor_name, output_tensor = next(iter(ref_outputs.items())) - ref_outputs[output_tensor_name] = _change_ndarray_layout(output_tensor, "NHWC", out_layout) - - test_input_data = _change_ndarray_layout(ref_input_data, "NHWC", data_layout) - test_input_var = relay.var("input", relay.TensorType(test_input_data.shape, in_dtype)) - test_kernel_data = _change_ndarray_layout(ref_kernel_data, "HWIO", kernel_layout) - - test_relay_op = relay.op.nn.conv2d( - test_input_var, - relay.const(test_kernel_data), - kernel_size=kernel_size, - strides=strides, - padding=padding, - groups=groups, - dilation=(dilation, dilation), - 
data_layout=data_layout, - kernel_layout=kernel_layout, - out_dtype="int32", - out_layout=out_layout, - ) - test_function = relay.Function([test_input_var], test_relay_op) - test_model = AOTTestModel( - module=tvm.IRModule.from_expr(test_function), - inputs={"input": test_input_data}, - outputs=ref_outputs, - ) - compiled_models = compile_models( - test_model, - interface_api="c", - use_unpacked_api=True, - pass_config=AOT_CORSTONE300_RUNNER.pass_config, - target="c -keys=arm_cpu -mcpu=cortex-m7", - schedule_name=schedule_name, - ) - - lib_mod = compiled_models[0].executor_factory.lib.imported_modules[0] - main_source = lib_mod.get_source() - assert main_source.count("tvmgen_default_fused_nn_conv2d") == 3 - assert main_source.count("gemm_") == 15 - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/aot/test_pass_aot_lower_main.py b/tests/python/relay/aot/test_pass_aot_lower_main.py deleted file mode 100644 index 9667d2093757..000000000000 --- a/tests/python/relay/aot/test_pass_aot_lower_main.py +++ /dev/null @@ -1,429 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# pylint: disable=line-too-long,missing-class-docstring,missing-module-docstring,missing-function-docstring,no-self-argument,unused-argument,invalid-name -import numpy as np -import pytest -import tvm -import tvm.testing -from tvm.ir import assert_structural_equal -from tvm.relay.backend.aot import AOTLowerMain, CallType -from tvm.script import tir as T - - -def _make_const(dtype, shape): - return tvm.relay.const(np.zeros(shape).astype(dtype)) - - -def _make_consts(dtype, shapes): - return [_make_const(dtype, shape) for shape in shapes] - - -def _plan_devices(mod): - host_target = tvm.target.Target("llvm") - prim_target = tvm.target.Target("llvm", host=host_target) - ctxt = tvm.transform.PassContext() - config = tvm.target.make_compilation_config(ctxt, prim_target) - mod = tvm.relay.transform.PlanDevices(config)(mod) - mod = tvm.relay.transform.InferType()(mod) - return mod, config - - -def _assert_lowered_main(mod, main_func, call_type, print_script=False): - mod, config = _plan_devices(mod) - mod = AOTLowerMain("test_mod", config, call_type)(mod) - if print_script: - print(mod["__tvm_main__"].script()) - - assert_structural_equal(mod["__tvm_main__"], main_func) - - -def test_single_call_cpacked(): - mod = tvm.relay.parse( - """ -#[version = "0.0.5"] -def @test_fused_add(%x: Tensor[(5, 7), float32]) { %x } - -def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { - %0 = (%a,) /* ty=(Tensor[(5, 7), float32],) */; - call_lowered(@test_fused_add, %0) /* ty=Tensor[(5, 7), float32] */ -} - """, - ) - - # fmt: off - @T.prim_func - def func(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": []}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - T.evaluate(T.tvm_call_cpacked("test_fused_add", a_buffer.data, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - _assert_lowered_main(mod, func, CallType.CPacked) - - -def test_single_call_packed(): - mod = tvm.relay.parse( - """ -#[version = "0.0.5"] -def @test_fused_add(%x: Tensor[(5, 7), float32]) { %x } - -def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { - %0 = (%a,) /* ty=(Tensor[(5, 7), float32],) */; - call_lowered(@test_fused_add, %0) /* ty=Tensor[(5, 7), float32] */ -} - """, - ) - - # fmt: off - @T.prim_func - def func(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": []}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - T.evaluate(T.tvm_check_return(0, -1, T.tvm_call_packed("test_fused_add", a_buffer.data, output_buffer.data, dtype="int32"), dtype="int32")) - # fmt: on - - _assert_lowered_main(mod, func, CallType.Packed) - - -def test_single_call_unpacked(): - mod = tvm.relay.parse( - """ -#[version = "0.0.5"] -def @test_fused_add(%x: Tensor[(5, 7), float32]) { %x } - -def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { - %0 = (%a,) /* ty=(Tensor[(5, 7), float32],) */; - call_lowered(@test_fused_add, %0) /* ty=Tensor[(5, 7), 
float32] */ -} - """, - ) - - # fmt: off - @T.prim_func - def func(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": []}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - T.evaluate(T.tvm_check_return(0, -1, T.call_extern("test_fused_add", a_buffer.data, output_buffer.data, dtype="int32"), dtype="int32")) - # fmt: on - - _assert_lowered_main(mod, func, CallType.Unpacked) - - -def test_constant(): - mod = tvm.relay.parse( - """ -#[version = "0.0.5"] -def @test_fused_add(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { %x } - -def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { - %0 = (%a, meta[relay.Constant][0]) /* ty=(Tensor[(5, 7), float32], Tensor[(5, 7), float32]) */; - call_lowered(@test_fused_add, %0) /* ty=Tensor[(5, 7), float32] */ -} - """, - init_meta_table={"relay.Constant": _make_consts("float32", [(5, 7)])}, - ) - - # fmt: off - @T.prim_func - def func(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "global_symbol": "test_mod___tvm_main__", "input_vars": [a], "output_vars": [output], "devices": []}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - constant_0 = T.allocate_const([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "float32", [5, 7]) - T.evaluate(T.tvm_call_cpacked("test_fused_add", a_buffer.data, constant_0, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - _assert_lowered_main(mod, func, CallType.CPacked) - - -# TODO(@mbaret) There seems to be a TVMScript round-trip bug causing this to fail -@pytest.mark.xfail() -def test_copy_to_output(): - mod = tvm.relay.parse( - """ -#[version = "0.0.5"] -def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { - %a -} - """, - ) - - # fmt: off - @T.prim_func - def func(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": []}) - tmp_read = T.handle("uint8", "") - # buffer definition - tmp_read_1 = T.Buffer([T.uint64(140)], dtype="uint8", data=tmp_read) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - tmp_write: T.handle("uint8") = output_buffer.data - tmp_write_1 = T.Buffer([T.uint64(140)], dtype="uint8", data=tmp_write) - for i in T.serial(140): - tmp_write_1[i] = T.Let(tmp_read_1[i], where={tmp_read : a_buffer.data}) - # fmt: on - - _assert_lowered_main(mod, func, CallType.CPacked) - - -def test_two_calls(): - mod = tvm.relay.parse( - """ -#[version = "0.0.5"] -def @test_fused_add(%x: Tensor[(5, 7), float32]) { %x } - -def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { - %0 = (%a,) /* ty=(Tensor[(5, 7), float32],) */; - %1 = call_lowered(@test_fused_add, %0) /* ty=Tensor[(5, 7), float32] */; - %2 = (%1,) /* ty=(Tensor[(5, 7), 
float32],) */; - call_lowered(@test_fused_add, %2) /* ty=Tensor[(5, 7), float32] */ -} - """, - ) - - # fmt: off - @T.prim_func - def func(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": []}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - sid_2 = T.allocate([140], "int8", "global.workspace") - T.evaluate(T.tvm_call_cpacked("test_fused_add", a_buffer.data, sid_2, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add", sid_2, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - _assert_lowered_main(mod, func, CallType.CPacked) - - -def test_tuple_output(): - mod = tvm.relay.parse( - """ -#[version = "0.0.5"] -def @test_fused_add(%x: Tensor[(5, 7), float32]) { (%x, %x) } - -def @main(%a: Tensor[(5, 7), float32]) -> (Tensor[(5, 7), float32], Tensor[(5, 7), float32]) { - %0 = (%a,) /* ty=(Tensor[(5, 7), float32],) */; - call_lowered(@test_fused_add, %0) /* ty=(Tensor[(5, 7), float32], Tensor[(5, 7), float32]) */ -} - """, - ) - - # fmt: off - @T.prim_func - def func(a: T.handle, output0: T.handle, output1: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a], "output_vars": [output0, output1], "devices": []}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output0_buffer = T.match_buffer(output0, [5, 7], dtype="float32", align=16) - output1_buffer = T.match_buffer(output1, [5, 7], dtype="float32", align=16) - # body - T.evaluate(T.tvm_call_cpacked("test_fused_add", a_buffer.data, output0_buffer.data, output1_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - _assert_lowered_main(mod, func, CallType.CPacked) - - -def test_tuple_intermediate(): - mod = tvm.relay.parse( - """ -#[version = "0.0.5"] -def @test_fused_add_0(%x: Tensor[(5, 7), float32]) -> (Tensor[(5, 7), float32], Tensor[(5, 7), float32]) { (%x, %x) } -def @test_fused_add_1(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { %x } - -def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { - %0 = (%a,); - %1 = call_lowered(@test_fused_add_0, %0); - %2 = (%1.0, %1.1); - call_lowered(@test_fused_add_1, %2) -} - """, - ) - - # fmt: off - @T.prim_func - def func(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": []}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - sid_3 = T.allocate([140], "int8", "global.workspace") - sid_2 = T.allocate([140], "int8", "global.workspace") - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", a_buffer.data, sid_2, sid_3, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_1", sid_2, sid_3, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - 
- _assert_lowered_main(mod, func, CallType.CPacked) - - -def test_multi_input(): - mod = tvm.relay.parse( - """ -#[version = "0.0.5"] -def @test_fused_add(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) { %x } - -def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { - %0 = (%a, %b) /* ty=(Tensor[(5, 7), float32], Tensor[(5, 7), float32]) */; - call_lowered(@test_fused_add, %0) /* ty=Tensor[(5, 7), float32] */ -} - """, - ) - - # fmt: off - @T.prim_func - def func(a: T.handle, b: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a, b], "output_vars": [output], "devices": []}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - b_buffer = T.match_buffer(b, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - T.evaluate(T.tvm_call_cpacked("test_fused_add", a_buffer.data, b_buffer.data, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - _assert_lowered_main(mod, func, CallType.CPacked) - - -def test_let_binding(): - mod = tvm.relay.parse( - """ -#[version = "0.0.5"] -def @test_fused_add(%x: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { %x } - -def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { - %0 = (%a,); - let %v1 = call_lowered(@test_fused_add, %0); - %v1 -} - """, - ) - - # fmt: off - @T.prim_func - def func(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": []}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - T.evaluate(T.tvm_call_cpacked("test_fused_add", a_buffer.data, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - _assert_lowered_main(mod, func, CallType.CPacked) - - -def test_let_binding_branch(): - mod = tvm.relay.parse( - """ -#[version = "0.0.5"] -def @test_fused_add_0(%x: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { %x } -def @test_fused_add_1(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { %x } - -def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { - %0 = (%a,); - let %v0 = call_lowered(@test_fused_add_0, %0); - %1 = (%v0,); - let %v1 = call_lowered(@test_fused_add_0, %1); - %2 = (%v1,); - let %v2 = call_lowered(@test_fused_add_0, %2); - %3 = (%v1, %v2); - let %v3 = call_lowered(@test_fused_add_1, %3); - %v3 -} - """, - ) - - # fmt: off - @T.prim_func - def func(a: T.handle, output: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": []}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - sid_3 = T.allocate([140], "int8", "global.workspace") - sid_2 = T.allocate([140], "int8", "global.workspace") - sid_1 = T.allocate([140], "int8", "global.workspace") - 
T.evaluate(T.tvm_call_cpacked("test_fused_add_0", a_buffer.data, sid_1, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_1, sid_2, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_2, sid_3, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add_1", sid_2, sid_3, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32")) - # fmt: on - - _assert_lowered_main(mod, func, CallType.CPacked) - - -def test_device_hooks(): - mod = tvm.relay.parse( - """ -#[version = "0.0.5"] -def @test_fused_add(%x: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { %x } - -def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] { - %0 = (%a,); - %1 = call_lowered(@test_fused_add, %0); - %2 = (%1,); - call_lowered(@test_fused_add, %2) -} - """, - ) - - # fmt: off - @T.prim_func - def func(a: T.handle, output: T.handle, device_context_example_target_hook: T.handle) -> None: - # function attr dict - T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": ["example_target_hook"]}) - a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) - output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) - # body - T.evaluate(T.tvm_check_return(0, -1, T.call_extern("TVMDeviceExampleTargetHookActivate", device_context_example_target_hook, dtype="int32"), dtype="int32")) - with T.allocate([140], "int8", "global.workspace") as sid_2: - T.evaluate(T.tvm_check_return(0, -1, T.call_extern("TVMDeviceExampleTargetHookOpen", device_context_example_target_hook, dtype="int32"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add", a_buffer.data, sid_2, device_context_example_target_hook, dtype="int32")) - T.evaluate(T.tvm_check_return(0, -1, T.call_extern("TVMDeviceExampleTargetHookClose", device_context_example_target_hook, dtype="int32"), dtype="int32")) - T.evaluate(T.tvm_check_return(0, -1, T.call_extern("TVMDeviceExampleTargetHookOpen", device_context_example_target_hook, dtype="int32"), dtype="int32")) - T.evaluate(T.tvm_call_cpacked("test_fused_add", sid_2, output_buffer.data, device_context_example_target_hook, dtype="int32")) - T.evaluate(T.tvm_check_return(0, -1, T.call_extern("TVMDeviceExampleTargetHookClose", device_context_example_target_hook, dtype="int32"), dtype="int32")) - T.evaluate(T.tvm_check_return(0, -1, T.call_extern("TVMDeviceExampleTargetHookDeactivate", device_context_example_target_hook, dtype="int32"), dtype="int32")) - # fmt: on - - device_contexts = {} - for gv in mod.get_global_vars(): - device_contexts[gv] = "example_target_hook" - - mod = mod.with_attr("device_contexts", device_contexts) - - _assert_lowered_main(mod, func, CallType.CPacked) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/qnn/test_qnn_channel_stripping.py b/tests/python/relay/qnn/test_qnn_channel_stripping.py index d0d32567a8a5..18b2da1a90ec 100644 --- a/tests/python/relay/qnn/test_qnn_channel_stripping.py +++ b/tests/python/relay/qnn/test_qnn_channel_stripping.py @@ -22,7 +22,7 @@ from tvm.relay import transform from tvm.relay.testing.temp_op_attr import TempOpAttr -from tvm.testing.aot import generate_ref_data +from tvm.testing.utils import generate_ref_data from tvm.topi.arm_cpu.qnn_legalize import 
legalize_bias_add diff --git a/tests/python/relay/strategy/arm_cpu/scalable_utils.py b/tests/python/relay/strategy/arm_cpu/scalable_utils.py deleted file mode 100644 index ad16a47612d0..000000000000 --- a/tests/python/relay/strategy/arm_cpu/scalable_utils.py +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import tvm -from tvm.tir.stmt_functor import post_order_visit, ir_transform - - -def calculate_extra_workspace_size_from_scalable_extents(func, known_vscale_value): - """ - The AOT executor needs to know the size of the workspace ahead of time, but this - isn't possible when some allocations are scalable (vscale is not known at compile-time). - If we know the target hardware, we can reason about the value of vscale ahead of time. - This function will calculate an upper-bound for the extra workspace bytes required by the - AOT executor given TIR function and a known value for vscale. - """ - extra_workspace_bytes = 0 - is_scalable_extent = False - ana = tvm.arith.Analyzer() - - def replace_vscale_with_known_value(stmt): - nonlocal is_scalable_extent - if isinstance(stmt, tvm.tir.expr.Call) and stmt.op.name == "tir.vscale": - is_scalable_extent = True - return tvm.tir.IntImm(stmt.dtype, known_vscale_value) - - def calculate_workspace_bytes(stmt): - nonlocal extra_workspace_bytes, is_scalable_extent - if isinstance(stmt, tvm.tir.stmt.Allocate): - for extent in stmt.extents: - extent_stmt = tvm.tir.Evaluate(extent) - is_scalable_extent = False - mutated_extent = ir_transform(extent_stmt, replace_vscale_with_known_value, None) - # Non scalable extents are already included in the calculation by AOT - if is_scalable_extent: - alloc_bytes = ana.simplify(mutated_extent.value) * tvm.DataType(stmt.dtype).bits - extra_workspace_bytes += alloc_bytes - - post_order_visit(func.body, calculate_workspace_bytes) - return extra_workspace_bytes diff --git a/tests/python/relay/strategy/arm_cpu/test_avg_pool.py b/tests/python/relay/strategy/arm_cpu/test_avg_pool.py deleted file mode 100644 index 3d6690a1a16f..000000000000 --- a/tests/python/relay/strategy/arm_cpu/test_avg_pool.py +++ /dev/null @@ -1,166 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np -import tvm -import tvm.testing -from tvm import relay -from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data -from tvm.micro.testing.aot_test_utils import ( - AOT_CORSTONE300_RUNNER, -) - - -class BasicPoolTests: - @tvm.testing.requires_corstone300 - def test_pool( - self, - pool_type, - shape, - dtype, - pool_size, - strides, - padding, - dilation, - layout, - ceil_mode, - count_include_pad, - schedule_name, - ): - """Test a subgraph with a single pool operator.""" - ishape = shape - input0 = relay.var("input", relay.TensorType(ishape, dtype)) - - out0 = getattr(relay.op.nn, pool_type)( - input0, - pool_size=pool_size, - strides=strides, - dilation=dilation, - padding=padding, - layout=layout, - out_layout="", - ceil_mode=ceil_mode, - count_include_pad=count_include_pad, - ) - - ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0)) - - input1 = relay.var("input", relay.TensorType(ishape, dtype)) - out1 = getattr(relay.op.nn, pool_type)( - input1, - pool_size=pool_size, - strides=strides, - dilation=dilation, - padding=padding, - layout=layout, - out_layout="", - ceil_mode=ceil_mode, - count_include_pad=count_include_pad, - ) - mod = tvm.IRModule.from_expr(relay.Function([input1], out1)) - - inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)} - output_list = generate_ref_data(ref_mod, inputs) - - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - schedule_name=schedule_name, - ) - - -class TestAvgPool1d(BasicPoolTests): - """This test is for pool.arm_cpu schedule.""" - - ( - shape, - pool_size, - strides, - padding, - dilation, - layout, - ceil_mode, - count_include_pad, - ) = tvm.testing.parameters( - ((3, 32, 27), (3,), (2,), 0, 1, "NCW", False, False), - ((3, 32, 27), (3,), (2,), 0, 1, "NWC", False, False), - ((3, 32, 27), (3,), (2,), 0, 1, "NCW", True, False), - ((3, 32, 27), (3,), (2,), 1, 1, "NCW", False, True), - ((1, 1, 32), 3, 1, 0, 1, "NCW", False, False), - ((1, 4, 20), 3, 2, 2, 1, "NCW", False, False), - ) - pool_type = tvm.testing.parameter("avg_pool1d") - dtype = tvm.testing.parameter("int32") - schedule_name = tvm.testing.parameter("pool.arm_cpu") - - -class TestAvgPool2d(BasicPoolTests): - """This test is for pool.arm_cpu schedule.""" - - ( - shape, - pool_size, - strides, - padding, - dilation, - layout, - ceil_mode, - count_include_pad, - ) = tvm.testing.parameters( - ((3, 32, 27, 27), (3, 3), (2, 2), 0, 1, "NCHW", False, False), - ((3, 32, 27, 27), (3, 3), (2, 2), 0, 1, "NHWC", False, False), - ((2, 16, 27, 27), (3, 3), (2, 2), 0, 1, "NCHW", True, False), - ((2, 27, 27, 16), (3, 3), (2, 2), 0, 1, "NHWC", True, False), - ((2, 16, 27, 27), (3, 3), (2, 2), 0, 1, "NCHW", True, True), - ((1, 25, 5, 64), (25, 5), (25, 5), 0, 1, "NHWC", False, False), - ((1, 3, 3, 256), (3, 3), (3, 3), 0, 1, "NHWC", False, False), - ((1, 8, 8, 64), (8, 8), (8, 8), 0, 1, "NHWC", False, False), - ((1, 1, 32, 32), (3, 3), 1, 0, 
1, "NCHW", False, False), - ((1, 4, 32, 20), (3, 3), (2, 2), 0, 1, "NCHW", False, False), - ) - pool_type = tvm.testing.parameter("avg_pool2d") - dtype = tvm.testing.parameter("int32") - schedule_name = tvm.testing.parameter("pool.arm_cpu") - - -class TestAvgPool3d(BasicPoolTests): - """This test is for pool.arm_cpu schedule.""" - - ( - shape, - pool_size, - strides, - padding, - dilation, - layout, - ceil_mode, - count_include_pad, - ) = tvm.testing.parameters( - ((3, 4, 8, 27, 27), (3, 3, 3), 2, 0, 1, "NCDHW", False, False), - ) - pool_type = tvm.testing.parameter("avg_pool3d") - dtype = tvm.testing.parameter("int32") - schedule_name = tvm.testing.parameter("pool.arm_cpu") - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/strategy/arm_cpu/test_conv1d_ncw.py b/tests/python/relay/strategy/arm_cpu/test_conv1d_ncw.py deleted file mode 100644 index b1dda10c4294..000000000000 --- a/tests/python/relay/strategy/arm_cpu/test_conv1d_ncw.py +++ /dev/null @@ -1,115 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import numpy as np -import tvm -import tvm.testing -from tvm import relay -from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data -from tvm.micro.testing.aot_test_utils import ( - AOT_CORSTONE300_RUNNER, -) - - -class BasicConv1dTests: - @tvm.testing.requires_corstone300 - def test_conv1d( - self, - data_shape, - kernel_size, - num_filter, - strides, - padding, - dilation, - dtype, - schedule_name, - ): - """Test a subgraph with a single conv1d_ncw operator.""" - ishape = data_shape - wshape = (num_filter, data_shape[1], kernel_size) - - weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype) - - input0 = relay.var("input", relay.TensorType(ishape, dtype)) - weight0 = relay.const(weight_data) - out0 = relay.op.nn.conv1d( - input0, - weight0, - kernel_size=kernel_size, - strides=strides, - padding=padding, - dilation=dilation, - data_layout="NCW", - kernel_layout="OIW", - out_dtype="int32", - out_layout="NCW", - ) - ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0)) - - input1 = relay.var("input", relay.TensorType(ishape, dtype)) - weight1 = relay.const(weight_data) - - out1 = relay.op.nn.conv1d( - input1, - weight1, - kernel_size=kernel_size, - strides=strides, - padding=padding, - dilation=dilation, - data_layout="NCW", - kernel_layout="OIW", - out_dtype="int32", - out_layout="NCW", - ) - mod = tvm.IRModule.from_expr(relay.Function([input1], out1)) - - inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)} - output_list = generate_ref_data(ref_mod, inputs) - - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - schedule_name=schedule_name, - ) - - -class TestConv1d_ncw(BasicConv1dTests): - """This test is for conv1d_ncw.generic schedule.""" - - data_shape, kernel_size, num_filter, strides, padding, dilation = tvm.testing.parameters( - ((4, 32, 16), 3, 12, 1, 0, 1), - ((4, 16, 32), 3, 12, 1, 0, 1), - ((1, 12, 32), 3, 16, 1, 0, 1), - ((3, 10, 12), 4, 24, 1, 0, 1), - ((1, 7, 7), 3, 5, 1, 0, 1), - ((1, 2, 10), 4, 4, 2, (1, 1), 1), - ((1, 2, 20), 4, 4, 2, (0, 1), 1), - ((1, 4, 16), 1, 12, 1, (1, 0), 1), - ((1, 16, 24), 1, 32, 3, (2, 2), 1), - ) - dtype = tvm.testing.parameter("int8", "int16") - data_layout = tvm.testing.parameter("NCW") - schedule_name = tvm.testing.parameter("conv1d_ncw.generic") - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/strategy/arm_cpu/test_conv1d_nwc.py b/tests/python/relay/strategy/arm_cpu/test_conv1d_nwc.py deleted file mode 100644 index 3daed6221f68..000000000000 --- a/tests/python/relay/strategy/arm_cpu/test_conv1d_nwc.py +++ /dev/null @@ -1,143 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np -import tvm -import tvm.testing -from tvm import relay -from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data -from tvm.micro.testing.aot_test_utils import ( - AOT_CORSTONE300_RUNNER, -) - - -class BasicConv1dTests: - @tvm.testing.requires_corstone300 - def test_conv1d( - self, - data_shape, - kernel_size, - kernel_layout, - num_filter, - strides, - padding, - dilation, - dtype, - schedule_name, - ): - """Test a subgraph with a single conv1d_nwc operator.""" - ishape = data_shape - wshape = (kernel_size, data_shape[-1], num_filter) - weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype) - - input0 = relay.var("input", relay.TensorType(ishape, dtype)) - weight0 = relay.const(weight_data) - out0 = relay.op.nn.conv1d( - input0, - weight0, - kernel_size=kernel_size, - strides=strides, - padding=padding, - dilation=dilation, - data_layout="NWC", - kernel_layout="WIO", - out_dtype="int32", - out_layout="NWC", - ) - ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0)) - - input1 = relay.var("input", relay.TensorType(ishape, dtype)) - - if kernel_layout == "WOI": - weight1 = relay.const(np.moveaxis(weight_data, 1, -1)) - else: - weight1 = relay.const(weight_data) - - out1 = relay.op.nn.conv1d( - input1, - weight1, - kernel_size=kernel_size, - strides=strides, - padding=padding, - dilation=dilation, - data_layout="NWC", - kernel_layout=kernel_layout, - out_dtype="int32", - out_layout="NWC", - ) - mod = tvm.IRModule.from_expr(relay.Function([input1], out1)) - - inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)} - output_list = generate_ref_data(ref_mod, inputs) - - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - schedule_name=schedule_name, - ) - - -class TestConv1d_dsp(BasicConv1dTests): - """This test is for conv1d_dsp schedule.""" - - data_shape, kernel_size, num_filter, strides, padding, dilation = tvm.testing.parameters( - ((4, 32, 16), 3, 12, 1, 0, 1), - ((4, 16, 32), 3, 12, 1, 0, 1), - ((4, 32, 16), 3, 12, 1, 0, 1), - ((1, 32, 12), 3, 16, 1, 0, 1), - # TODO: The following 4 tests fail due to https://github.com/apache/tvm/issues/11466 - # ((3, 12, 10), 4, 24, 1, 0, 1), - # ((1, 7, 7), 3, 5, 1, 0, 1), - # ((1, 10, 2), 4, 4, 2, (1, 1), 1), - # ((1, 20, 2), 4, 4, 2, (0, 1), 1), - ((1, 16, 4), 1, 12, 1, (1, 0), 1), - ((1, 24, 16), 1, 32, 3, (2, 2), 1), - ) - dtype = tvm.testing.parameter("int8", "int16") - data_layout = tvm.testing.parameter("NWC") - kernel_layout = tvm.testing.parameter("WOI") - schedule_name = tvm.testing.parameter("conv1d_dsp") - - -class TestConv1d_nwc(BasicConv1dTests): - """This test is for conv1d_nwc.generic schedule.""" - - data_shape, kernel_size, num_filter, strides, padding, dilation = tvm.testing.parameters( - ((4, 32, 16), 3, 12, 1, 0, 1), - ((4, 16, 32), 3, 12, 1, 0, 1), - ((4, 32, 16), 3, 12, 1, 0, 1), - ((1, 32, 12), 3, 16, 1, 0, 1), - ((3, 12, 10), 4, 24, 1, 0, 1), - ((1, 7, 7), 3, 5, 1, 0, 1), - ((1, 10, 2), 4, 4, 2, (1, 1), 1), - ((1, 20, 2), 4, 4, 2, (0, 1), 1), - ((1, 16, 4), 1, 12, 1, (1, 0), 1), - ((1, 24, 16), 1, 32, 3, (2, 2), 1), - ) - dtype = tvm.testing.parameter("int8", "int16") - data_layout = tvm.testing.parameter("NWC") - kernel_layout = 
tvm.testing.parameter("WIO") - schedule_name = tvm.testing.parameter("conv1d_nwc.generic") - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/strategy/arm_cpu/test_conv2d.py b/tests/python/relay/strategy/arm_cpu/test_conv2d.py deleted file mode 100644 index 8ef9cb09e648..000000000000 --- a/tests/python/relay/strategy/arm_cpu/test_conv2d.py +++ /dev/null @@ -1,240 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Tests for arm_cpu schedules for regular conv2d.""" - -import pytest -import numpy as np - -import tvm -import tvm.topi.testing -from tvm import relay -from test_generalized_conv2d import GeneralizedConv2dTests -from tvm.testing import fixture, main, parameter, parameters -from tvm.topi.nn.utils import get_pad_tuple -from tvm.topi.utils import get_const_tuple -from tvm.target.codegen import llvm_version_major -from tvm.testing.aot import AOTTestModel, AOTCompiledTestModel, run_and_check, generate_ref_data -from tvm.micro.testing.aot_test_utils import AOT_APROFILE_AEM_RUNNER -from tvm.relay.op.strategy.arm_cpu import arm_cpu_tir_strategy -from scalable_utils import calculate_extra_workspace_size_from_scalable_extents - - -class Conv2dTests(GeneralizedConv2dTests): - """Helper for constructing regular Conv2ds. Always sets groups to 1. 
We set the reference - kernel layout here as we must pick something, but the x86 implementation supports several.""" - - @fixture - def groups(self): - """Using a fixture instead of a parameter stops Pytest from adding the (redundant) number of - groups to the name of each test.""" - return 1 - - def setup_method(self): - self.ref_kernel_layout = "HWIO" - - -class TestConv2d_NHWC_DSP(Conv2dTests): - """This test is for conv2d_nhwc_dsp.arm_cpu schedule.""" - - data_shape, kernel_size, num_filter, strides, padding, dilation = parameters( - # TODO(mehrdadh): Fails due to https://github.com/apache/tvm/issues/11216 - # ((1, 32, 32, 1), (3, 3), 12, 1, 0, 1), - # ((1, 32, 10, 3), (3, 3), 16, 1, 0, 1), - # ((1, 49, 10, 1), (10, 4), 64, (2, 1), (4, 1, 5, 1), 1), - ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 1), - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 2), - ((1, 32, 32, 16), (3, 3), 16, 1, (1, 1, 2, 2), 2), - # from Keyword Spotting model from MLPerfTiny models - # TODO(mehrdad): Fails due to https://github.com/apache/tvm/issues/11216 - # ((1, 49, 10, 1), (10, 4), 64, (2, 2), (4, 1, 5, 1), 1), - # from Visual Wake Word model from MLPerfTiny models - # TODO(mehrdadh): fails due to https://github.com/apache/tvm/issues/11216 - # ((1, 96, 96, 3), (3, 3), 8, (2, 2), (0, 0, 1, 1), 1), - # from Image Classification model from MLPerfTiny models - ((1, 16, 16, 32), (1, 1), 64, (2, 2), 0, 1), - ((4, 16, 16, 8), (5, 5), 8, 2, (0, 4, 4, 0), 1), - ((4, 16, 16, 8), (5, 5), 16, 2, (0, 4, 4, 0), 1), - ((4, 16, 16, 8), (5, 5), 8, 2, 0, 1), - ((4, 16, 16, 8), (5, 5), 16, 2, 0, 1), - ((1, 16, 16, 8), (3, 3), 16, 2, (0, 0, 1, 1), 1), - ((1, 16, 16, 8), (3, 3), 16, 2, (1, 1, 2, 2), 1), - ((1, 16, 16, 8), (5, 5), 16, 2, (3, 3, 2, 2), 1), - ((1, 16, 16, 8), (3, 3), 16, 2, (0, 1, 2, 3), 1), - ) - in_dtype = parameter("int8", "int16") - - data_layout = parameter("NHWC") - kernel_layout = parameter("HWOI") - out_layout = parameter("NHWC") - schedule_name = parameter("conv2d_nhwc_dsp.arm_cpu") - - -class TestConv2d_NHWC_Spatial_Pack(Conv2dTests): - """This test is for conv2d_nhwc_spatial_pack.arm_cpu schedule.""" - - data_shape, kernel_size, num_filter, strides, padding, dilation = parameters( - ((1, 32, 32, 1), (3, 3), 12, 1, 0, 1), - ((1, 32, 10, 3), (3, 3), 16, 1, 0, 1), - ((1, 49, 10, 1), (10, 4), 64, (2, 1), (4, 1, 5, 1), 1), - ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 1), - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 2), - ((1, 32, 32, 16), (3, 3), 16, 1, (1, 1, 2, 2), 2), - ) - in_dtype = parameter("int8", "int16") - - data_layout = parameter("NHWC") - kernel_layout = parameter("HWIO") - out_layout = parameter("NHWC") - schedule_name = parameter("conv2d_nhwc_spatial_pack.arm_cpu") - - -class TestConv2d_NCHW_Spatial_Pack(Conv2dTests): - """This test is for conv2d_nchw_spatial_pack.arm_cpu schedule.""" - - data_shape, kernel_size, num_filter, strides, padding, dilation, in_dtype = parameters( - ((1, 32, 32, 16), (3, 3), 12, 1, 0, 1, "int8"), - ((1, 32, 32, 16), (3, 3), 12, 1, 0, 1, "int16"), - ((1, 16, 16, 32), (3, 3), 12, 1, 0, 1, "int16"), - ) - data_layout = parameter("NCHW") - kernel_layout = parameter("OIHW") - out_layout = parameter("NCHW") - schedule_name = parameter("conv2d_nchw_spatial_pack.arm_cpu") - - -def ref_data(in_dtype, out_dtype, data_shape, num_filter, kernel_size, stride, padding, 
dilation): - np.random.seed(0) - a_shape = data_shape - w_shape = (kernel_size[0], kernel_size[1], data_shape[3], num_filter) - - a_np = np.random.uniform(size=a_shape).astype(in_dtype) - w_np = np.random.uniform(size=w_shape).astype(in_dtype) - dw_np = tvm.topi.testing.dilate_python(w_np, (dilation, dilation, 1, 1)) - b_np = tvm.topi.testing.conv2d_nhwc_python( - a_np.astype(out_dtype), dw_np.astype(out_dtype), stride, padding - ).astype(out_dtype) - return a_np, w_np, dw_np, b_np - - -@pytest.mark.skipif( - llvm_version_major() < 16, reason="SME is not supported in earlier versions of LLVM" -) -@pytest.mark.parametrize( - "data_shape,kernel_size,num_filter,stride,padding,dilation", - [ - ((1, 1, 1, 1), (3, 3), 1, 1, "SAME", 1), - ((1, 9, 9, 1), (3, 3), 16, 1, "SAME", 1), - ((1, 32, 32, 1), (3, 3), 12, 1, "SAME", 1), - ((1, 32, 10, 3), (3, 3), 16, 1, 0, 1), - ((1, 49, 10, 1), (10, 4), 64, (2, 1), (4, 1, 5, 1), 1), - ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 1), - ((1, 32, 32, 16), (3, 4), 16, 1, 0, 1), - ((1, 9, 31, 7), (3, 3), 7, 1, "VALID", 1), - ((1, 32, 32, 16), (5, 5), 16, 1, (0, 2, 2, 0), 2), - ((1, 32, 32, 16), (3, 3), 16, 1, (1, 1, 2, 2), 2), - ((1, 134, 153, 32), (3, 3), 2, (2, 2), "VALID", 1), - ((1, 16, 16, 64), (1, 1), 8, (1, 1), "SAME", 1), - ], -) -@pytest.mark.parametrize("in_dtype,out_dtype", [("float32", "float32"), ("float16", "float32")]) -@tvm.testing.requires_aprofile_aem_fvp -def test_conv2d_sme( - target, data_shape, kernel_size, num_filter, stride, padding, dilation, in_dtype, out_dtype -): - a_np, w_np, dw_np, b_np = ref_data( - in_dtype, out_dtype, data_shape, num_filter, kernel_size, stride, padding, dilation - ) - - kernel_size = get_const_tuple(w_np.shape[:2]) - out_channels = w_np.shape[3] - - x = relay.var("data", shape=a_np.shape, dtype=in_dtype) - weight = relay.const(w_np, dtype=in_dtype) - conv2d = relay.nn.conv2d( - x, - weight, - channels=out_channels, - kernel_size=kernel_size, - strides=stride, - dilation=dilation, - padding=get_pad_tuple(padding, dw_np.shape[:2]), - data_layout="NHWC", - kernel_layout="HWIO", - out_dtype=out_dtype, - ) - - func = relay.Function(relay.analysis.free_vars(conv2d), conv2d) - - ir_mod = tvm.IRModule.from_expr(func) - ir_mod = tvm.relay.transform.InferType()(ir_mod) - - inputs = {"data": a_np} - params = {} - ref_outputs = {"output": b_np} - - target = tvm.target.Target("llvm -mtriple=aarch64-none-elf -mattr=+v9.2a,+sme") - runtime = tvm.relay.backend.Runtime("crt", {"system-lib": True}) - executor = tvm.relay.backend.Executor( - "aot", - { - "interface-api": "packed", - "unpacked-api": False, - }, - ) - - with tvm.transform.PassContext( - opt_level=3, config=AOT_APROFILE_AEM_RUNNER.pass_config - ), target, tvm.meta_schedule.database.ScheduleFnDatabase(arm_cpu_tir_strategy): - executor_factory = tvm.relay.build( - ir_mod, - target=target, - executor=executor, - runtime=runtime, - params=params, - ) - - if in_dtype == "float16": - func_name = "tvmgen_default_fused_nn_contrib_conv2d_gemm_without_weight_transform" - else: - func_name = "tvmgen_default_fused_nn_conv2d" - generated_func = executor_factory.lowered_ir_mods.items()[0][1][func_name] - extra_memory_in_bytes = calculate_extra_workspace_size_from_scalable_extents(generated_func, 4) - - test_model = AOTTestModel( - ir_mod, inputs, ref_outputs, params=params, extra_memory_in_bytes=extra_memory_in_bytes - ) - compiled = AOTCompiledTestModel(test_model, executor_factory) - - assembly = ( - 
compiled.executor_factory.module.imported_modules[0].imported_modules[0].get_source("asm") - ) - assert "fmopa" in assembly - - assert run_and_check( - models=[compiled], - interface_api="packed", - runner=AOT_APROFILE_AEM_RUNNER, - print_output_on_mismatch=True, - ) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/strategy/arm_cpu/test_conv2d_NCHWc.py b/tests/python/relay/strategy/arm_cpu/test_conv2d_NCHWc.py deleted file mode 100644 index 8ca132ffba75..000000000000 --- a/tests/python/relay/strategy/arm_cpu/test_conv2d_NCHWc.py +++ /dev/null @@ -1,136 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np -import tvm -import tvm.testing -from tvm import relay -from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data -from tvm.micro.testing.aot_test_utils import ( - AOT_CORSTONE300_RUNNER, -) - - -class BasicConv2dTests: - @tvm.testing.requires_corstone300 - def test_conv2d_NCHWc( - self, - data_shape, - kernel_size, - data_layout, - kernel_layout, - num_filter, - strides, - padding, - dilation, - dtype, - schedule_name, - ): - """Test a subgraph with a single conv2d_NCHWc operator.""" - ishape = data_shape - wshape = (num_filter, data_shape[1], *kernel_size) - weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype) - - input0 = relay.var("input", relay.TensorType(ishape, dtype)) - weight0 = relay.const(weight_data) - out0 = relay.op.nn.contrib_conv2d_nchwc( - relay.layout_transform(input0, "NCHW", data_layout), - relay.layout_transform(weight0, "OIHW", kernel_layout), - kernel_size=kernel_size, - strides=strides, - padding=padding, - dilation=dilation, - data_layout=data_layout, - kernel_layout=kernel_layout, - channels=num_filter, - out_dtype="", - out_layout="", - ) - ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0)) - - input1 = relay.var("input", relay.TensorType(ishape, dtype)) - weight1 = relay.const(weight_data) - out1 = relay.op.nn.contrib_conv2d_nchwc( - relay.layout_transform(input1, "NCHW", data_layout), - relay.layout_transform(weight1, "OIHW", kernel_layout), - kernel_size=kernel_size, - strides=strides, - padding=padding, - dilation=dilation, - data_layout=data_layout, - kernel_layout=kernel_layout, - channels=num_filter, - out_dtype="", - out_layout="", - ) - mod = tvm.IRModule.from_expr(relay.Function([input1], out1)) - - inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)} - output_list = generate_ref_data(ref_mod, inputs) - - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - schedule_name=schedule_name, - ) - - -class 
TestConv2d_NCHWc(BasicConv2dTests): - """This test is for conv2d_NCHWc.x86 schedule.""" - - ( - data_shape, - kernel_size, - num_filter, - strides, - padding, - dilation, - dtype, - kernel_layout, - data_layout, - ) = tvm.testing.parameters( - ((1, 16, 32, 32), (3, 3), 12, (1, 1), (1, 1), (1, 1), "int8", "OIHW4i4o", "NCHW4c"), - ((1, 16, 32, 32), (3, 3), 12, (1, 1), (1, 1), (1, 1), "int16", "OIHW4i4o", "NCHW4c"), - ((1, 16, 32, 32), (3, 3), 12, (1, 1), (1, 1), (1, 1), "int32", "OIHW4i4o", "NCHW4c"), - ((1, 16, 32, 32), (3, 3), 12, (1, 1), (1, 1), (1, 1), "int8", "OIHW2i8o", "NCHW8c"), - ((1, 16, 32, 32), (3, 3), 12, (1, 1), (1, 1), (1, 1), "int16", "OIHW2i8o", "NCHW8c"), - ((1, 16, 32, 32), (3, 3), 12, (1, 1), (1, 1), (1, 1), "int32", "OIHW2i8o", "NCHW8c"), - # ResNet18 workloads - # this test does not fit in corstone300 DCTM section. - # ((1, 3, 112, 112), (7, 7), 64, (2, 2), (3, 3), (1, 1), "int8", "OIHW4i4o", "NCHW4c"), - ((1, 64, 28, 28), (3, 3), 64, (1, 1), (1, 1), (1, 1), "int8", "OIHW4i4o", "NCHW4c"), - ((1, 64, 28, 28), (1, 1), 64, (1, 1), (0, 0), (1, 1), "int8", "OIHW4i4o", "NCHW4c"), - ((1, 64, 28, 28), (3, 3), 128, (2, 2), (1, 1), (1, 1), "int8", "OIHW4i4o", "NCHW4c"), - ((1, 64, 28, 28), (1, 1), 128, (2, 2), (0, 0), (1, 1), "int8", "OIHW4i4o", "NCHW4c"), - ((1, 128, 14, 14), (3, 3), 128, (1, 1), (1, 1), (1, 1), "int8", "OIHW4i4o", "NCHW4c"), - ((1, 128, 14, 14), (3, 3), 256, (2, 2), (1, 1), (1, 1), "int8", "OIHW4i4o", "NCHW4c"), - ((1, 128, 14, 14), (1, 1), 256, (2, 2), (0, 0), (1, 1), "int8", "OIHW4i4o", "NCHW4c"), - ((1, 256, 7, 7), (3, 3), 256, (1, 1), (1, 1), (1, 1), "int8", "OIHW4i4o", "NCHW4c"), - ((1, 256, 7, 7), (3, 3), 512, (2, 2), (1, 1), (1, 1), "int8", "OIHW4i4o", "NCHW4c"), - ((1, 256, 7, 7), (1, 1), 512, (2, 2), (0, 0), (1, 1), "int8", "OIHW4i4o", "NCHW4c"), - ((1, 512, 3, 3), (3, 3), 512, (1, 1), (1, 1), (1, 1), "int8", "OIHW4i4o", "NCHW4c"), - ) - schedule_name = tvm.testing.parameter("conv2d_NCHWc.x86") - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/strategy/arm_cpu/test_dense.py b/tests/python/relay/strategy/arm_cpu/test_dense.py deleted file mode 100644 index 68188f7d0a01..000000000000 --- a/tests/python/relay/strategy/arm_cpu/test_dense.py +++ /dev/null @@ -1,232 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import pytest -import numpy as np - -import tvm -import tvm.testing -from tvm import relay -from tvm import meta_schedule -from tvm.testing.aot import ( - AOTTestModel, - AOTCompiledTestModel, - compile_and_run, - run_and_check, - generate_ref_data, -) -from tvm.micro.testing.aot_test_utils import AOT_CORSTONE300_RUNNER, AOT_APROFILE_AEM_RUNNER -from tvm.target.codegen import llvm_version_major -from tvm.relay.op.strategy.arm_cpu import arm_cpu_tir_strategy -from scalable_utils import calculate_extra_workspace_size_from_scalable_extents - - -class BasicDenseTests: - @tvm.testing.requires_corstone300 - def test_dense(self, shape, weight_shape, dtype, schedule_name, enable_bias): - """Test a subgraph with a single dense operator.""" - ishape = shape - wshape = weight_shape - out_dtype = "int32" - units = weight_shape[0] - weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype) - if enable_bias: - bias_data = np.random.randint(low=-10, high=10, size=(wshape[0]), dtype=out_dtype) - - input = relay.var("input", relay.TensorType(ishape, dtype)) - weight = relay.const(weight_data) - dense = relay.op.nn.dense( - input, - weight, - units=units, - out_dtype=out_dtype, - ) - if enable_bias: - bias = relay.const(bias_data) - relay_op = relay.op.nn.bias_add(dense, bias) - else: - relay_op = dense - - inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)} - ref_mod = tvm.IRModule.from_expr(relay.Function([input], relay_op)) - output_list = generate_ref_data(ref_mod, inputs) - - mod = tvm.IRModule.from_expr(relay.Function([input], relay_op)) - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - schedule_name=schedule_name, - ) - - -class TestDense(BasicDenseTests): - """This test is for dense_dsp schedule.""" - - shape, weight_shape = tvm.testing.parameters( - ((8, 128), (32, 128)), - ((32, 32), (32, 32)), - ((1, 64), (1, 64)), - ((11, 2), (2, 2)), - ((1, 32), (64, 32)), - ((3, 12), (10, 12)), - ) - dtype = tvm.testing.parameter("int8", "int16") - schedule_name = tvm.testing.parameter("dense_dsp.arm_cpu") - enable_bias = tvm.testing.parameter(False, True) - - -@pytest.mark.skipif( - llvm_version_major() < 17, reason="SME is not supported in earlier versions of LLVM" -) -@tvm.testing.requires_aprofile_aem_fvp -@pytest.mark.parametrize( - "data_shape,weight_shape,enable_bias", - [ - ((32, 32), (32, 32), False), - ((2, 35), (6, 35), False), - ((3, 3), (68, 3), False), - ((79, 65), (152, 65), True), - ], -) -@pytest.mark.parametrize("in_dtype", ["float32", "float16"]) -def test_sme_dense(data_shape, weight_shape, enable_bias, in_dtype): - np.random.seed(0) - out_dtype = "float32" - - input_data = np.random.uniform(size=data_shape).astype(in_dtype) - inp = relay.var("data", shape=data_shape, dtype=in_dtype) - weight_data = np.random.uniform(size=weight_shape).astype(in_dtype) - weight = relay.const(weight_data, dtype=in_dtype) - - relay_op = relay.nn.dense(inp, weight, out_dtype=out_dtype) - - if enable_bias: - bias_data = np.random.uniform(size=weight_shape[0]).astype(out_dtype) - bias = relay.const(bias_data, dtype=out_dtype) - relay_op = relay.nn.bias_add(relay_op, bias) - - func = relay.Function(relay.analysis.free_vars(relay_op), relay_op) - - ir_mod = tvm.IRModule.from_expr(func) - ir_mod = tvm.relay.transform.InferType()(ir_mod) - - inputs = {"data": input_data} - params = {} - 
ref_outputs = generate_ref_data(ir_mod, inputs, params) - - target = tvm.target.Target("llvm -mtriple=aarch64-none-elf -mattr=+v9.2a,+sme") - runtime = tvm.relay.backend.Runtime("crt", {"system-lib": True}) - executor = tvm.relay.backend.Executor( - "aot", - { - "interface-api": "packed", - "unpacked-api": False, - }, - ) - - with tvm.transform.PassContext( - opt_level=3, config=AOT_APROFILE_AEM_RUNNER.pass_config - ), target, meta_schedule.database.ScheduleFnDatabase(arm_cpu_tir_strategy): - executor_factory = tvm.relay.build( - ir_mod, - target=target, - executor=executor, - runtime=runtime, - params=params, - ) - - bias_postfix = "_add" if enable_bias else "" - generated_func = executor_factory.lowered_ir_mods.items()[0][1][ - f"tvmgen_default_fused_nn_matmul{bias_postfix}" - ] - extra_memory_in_bytes = calculate_extra_workspace_size_from_scalable_extents(generated_func, 4) - - test_model = AOTTestModel( - ir_mod, inputs, ref_outputs, params=params, extra_memory_in_bytes=extra_memory_in_bytes - ) - compiled = AOTCompiledTestModel(test_model, executor_factory) - - assembly = ( - compiled.executor_factory.module.imported_modules[0].imported_modules[0].get_source("asm") - ) - assert "fmopa" in assembly - - assert run_and_check( - models=[compiled], - interface_api="packed", - runner=AOT_APROFILE_AEM_RUNNER, - print_output_on_mismatch=True, - ) - - -class TestGemmDense: - """This test is for dense_gemm schedule.""" - - -@tvm.testing.requires_aarch64 -@pytest.mark.parametrize( - "data_shape,weight_shape,enable_bias", - [ - ((32, 32), (32, 32), False), - ((2, 35), (6, 35), False), - ((3, 3), (68, 3), False), - ((79, 65), (152, 65), True), - ], -) -@pytest.mark.parametrize("in_dtype", ["float32", "float16"]) -def test_gemm_dense(data_shape, weight_shape, enable_bias, in_dtype): - np.random.seed(0) - in_np = np.random.uniform(size=(data_shape)).astype(in_dtype) - w1 = np.random.uniform(size=(weight_shape)).astype(in_dtype) - - w = relay.const(w1) - d = relay.var("data", shape=data_shape, dtype=in_dtype) - y = relay.nn.dense(d, w) - - mod = tvm.IRModule() - - mod["main"] = relay.Function([d], y) - - target = "llvm -mtriple=aarch64-linux-gnu -device=arm_cpu -mattr=+v8.6a,+neon" - - with tvm.transform.PassContext(opt_level=3): - lib = relay.build(mod, target=target, params=None) - - out_np = np.array(np.matmul(in_np, w1.T)) - - dev = tvm.cpu(0) - input_buf = tvm.nd.array(in_np, device=dev) - rt = tvm.contrib.graph_executor.GraphModule(lib["default"](dev)) - rt.set_input("data", input_buf) - rt.run() - out = rt.get_output(0) - - if in_dtype == "float16": - tol = {"rtol": 1e-2, "atol": 1e-2} - else: - tol = {"rtol": 1e-7, "atol": 1e-7} - - tvm.testing.assert_allclose(out.numpy(), out_np, rtol=tol["rtol"], atol=tol["atol"]) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d.py b/tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d.py deleted file mode 100644 index 95ae105f9166..000000000000 --- a/tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d.py +++ /dev/null @@ -1,114 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Tests for arm_cpu schedules for depthwise_conv2d.""" - -from test_generalized_conv2d import GeneralizedConv2dTests -from tvm.testing import fixture, main, parameter, parameters - - -class DepthwiseConv2dTests(GeneralizedConv2dTests): - """Helper for constructing depthwise Conv2ds. Sets the reference kernel layout to what x86 code - supports.""" - - @fixture - def groups(self, data_shape): - """By definition, a depthwise_conv2d has a number of groups equal to the number of input - channels, so we don't need to specify the number of groups each time.""" - return data_shape[3] - - def setup_method(self): - self.ref_kernel_layout = "HWOI" - - -class TestDepthwiseConv2d_NCHW_OIHW(DepthwiseConv2dTests): - """This test is for depthwise_conv2d_nchw.arm_cpu schedule.""" - - data_shape, kernel_size, num_filter, strides, padding, dilation = parameters( - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 10, 3, 32), (3, 3), 32, 1, 0, 1), - ((1, 32, 16, 32), (3, 3), 32, 1, (0, 2, 2, 0), 1), - ((1, 32, 16, 32), (3, 3), 32, 1, 0, 1), - ((1, 32, 16, 32), (3, 3), 32, 1, 0, 1), - ((1, 32, 16, 32), (3, 3), 32, 1, (0, 2, 2, 0), 2), - ((1, 32, 16, 16), (3, 3), 16, 1, (1, 1, 2, 2), 2), - ) - - in_dtype = parameter("int8", "int16") - data_layout = parameter("NCHW") - kernel_layout = parameter("OIHW") - out_layout = parameter("NCHW") - schedule_name = parameter("depthwise_conv2d_nchw.arm_cpu") - - -class TestDepthwiseConv2d_NHWC_HWOI(DepthwiseConv2dTests): - """This test is for depthwise_conv2d_nhwc.generic schedule.""" - - data_shape, kernel_size, num_filter, strides, padding, dilation = parameters( - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 10, 16), (3, 3), 16, 1, 0, 1), - ((1, 49, 10, 64), (10, 4), 64, (2, 1), (4, 1, 5, 1), 1), - ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 1), - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 2), - ((1, 32, 32, 16), (3, 3), 16, 1, (1, 1, 2, 2), 2), - ) - - in_dtype = parameter("int8", "int16") - data_layout = parameter("NHWC") - kernel_layout = parameter("HWOI") - out_layout = parameter("NHWC") - schedule_name = parameter("depthwise_conv2d_nhwc.generic") - - -class TestDepthwiseConv2d_NHWC_HWOI_DSP(DepthwiseConv2dTests): - """This test is for depthwise_conv2d_nhwc_dsp.arm_cpu schedule. 
The tests that are parameterized - by dtype work for both int8 and int16, while the others only work on the specified dtype.""" - - in_dtype_parameterized_tests = [ - # Depthwise_conv2d parameters from MobileNetV1 0.25x - ((1, 48, 48, 8), (3, 3), 8, (1, 1), 1), - ((1, 48, 48, 16), (3, 3), 16, (2, 2), (1, 1, 0, 0)), - ((1, 24, 24, 32), (3, 3), 32, (1, 1), 1), - ((1, 24, 24, 32), (3, 3), 32, (2, 2), (1, 1, 0, 0)), - ((1, 12, 12, 64), (3, 3), 64, (1, 1), 1), - ((1, 12, 12, 64), (3, 3), 64, (2, 2), (1, 1, 0, 0)), - ((1, 6, 6, 128), (3, 3), 128, (1, 1), 1), - ((1, 6, 6, 128), (3, 3), 128, (2, 2), (1, 1, 0, 0)), - ((1, 3, 3, 256), (3, 3), 256, (1, 1), 1), - # Asymmetric and larger kernels - ((1, 25, 5, 64), (3, 3), 64, (1, 1), 1), - ((1, 24, 24, 8), (5, 5), 8, (1, 1), 1), - ((1, 24, 24, 8), (3, 5), 8, (1, 1), 1), - ] - - data_shape, kernel_size, num_filter, strides, padding, in_dtype = parameters( - # Make a copy of each parameterized test for int8 and one for int16 - *map(lambda t: t + ("int8",), in_dtype_parameterized_tests), - *map(lambda t: t + ("int16",), in_dtype_parameterized_tests), - # Test the int16 implementation with channel numbers not divisible by four - ((1, 48, 48, 6), (3, 3), 6, (1, 1), 1, "int16"), - ) - dilation = parameter(1) - data_layout = parameter("NHWC") - kernel_layout = parameter("HWOI") - out_layout = parameter("NHWC") - schedule_name = parameter("depthwise_conv2d_nhwc_dsp.arm_cpu") - - -if __name__ == "__main__": - main() diff --git a/tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d_NCHWc.py b/tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d_NCHWc.py deleted file mode 100644 index 178b44edbd40..000000000000 --- a/tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d_NCHWc.py +++ /dev/null @@ -1,119 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import numpy as np -import tvm -import tvm.testing -from tvm import relay -from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data -from tvm.micro.testing.aot_test_utils import ( - AOT_CORSTONE300_RUNNER, -) - - -class BasicConv2dTests: - @tvm.testing.requires_corstone300 - def test_depthwise_conv2d_NCHWc( - self, - data_shape, - kernel_size, - data_layout, - kernel_layout, - groups, - strides, - padding, - dilation, - dtype, - schedule_name, - ): - """Test a subgraph with a single depthwise_conv2d_nchwc operator.""" - ishape = data_shape - wshape = (data_shape[1], 1, *kernel_size) - weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype) - groups = groups - - input0 = relay.var("input", relay.TensorType(ishape, dtype)) - weight0 = relay.const(weight_data) - out0 = relay.op.nn.contrib_depthwise_conv2d_nchwc( - relay.layout_transform(input0, "NCHW", data_layout), - relay.layout_transform(weight0, "OIHW", kernel_layout), - kernel_size=kernel_size, - strides=strides, - padding=padding, - dilation=dilation, - data_layout=data_layout, - kernel_layout=kernel_layout, - groups=groups, - out_dtype="", - out_layout="", - ) - ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0)) - - input1 = relay.var("input", relay.TensorType(ishape, dtype)) - weight1 = relay.const(weight_data) - out1 = relay.op.nn.contrib_depthwise_conv2d_nchwc( - relay.layout_transform(input1, "NCHW", data_layout), - relay.layout_transform(weight1, "OIHW", kernel_layout), - kernel_size=kernel_size, - strides=strides, - padding=padding, - dilation=dilation, - data_layout=data_layout, - kernel_layout=kernel_layout, - groups=groups, - out_dtype="", - out_layout="", - ) - mod = tvm.IRModule.from_expr(relay.Function([input1], out1)) - - inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)} - output_list = generate_ref_data(ref_mod, inputs) - - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - schedule_name=schedule_name, - ) - - -class TestDepthWiseConv2d_NCHWc(BasicConv2dTests): - """This test is for depthwise_conv2d_NCHWc schedule.""" - - ( - data_shape, - kernel_size, - groups, - strides, - padding, - dilation, - kernel_layout, - data_layout, - ) = tvm.testing.parameters( - ((1, 16, 32, 32), (3, 3), 16, (1, 1), (1, 1, 1, 1), (1, 1), "OIHW1i4o", "NCHW4c"), - ((1, 16, 32, 32), (3, 3), 12, (1, 1), (1, 1, 1, 1), (1, 1), "OIHW1i8o", "NCHW8c"), - ) - dtype = tvm.testing.parameter("int8", "int16", "int32") - schedule_name = tvm.testing.parameter("depthwise_conv2d_NCHWc") - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/strategy/arm_cpu/test_generalized_conv2d.py b/tests/python/relay/strategy/arm_cpu/test_generalized_conv2d.py deleted file mode 100644 index d48c7e138fba..000000000000 --- a/tests/python/relay/strategy/arm_cpu/test_generalized_conv2d.py +++ /dev/null @@ -1,152 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Helper class for testing variations of 2D convolution. Should be used by subclassing -`GeneralizedConv2dTests`, and then setting the arguments using tvm.testing.parameter(s).""" - -import numpy as np - -import tvm -import tvm.testing -from tvm import relay -from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data -from tvm.micro.testing.aot_test_utils import AOT_CORSTONE300_RUNNER - - -def change_ndarray_layout(arr, src_layout, dst_layout): - """Makes a copy of an ndarray, reshaping it to a new data layout. - - Parameter - --------- - arr : numpy.ndarray - The ndarray to be reformatted. - - src_layout : str - The current layout of the Relay constant. Must be alphabetic (e.g. NHWC - or OIHW, but not NCHW2c). - - dst_layout : str - The desired layout of new the Relay constant. Must be alphabetic (e.g. NHWC - or OIHW, but not NCHW2c). - - Returns - ------- - dst_shape : numpy.ndarray - A copy of the ndarray with the new layout. - """ - assert src_layout.isalpha() and dst_layout.isalpha() - axis_order = [src_layout.index(c) for c in dst_layout] - return np.transpose(arr, axis_order) - - -class GeneralizedConv2dTests: - """Superclass which can be used to test regular, depthwise, or grouped conv2D. Cannot be used - for 5D data formats (NCHWc and such) as written, but could be extended. Might also be worth - abstracting some of this logic into an even more general class that could be used for other - operators. - - Note that data_shape should always be a tuple of length four indicating the data shape in NHWC - format (it will later be reshaped according to the given data_layout), and kernel_size should be - a length two tuple giving the height and width of the kernel. - - This test (and other base Conv2dTests classes) are not run by Pytest, as their names do not - start with `Test`.""" - - @tvm.testing.requires_corstone300 - def test_conv2d( - self, - data_shape, - kernel_size, - num_filter, - in_dtype, - strides, - padding, - groups, - dilation, - data_layout, - kernel_layout, - out_layout, - schedule_name, - ): - """Test a subgraph with a single conv2d operator.""" - - ref_input_data = np.random.randint(low=-128, high=127, size=data_shape, dtype=in_dtype) - ref_input_var = relay.var("input", relay.TensorType(data_shape, in_dtype)) # NHWC layout - kernel_shape = (*kernel_size, data_shape[-1] // groups, num_filter) # HWIO layout - ref_kernel_data = np.random.randint(low=-10, high=10, size=kernel_shape, dtype=in_dtype) - - """Our x86 depthwise implementation only supports HWOI with NHWC, so we need to change our - kernel layout to work around this. We can't just change the whole thing to HWIO or - something else, as then group conv2d would not work. Eventually, we should switch to using - TensorFlow to create the reference output so we can ensure our implementation is right. 
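# A minimal sketch (not part of the patch, NumPy only) of the axis-order transpose that
# change_ndarray_layout above performs: each character of the destination layout is looked
# up in the source layout, and the resulting permutation is handed to np.transpose.
import numpy as np

data_nhwc = np.zeros((1, 96, 96, 3))            # N, H, W, C
axis_order = ["NHWC".index(c) for c in "NCHW"]  # -> [0, 3, 1, 2]
data_nchw = np.transpose(data_nhwc, axis_order)
assert data_nchw.shape == (1, 3, 96, 96)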
- See https://github.com/apache/tvm/issues/13137 for details.""" - - ref_relay_op = relay.op.nn.conv2d( - ref_input_var, - relay.const(change_ndarray_layout(ref_kernel_data, "HWIO", self.ref_kernel_layout)), - kernel_size=kernel_size, - strides=strides, - padding=padding, - groups=groups, - dilation=(dilation, dilation), - data_layout="NHWC", - kernel_layout=self.ref_kernel_layout, - out_dtype="int32", - out_layout="NHWC", - ) - ref_module = tvm.IRModule.from_expr(relay.Function([ref_input_var], ref_relay_op)) - ref_outputs = generate_ref_data(ref_module, {"input": ref_input_data}) - - # Reshape output dictionary to match out_layout - assert len(ref_outputs) == 1 - output_tensor_name, output_tensor = next(iter(ref_outputs.items())) - ref_outputs[output_tensor_name] = change_ndarray_layout(output_tensor, "NHWC", out_layout) - - test_input_data = change_ndarray_layout(ref_input_data, "NHWC", data_layout) - test_input_var = relay.var("input", relay.TensorType(test_input_data.shape, in_dtype)) - test_kernel_data = change_ndarray_layout(ref_kernel_data, "HWIO", kernel_layout) - - test_relay_op = relay.op.nn.conv2d( - test_input_var, - relay.const(test_kernel_data), - kernel_size=kernel_size, - strides=strides, - padding=padding, - groups=groups, - dilation=(dilation, dilation), - data_layout=data_layout, - kernel_layout=kernel_layout, - out_dtype="int32", - out_layout=out_layout, - ) - test_function = relay.Function([test_input_var], test_relay_op) - test_model = AOTTestModel( - module=tvm.IRModule.from_expr(test_function), - inputs={"input": test_input_data}, - outputs=ref_outputs, - ) - - compile_and_run( - test_model, - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - schedule_name=schedule_name, - ) diff --git a/tests/python/relay/strategy/arm_cpu/test_group_conv2d.py b/tests/python/relay/strategy/arm_cpu/test_group_conv2d.py deleted file mode 100644 index fb11ceda5097..000000000000 --- a/tests/python/relay/strategy/arm_cpu/test_group_conv2d.py +++ /dev/null @@ -1,75 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Tests for arm_cpu schedules for grouped conv2d.""" - -from test_generalized_conv2d import GeneralizedConv2dTests -from tvm.testing import main, parameter, parameters - - -class GroupConv2dTests(GeneralizedConv2dTests): - """Helper for constructing group Conv2ds. 
Sets the reference kernel layout to what x86 code - supports.""" - - def setup_method(self): - self.ref_kernel_layout = "HWIO" - - -class TestGroupConv2d_NCHW_OIHW(GroupConv2dTests): - """This test is for group_conv2d_nchw.arm_cpu schedule.""" - - data_shape, kernel_size, num_filter, strides, padding, dilation = parameters( - ((1, 32, 32, 16), (3, 3), 12, 1, 0, 1), - ((1, 32, 10, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 1), - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 2), - ((1, 32, 32, 16), (3, 3), 32, 1, (1, 1, 2, 2), 2), - ) - groups = parameter(2, 4) - in_dtype = parameter("int8", "int16") - - data_layout = parameter("NCHW") - kernel_layout = parameter("OIHW") - out_layout = parameter("NCHW") - schedule_name = parameter("group_conv2d_nchw.arm_cpu") - - -class TestGroupConv2d_NHWC_HWIO(GroupConv2dTests): - """This test is for group_conv2d_nhwc.generic schedule.""" - - data_shape, kernel_size, num_filter, strides, padding, dilation = parameters( - ((1, 32, 32, 16), (3, 3), 12, 1, 0, 1), - ((1, 32, 10, 16), (3, 3), 16, 1, 0, 1), - ((1, 49, 10, 16), (10, 4), 64, (2, 1), (4, 1, 5, 1), 1), - ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 1), - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1), - ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 2), - ((1, 32, 32, 16), (3, 3), 16, 1, (1, 1, 2, 2), 2), - ) - groups = parameter(2, 4) - in_dtype = parameter("int8", "int16") - - data_layout = parameter("NHWC") - kernel_layout = parameter("HWIO") - out_layout = parameter("NHWC") - schedule_name = parameter("group_conv2d_nhwc.generic") - - -if __name__ == "__main__": - main() diff --git a/tests/python/relay/strategy/arm_cpu/test_matmul.py b/tests/python/relay/strategy/arm_cpu/test_matmul.py deleted file mode 100644 index 83f9ac1da5ba..000000000000 --- a/tests/python/relay/strategy/arm_cpu/test_matmul.py +++ /dev/null @@ -1,125 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import pytest -import numpy as np - -import tvm -from tvm import relay -from tvm import meta_schedule -from tvm.testing.aot import ( - AOTTestModel, - AOTCompiledTestModel, - run_and_check, - generate_ref_data, -) -from tvm.micro.testing.aot_test_utils import AOT_APROFILE_AEM_RUNNER -from tvm.target.codegen import llvm_version_major -from tvm.relay.op.strategy.arm_cpu import arm_cpu_tir_strategy -from scalable_utils import calculate_extra_workspace_size_from_scalable_extents - - -@pytest.mark.skipif( - llvm_version_major() < 17, reason="SME is not supported in earlier versions of LLVM" -) -@tvm.testing.requires_aprofile_aem_fvp -@pytest.mark.parametrize( - "data_shape,weight_shape,transpose_a,transpose_b,in_dtype", - [ - ((4, 63), (63, 10), False, False, "float32"), - ((64, 32), (32, 32), False, True, "float32"), - ((96, 64), (64, 32), False, False, "float32"), - ((62, 3), (3, 3), False, False, "float32"), - ((4, 5), (79, 5), False, True, "float32"), - ((134, 36), (36, 111), False, False, "float32"), - ((3, 10), (10, 72), False, False, "float32"), - ((4, 63), (10, 63), False, True, "float16"), - ((96, 64), (32, 64), False, True, "float16"), - ((62, 3), (3, 3), False, True, "float16"), - ((4, 5), (79, 5), False, True, "float16"), - ((134, 36), (111, 36), False, True, "float16"), - # Tensorization does not work when the reduction axis has unit iters. - # See https://github.com/apache/tvm/issues/16566 - # ((5, 1), (1, 5), False, False), - ], -) -def test_sme_matmul_with_const_b(data_shape, weight_shape, transpose_a, transpose_b, in_dtype): - """ - Execution tests for matmul Scalable Matrix Extension (SME) schedule. - """ - np.random.seed(0) - out_dtype = "float32" - - input_data = np.random.uniform(size=data_shape).astype(in_dtype) - inp = relay.var("data", shape=data_shape, dtype=in_dtype) - weight_data = np.random.uniform(size=weight_shape).astype(in_dtype) - weight = relay.const(weight_data, dtype=in_dtype) - - matmul = relay.nn.matmul( - inp, weight, out_dtype=out_dtype, transpose_a=transpose_a, transpose_b=transpose_b - ) - func = relay.Function(relay.analysis.free_vars(matmul), matmul) - - ir_mod = tvm.IRModule.from_expr(func) - ir_mod = tvm.relay.transform.InferType()(ir_mod) - - inputs = {"data": input_data} - params = {} - ref_outputs = generate_ref_data(ir_mod, inputs, params) - - target = tvm.target.Target("llvm -mtriple=aarch64-none-elf -mattr=+v9.2a,+sme") - runtime = tvm.relay.backend.Runtime("crt", {"system-lib": True}) - executor = tvm.relay.backend.Executor( - "aot", - { - "interface-api": "packed", - "unpacked-api": False, - }, - ) - with tvm.transform.PassContext( - opt_level=3, config=AOT_APROFILE_AEM_RUNNER.pass_config - ), target, meta_schedule.database.ScheduleFnDatabase(arm_cpu_tir_strategy): - executor_factory = tvm.relay.build( - ir_mod, - target=target, - executor=executor, - runtime=runtime, - params=params, - ) - generated_func = executor_factory.lowered_ir_mods.items()[0][1][ - "tvmgen_default_fused_nn_matmul" - ] - extra_memory_in_bytes = calculate_extra_workspace_size_from_scalable_extents(generated_func, 4) - - test_model = AOTTestModel( - ir_mod, inputs, ref_outputs, params=params, extra_memory_in_bytes=extra_memory_in_bytes - ) - compiled = AOTCompiledTestModel(test_model, executor_factory) - - assembly = executor_factory.module.imported_modules[0].imported_modules[0].get_source("asm") - assert "fmopa" in assembly - - assert run_and_check( - models=[compiled], - interface_api="packed", - runner=AOT_APROFILE_AEM_RUNNER, - 
print_output_on_mismatch=True, - ) - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/strategy/arm_cpu/test_max_pool.py b/tests/python/relay/strategy/arm_cpu/test_max_pool.py deleted file mode 100644 index ee890261d1b4..000000000000 --- a/tests/python/relay/strategy/arm_cpu/test_max_pool.py +++ /dev/null @@ -1,132 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np -import tvm -import tvm.testing -from tvm import relay -from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data -from tvm.micro.testing.aot_test_utils import ( - AOT_CORSTONE300_RUNNER, -) - - -class BasicPoolTests: - @tvm.testing.requires_corstone300 - def test_pool( - self, - pool_type, - shape, - dtype, - pool_size, - strides, - padding, - dilation, - layout, - ceil_mode, - schedule_name, - ): - """Test a subgraph with a single max_pool operator.""" - ishape = shape - input0 = relay.var("input", relay.TensorType(ishape, dtype)) - - out0 = getattr(relay.op.nn, pool_type)( - input0, - pool_size=pool_size, - strides=strides, - dilation=dilation, - padding=padding, - layout=layout, - out_layout="", - ceil_mode=ceil_mode, - ) - - ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0)) - - input1 = relay.var("input", relay.TensorType(ishape, dtype)) - out1 = getattr(relay.op.nn, pool_type)( - input1, - pool_size=pool_size, - strides=strides, - dilation=dilation, - padding=padding, - layout=layout, - out_layout="", - ceil_mode=ceil_mode, - ) - mod = tvm.IRModule.from_expr(relay.Function([input1], out1)) - - inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)} - output_list = generate_ref_data(ref_mod, inputs) - - compile_and_run( - AOTTestModel(module=mod, inputs=inputs, outputs=output_list), - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - use_unpacked_api=True, - target_opts={ - "-keys": "arm_cpu", - "-mcpu": "cortex-m7", - }, - schedule_name=schedule_name, - ) - - -class TestMaxPool1d(BasicPoolTests): - """This test is for pool.arm_cpu schedule.""" - - shape, pool_size, strides, padding, dilation, layout, ceil_mode = tvm.testing.parameters( - ((3, 32, 27), (3,), (2,), 0, 1, "NCW", True), - ((1, 32, 1), 3, 1, 0, 1, "NWC", False), - ((1, 20, 4), 3, 2, 0, 1, "NWC", False), - ) - pool_type = tvm.testing.parameter("max_pool1d") - dtype = tvm.testing.parameter("int32") - schedule_name = tvm.testing.parameter("pool.arm_cpu") - - -class TestMaxPool2d(BasicPoolTests): - """This test is for pool.arm_cpu schedule.""" - - shape, pool_size, strides, padding, dilation, layout, ceil_mode = tvm.testing.parameters( - ((2, 32, 27, 27), (3, 3), (2, 2), 0, 1, "NCHW", False), - ((2, 32, 27, 27), (3, 3), (2, 2), 0, 1, "NCHW", True), - ((1, 26, 26, 12), (2, 2), (2, 2), 0, 1, "NHWC", False), - 
((1, 11, 11, 32), (2, 2), (2, 2), 0, 1, "NHWC", False), - ((1, 3, 3, 64), (2, 2), (2, 2), 0, 1, "NHWC", False), - ((1, 32, 32, 1), (3, 3), 1, 0, 1, "NHWC", False), - ((1, 32, 20, 4), (3, 3), (2, 2), 0, 1, "NHWC", False), - ((1, 32, 32, 1), (3, 3), 1, 0, 1, "NHWC", True), - ((1, 32, 20, 4), (3, 3), (2, 2), 0, 1, "NHWC", True), - ) - pool_type = tvm.testing.parameter("max_pool2d") - dtype = tvm.testing.parameter("int32") - schedule_name = tvm.testing.parameter("pool.arm_cpu") - - -class TestMaxPool3d(BasicPoolTests): - """This test is for pool.arm_cpu schedule.""" - - shape, pool_size, strides, padding, dilation, layout, ceil_mode = tvm.testing.parameters( - ((3, 4, 8, 27, 27), (3, 3, 3), 2, 0, 1, "NCDHW", False), - ) - pool_type = tvm.testing.parameter("max_pool3d") - dtype = tvm.testing.parameter("int32") - schedule_name = tvm.testing.parameter("pool.arm_cpu") - - -if __name__ == "__main__": - tvm.testing.main() diff --git a/tests/python/relay/strategy/arm_cpu/test_quantized_convolution.py b/tests/python/relay/strategy/arm_cpu/test_quantized_convolution.py deleted file mode 100644 index 8af49ca08f7f..000000000000 --- a/tests/python/relay/strategy/arm_cpu/test_quantized_convolution.py +++ /dev/null @@ -1,405 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""microTVM cares a lot about the convolution + bias + requantize + fused ReLU use case. There have -been some accuracy issues in the past, so this test steps through a model (MobileNetV1) layer by -layer and ensures there is 1-1 correspondance at each step. This test would run way faster if we ran -the model all at once, but then we wouldn't know which layers had issues. - -Furthermore, this test uses some in-development optimizations for microTVM that aren't part of the -main pipeline. -""" - -import numpy as np -from PIL import Image -import pytest - -import tvm -import tvm.testing -from tvm import meta_schedule, relay -from tvm.testing.aot import AOTTestModel, run_and_check, AOTCompiledTestModel -from tvm.relay.backend import Executor, Runtime -from tvm.micro.testing.aot_test_utils import AOT_CORSTONE300_RUNNER -from tvm.contrib.download import download_testdata -from test_generalized_conv2d import change_ndarray_layout - - -# The model is the v0.7 version of the TinyML person detection (aka visual wake words) model. This -# is an RGB 96x96 MobileNet V1 model. 
-MODEL_URL = "https://github.com/mlcommons/tiny/raw/v0.7/benchmark/training/visual_wake_words/trained_models/vww_96_int8.tflite" -SAMPLE_URL = ( - "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/elephant-299.jpg" -) -MODEL_NUM_CONVS = 27 - - -@pytest.fixture(scope="module") -def interpreter(): - """Returns a TFLite interpreter with the MLPerf Tiny visual wakewords model loaded, with an - elephant image run through it, and with all intermediate layer outputs saved.""" - - # Make sure the Tensorflow import is skipped if the test is being skipped. This is needed to - # prevent the "python: i386" tests from failing, as they don't have Tensorflow installed. - import tensorflow as tf # pylint: disable=import-outside-toplevel - - # Download the reference model - rel_model_path = "model_microtvm_mobilenetv1.tflite" - file = download_testdata(MODEL_URL, rel_model_path, overwrite=False) - - # Load it into TensorFlow and allocate memory - interpreter = tf.lite.Interpreter(file, experimental_preserve_all_tensors=True) - interpreter.allocate_tensors() - - # Download an image. The neuron activations are strange if we use random data or ones, - # so downloading an image is useful. - rel_image_path = "image_microtvm_mobilenetv1.jpg" - img_path = download_testdata(SAMPLE_URL, rel_image_path, overwrite=False) - image = Image.open(img_path).resize((96, 96)) - image_data_hwc_uint8 = np.asarray(image) - assert image_data_hwc_uint8.shape == (96, 96, 3) - assert image_data_hwc_uint8.dtype == "uint8" - image_data_nhwc_int8 = (image_data_hwc_uint8 + 128).view("int8").reshape((1, 96, 96, 3)) - - # Load the image into the TFLite interpreter and compute all intermediate tensor values - input_details = interpreter.get_input_details() - interpreter.set_tensor(input_details[0]["index"], image_data_nhwc_int8) - interpreter.invoke() - return interpreter - - -def _get_mobilenet_v1_layer_attributes(layer_num): - """Returns the relevant padding and stride for a given layer in a MobileNetV1 model. It's a huge - headache to read this data from TensorFlow, as it is not user accessible via the interpreter. If - we really wanted to, we would have to parse the .tflite file ourselves. This function is a bit - of a hack, but lets us skip that.""" - - if layer_num == 0: # Regular conv2d - return ((0, 0, 1, 1), (2, 2), False) - if layer_num % 2 == 0: # 1x1 conv2d - return ((0, 0, 0, 0), (1, 1), False) - if layer_num in [3, 7, 11, 23]: # Downsizing depthwise_conv2d layers - return ((0, 0, 1, 1), (2, 2), True) - # Depthwise conv2d - return ((1, 1, 1, 1), (1, 1), True) - - -@pytest.mark.parametrize("layer", range(2, 27, 2)) -@tvm.testing.requires_package("tensorflow") -def test_empty_channel_detection(interpreter, layer): - """Some models (mainly MobileNetV1) have kernels with many output channels full entirely of - zeroes. The VWW model is one of these. This test confirms that the outputs of these channels, - as computed by TensorFlow, are indeed not dependent upon the input values. 
- """ - - _, kernel, bias, output = _load_tflite_layer(interpreter, layer) - kernel_data, _ = kernel - bias_data, bias_quant = bias - output_data, output_quant = output - is_depthwise = _get_mobilenet_v1_layer_attributes(layer)[2] - assert not is_depthwise - assert kernel_data.shape[1] == kernel_data.shape[2] == 1 - - out_channels = kernel_data.shape[3] - fixed_channels = {} - - out_zero_point = output_quant["zero_points"][0] - assert out_zero_point == -128 - - for i in range(out_channels): - # Skip over output channels with data - if np.any(kernel_data[i, 0, 0, :]): - continue - - scale = bias_quant["scales"][i] / output_quant["scales"][0] - channel_constant = round(bias_data[i] * scale + out_zero_point) - clipped = min(127, max(-128, channel_constant)) - - out_channel_values = output_data[0, :, :, i].flatten() - assert all(x == clipped for x in out_channel_values) - fixed_channels[i] = clipped - - # Check if we are on the final convolution and skip the next test if so - if layer + 1 >= MODEL_NUM_CONVS: - return - - # We now need to compute values for the following depthwise layer - depthwise_output = _load_tflite_layer(interpreter, layer + 1)[3][0] - is_depthwise = _get_mobilenet_v1_layer_attributes(layer + 1)[2] - assert is_depthwise - - for i in fixed_channels: - assert np.all(depthwise_output[:, :, :, i] == depthwise_output[0, 0, 0, i]) - - -def _get_relu_activation_prefix(layer_num): - if layer_num == 0: - return "model/activation/Relu;" - return f"model/activation_{layer_num}/Relu;" - - -def _get_main_path_tensor_details(details, tensor_num): - """A "main path" tensor is a fused layer input/output. Gets the tensor details from the tensor - index, where 0 gives the original input tensor, 1 gives the output of the first fused - convolution layer, and so on. TFLite names are a little wack, so we get this information by - finding the SECOND tensor (which has the suffix "1") for each ReLU activation (the first tensor - is the bias).""" - - if tensor_num == 0: - return details[0] - prefix = _get_relu_activation_prefix(tensor_num - 1) - detail = next(d for d in details if d["name"].startswith(prefix) and d["name"].endswith("1")) - assert len(detail["shape"]) == 4 - assert detail["dtype"] == np.int8 - return detail - - -def _get_bias_details(details, layer_num): - """Gets the tensor details for the bias tensor for the corresponding convolution layer. The - bias tensors always appear before the main path tensors, so we don't have to check the ending to - make sure we have the right one.""" - prefix = _get_relu_activation_prefix(layer_num) - detail = next(d for d in details if d["name"].startswith(prefix)) - assert len(detail["shape"]) == 1 - assert detail["dtype"] == np.int32 - return detail - - -def _get_kernel_details(details, layer_num): - """Gets the tensor details for the kernel tensor for the corresponding convolution layer. These - have a different naming scheme from the main path and bias tensors, as they are converted before - activation function fusion. 
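# A small worked example (illustrative numbers, not taken from the model) of the
# per-channel arithmetic in test_empty_channel_detection above: when every kernel value
# feeding an output channel is zero, conv + bias + requantize collapses to one constant.
bias_value = 1800            # hypothetical int32 bias for such a channel
bias_scale = 2.4e-05         # hypothetical bias quantization scale
output_scale = 0.023         # hypothetical output quantization scale
output_zero_point = -128     # matches the assertion in the test above

scale = bias_scale / output_scale
channel_constant = round(bias_value * scale + output_zero_point)  # -126 for these numbers
clipped = min(127, max(-128, channel_constant))                   # clamp into int8 range
assert clipped == -126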
Note that regular vs depthwise conv2ds have different prefixes.""" - - if layer_num == 0: - prefix = "model/conv2d/Conv2D" - elif layer_num % 2 == 0: - prefix = f"model/conv2d_{layer_num // 2}/" - else: - prefix = f"model/batch_normalization_{layer_num}/" - - detail = next(d for d in details if d["name"].startswith(prefix)) - assert len(detail["shape"]) == 4 - assert detail["dtype"] == np.int8 - return detail - - -def _get_quant_scale_const(quantization_dict, as_scalar=False): - scales = quantization_dict["scales"] - if as_scalar: - assert len(scales) == 1 - scales = scales[0] - return relay.const(scales, "float32") - - -def _get_quant_zp_const(quantization_dict, as_scalar=False): - zero_points = quantization_dict["zero_points"] - if as_scalar: - assert len(zero_points) == 1 - zero_points = zero_points[0] - return relay.const(zero_points, "int32") - - -def _change_layout(data, old_layout, new_layout, dtype): - return change_ndarray_layout(data, old_layout, new_layout).astype(dtype) - - -def _load_tflite_layer(interpreter, layer): - tensor_details = interpreter.get_tensor_details() - - def lookup(detail): - return interpreter.get_tensor(detail["index"]), detail["quantization_parameters"] - - input_data = lookup(_get_main_path_tensor_details(tensor_details, layer)) - kernel_data = lookup(_get_kernel_details(tensor_details, layer)) - bias_data = lookup(_get_bias_details(tensor_details, layer)) - output_data = lookup(_get_main_path_tensor_details(tensor_details, layer + 1)) - return input_data, kernel_data, bias_data, output_data - - -def _make_relay_partial_func(relay_op, *args, **kwargs): - return lambda op: relay_op(op, *args, **kwargs) - - -def _make_conv2d_op(kernel, data_quant, kernel_quant, hyperparams, is_depthwise=False): - dtype, padding, strides, data_layout, kernel_layout, output_layout = hyperparams - kernel_size = kernel.shape[1:3] - if is_depthwise: - channels = groups = kernel.shape[3] - else: - channels = kernel.shape[0] - groups = 1 - - kernel_ndarr = _change_layout(kernel, "OHWI", kernel_layout, dtype) - - return _make_relay_partial_func( - relay.qnn.op.conv2d, - relay.const(kernel_ndarr, dtype), - input_zero_point=_get_quant_zp_const(data_quant, as_scalar=True), - kernel_zero_point=_get_quant_zp_const(kernel_quant), - input_scale=_get_quant_scale_const(data_quant, as_scalar=True), - kernel_scale=_get_quant_scale_const(kernel_quant), - kernel_size=kernel_size, - data_layout=data_layout, - kernel_layout="IOHW" if is_depthwise else kernel_layout, - dilation=(1, 1), - strides=strides, - padding=padding, - groups=groups, - channels=channels, - out_dtype="int32", - out_layout=output_layout, - ) - - -def _make_bias_op(bias, output_layout): - requantize_axis = output_layout.index("C") - return _make_relay_partial_func( - relay.op.nn.bias_add, - relay.const(bias, "int32"), - axis=requantize_axis, - ) - - -def _make_requantize_op(bias_quant, output_quant, output_dtype, output_layout): - requantize_axis = output_layout.index("C") - return _make_relay_partial_func( - relay.qnn.op.requantize, - _get_quant_scale_const(bias_quant), - _get_quant_zp_const(bias_quant), - _get_quant_scale_const(output_quant, as_scalar=True), - _get_quant_zp_const(output_quant, as_scalar=True), - axis=requantize_axis, - compute_dtype="int64", - out_dtype=output_dtype, - ) - - -def _make_aot_model(params, hyperparams, layouts, is_depthwise=False): - tensors, quantizations = zip(*params) - data, kernel, bias, output = tensors - data_quant, kernel_quant, bias_quant, output_quant = quantizations - - dtype, 
_padding, _strides = hyperparams - data_layout, _, output_layout = layouts - data_ndarr = _change_layout(data, "NHWC", data_layout, dtype) - output_ndarr = _change_layout(output, "NHWC", output_layout, dtype) - - input_var = relay.var("input", relay.TensorType(data_ndarr.shape, dtype)) - conv2d = _make_conv2d_op(kernel, data_quant, kernel_quant, hyperparams + layouts, is_depthwise) - bias = _make_bias_op(bias, output_layout) - requantize = _make_requantize_op(bias_quant, output_quant, dtype, output_layout) - - relay_mod = requantize(bias(conv2d(input_var))) - relay_func = relay.Function([input_var], relay_mod) - return AOTTestModel( - module=tvm.IRModule.from_expr(relay_func), - inputs={"input": data_ndarr}, - outputs={"output": output_ndarr}, - output_tolerance=1, - ) - - -def _make_target(): - return tvm.target.Target("c -keys=arm_cpu -mcpu=cortex-m7") - - -def _make_executor(): - return Executor( - "aot", - { - "workspace-byte-alignment": 8, - "constant-byte-alignment": 8, - "interface-api": "c", - "unpacked-api": True, - }, - ) - - -@pytest.mark.parametrize("output_layout", ["NHWC", "NCHW"]) -@pytest.mark.parametrize("layer", range(27)) -@tvm.testing.requires_corstone300 -def test_qnn_conv2d_mobilenetv1_layer(interpreter, layer, output_layout): - """Checks microTVM output against TFLite for one MobileNetV1 layer. - - Loads the input, kernel, bias, expected output, and quantization parameters from the specified - layer in a TFLite Interpreter. That information is used to construct a Relay Function with the - same structure. The Function is run using microTVM and AOTTestModel, and we verify microTVM's - output is the same as the TFLite ground truth. - - This function only cross-checks the first 27 layers in MobileNetV1, which are regular and - depthwise 2D convolutions (this function only works for 2D convolutions). We do not test the - average pool, dense, or softmax layers at the end of the model. - - Note that we disable the QNN Legalization pass. This allows TVM to use its QNN compute - definitions, fuse the three operations together, and perform other optimizations. - - Parameters - ---------- - interpreter: tensorflow.lite.python.interpreter.Interpreter - A TensorFlow Lite interpreter for a MobileNetV1 model, where invoke() has already been - called and experimental_preserve_all_tensors=True. Should be passed as a Pytest fixture. - - layer: int - The index of the layer to check against TensorFlow's ground truth values. - - output_layout: str - The output_layout for microTVM to use. Does not have to match the TensorFlow layout. 
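# A rough sketch of the element-wise behaviour assumed for the qnn.requantize stage built
# by _make_requantize_op above (assumed semantics, not taken from the patch): the int32
# conv + bias accumulator is rescaled into the output tensor's quantized domain.
def requantize_scalar(acc, in_scale, in_zp, out_scale, out_zp, lo=-128, hi=127):
    real_value = (acc - in_zp) * in_scale                # dequantize the accumulator
    quantized = round(real_value / out_scale) + out_zp   # move into the output domain
    return min(hi, max(lo, quantized))                   # clip to the output dtype's range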
- """ - dtype = "int16" - - tensor, kernel, bias, output = _load_tflite_layer(interpreter, layer) - - padding, strides, is_depthwise = _get_mobilenet_v1_layer_attributes(layer) - if is_depthwise: - data_layout, kernel_layout = "NCHW", "OIHW" - else: - data_layout, kernel_layout = "NHWC", "OHWI" - - test_model = _make_aot_model( - (tensor, kernel, bias, output), - (dtype, padding, strides), - (data_layout, kernel_layout, output_layout), - is_depthwise=is_depthwise, - ) - - def schedule_fn(_sch): - return True - - with tvm.transform.PassContext( - opt_level=3, - config={ - "tir.disable_vectorize": True, - "relay.backend.use_meta_schedule": True, - "relay.backend.tir_converter": "allow_extern", - }, - disabled_pass=["qnn.Legalize"], - ), meta_schedule.database.ScheduleFnDatabase(schedule_fn): - executor_factory = tvm.relay.build( - test_model.module, - _make_target(), - executor=_make_executor(), - runtime=Runtime("crt"), - params=test_model.params, - mod_name=test_model.name, - ) - compiled = AOTCompiledTestModel(model=test_model, executor_factory=executor_factory) - - run_and_check( - models=[compiled], - runner=AOT_CORSTONE300_RUNNER, - interface_api="c", - workspace_byte_alignment=8, - constant_byte_alignment=8, - ) diff --git a/tests/python/relay/test_link_params.py b/tests/python/relay/test_link_params.py index 594567eff3ae..35ca74d6f8e7 100644 --- a/tests/python/relay/test_link_params.py +++ b/tests/python/relay/test_link_params.py @@ -328,60 +328,6 @@ def _run_unlinked(lib_mod): np.testing.assert_allclose(unlinked_output.numpy(), linked_output.numpy()) -@tvm.testing.requires_micro -def test_crt_link_params(linkable_dtype): - from tvm import micro - - mod, param_init = _make_mod_and_params(linkable_dtype) - rand_input = _make_random_tensor(linkable_dtype, INPUT_SHAPE) - main_func = mod["main"] - target = "c" - runtime = Runtime("crt", {"system-lib": True}) - executor = Executor("graph", {"link-params": True}) - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - factory = tvm.relay.build( - mod, target, runtime=runtime, executor=executor, params=param_init - ) - assert len(factory.get_params().keys()) == 0 # NOTE: params became tir.constants - - temp_dir = tvm.contrib.utils.tempdir() - template_project_dir = tvm.micro.get_microtvm_template_projects("crt") - project = tvm.micro.generate_project( - template_project_dir, factory, temp_dir / "project", {"verbose": 1} - ) - project.build() - project.flash() - with tvm.micro.Session(project.transport()) as sess: - graph_rt = tvm.micro.session.create_local_graph_executor( - factory.get_graph_json(), sess.get_system_lib(), sess.device - ) - - assert len(factory.params.keys()) == 0 # NOTE: params became tir.constants - - # NOTE: not setting params here. 
- graph_rt.set_input("rand_input", rand_input) - graph_rt.run() - linked_output = graph_rt.get_output(0).numpy() - - runtime = Runtime("cpp", {"system-lib": True}) - with tvm.transform.PassContext(opt_level=3): - lib = tvm.relay.build(mod, "llvm", runtime=runtime, params=param_init) - - def _run_unlinked(lib): - graph_json, mod, lowered_params = lib - graph_rt = tvm.contrib.graph_executor.create(graph_json, mod, tvm.cpu(0)) - graph_rt.set_input("rand_input", rand_input, **lowered_params) - graph_rt.run() - return graph_rt.get_output(0).numpy() - - unlinked_output = _run_unlinked(lib) - - if "int" in linkable_dtype: - np.testing.assert_equal(unlinked_output, linked_output) - else: - np.testing.assert_allclose(unlinked_output, linked_output) - - def test_tir_link_params(): def get_dense(data_shape, weight_shape): data = relay.var("data", shape=data_shape, dtype="float32") diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 5ee1c955b093..524e93408c8c 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -189,67 +189,6 @@ def check_graph_executor_result(): check_graph_executor_result() -def test_multi_node_compiler(): - x = relay.var("x", shape=(10, 10)) - w0 = relay.var("w0", shape=(10, 10)) - w1 = relay.var("w1", shape=(10, 10)) - w2 = relay.var("w2", shape=(10, 10)) - w3 = relay.var("w3", shape=(10, 10)) - w4 = relay.var("w4", shape=(10, 10)) - w5 = relay.var("w5", shape=(10, 10)) - w6 = relay.var("w6", shape=(10, 10)) - w7 = relay.var("w7", shape=(10, 10)) - - # C compiler - # FIXME: We generate two compilers for this case but they should be merged to one - # due to the common input (x). - z0 = relay.add(x, w0) - p0 = relay.subtract(z0, w1) - q0 = relay.multiply(p0, w2) - - z1 = relay.add(x, w3) - p1 = relay.subtract(z1, w4) - q1 = relay.multiply(p1, w5) - - # Other parts on TVM - z2 = relay.add(x, w6) - q2 = relay.subtract(z2, w7) - - r = relay.concatenate((q0, q1, q2), axis=0) - f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r) - mod = tvm.IRModule() - ann = byoc.CcompilerAnnotator() - mod["main"] = ann.visit(f) - mod = transform.PartitionGraph()(mod) - mod = transform.InferType()(mod) - - x_data = np.random.rand(10, 10).astype("float32") - w_data = [] - for _ in range(8): - w_data.append(np.random.rand(10, 10).astype("float32")) - - map_inputs = {"w{}".format(i): w_data[i] for i in range(8)} - map_inputs["x"] = x_data - - targets = [("llvm", Runtime("cpp")), ("c", Runtime("crt", {"system-lib": True}))] - for tgt, rt in targets: - check_result( - mod, - map_inputs, - (30, 10), - np.concatenate( - ( - ((x_data + w_data[0]) - w_data[1]) * w_data[2], - ((x_data + w_data[3]) - w_data[4]) * w_data[5], - x_data + w_data[6] - w_data[7], - ), - axis=0, - ), - target=tgt, - runtime=rt, - ) - - def test_extern_ccompiler_single_op(): @transform.function_pass(opt_level=0) class MyAnnotator: diff --git a/tests/python/relay/test_target_hooks.py b/tests/python/relay/test_target_hooks.py index 1df2b2a919a3..0b888781873f 100644 --- a/tests/python/relay/test_target_hooks.py +++ b/tests/python/relay/test_target_hooks.py @@ -27,7 +27,6 @@ from utils.external_codegen import ( parametrize_external_codegen_checks, set_external_func_attr, - check_aot_executor_result, check_graph_executor_result, check_vm_result, ) @@ -99,7 +98,7 @@ def @replace_add_with_subtract(%x: Tensor[(8), float32], %y: Tensor[(8), float32 ) -@pytest.mark.parametrize("check_result", 
[check_aot_executor_result, check_graph_executor_result]) +@pytest.mark.parametrize("check_result", [check_graph_executor_result]) def test_runtime_module_generation(check_result): shape = (8,) x_data = np.random.randint(255, size=shape).astype("float32") diff --git a/tests/python/relay/utils/external_codegen.py b/tests/python/relay/utils/external_codegen.py index bb06d3bb86aa..e200e885225d 100644 --- a/tests/python/relay/utils/external_codegen.py +++ b/tests/python/relay/utils/external_codegen.py @@ -27,10 +27,6 @@ skip_windows = pytest.mark.skipif(sys.platform == "win32", reason="Skip test on Windows for now") -skip_micro = pytest.mark.skipif( - tvm.support.libinfo().get("USE_MICRO", "OFF") != "ON", - reason="MicroTVM support not enabled. Set USE_MICRO=ON in config.cmake to enable.", -) def parametrize_external_codegen_checks(test): @@ -38,7 +34,6 @@ def parametrize_external_codegen_checks(test): return pytest.mark.parametrize( "check_result", [ - pytest.param(check_aot_executor_result, marks=[skip_windows, skip_micro]), pytest.param(check_graph_executor_result, marks=[skip_windows]), pytest.param(check_vm_result, marks=[skip_windows]), ], @@ -100,24 +95,6 @@ def check_graph_executor_result( tvm.testing.assert_allclose(out.numpy(), result, rtol=tol, atol=tol) -def check_aot_executor_result( - mod, map_inputs, out_shape, result, tol=1e-5, target="llvm", device=tvm.cpu() -): - # Late import to avoid breaking test with USE_MICRO=OFF. - from tvm.testing.aot import AOTTestModel, compile_and_run - from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER - - interface_api = "packed" - use_unpacked_api = False - test_runner = AOT_DEFAULT_RUNNER - compile_and_run( - AOTTestModel(module=mod, inputs=map_inputs, outputs={"output": result}), - test_runner, - interface_api, - use_unpacked_api, - ) - - def set_external_func_attr(func, compiler, ext_symbol): func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) func = func.with_attr("Compiler", compiler) diff --git a/tests/python/topi/test_topi_conv2d_int8.py b/tests/python/topi/test_topi_conv2d_int8.py index fd101fb79768..cc1a16623684 100644 --- a/tests/python/topi/test_topi_conv2d_int8.py +++ b/tests/python/topi/test_topi_conv2d_int8.py @@ -28,7 +28,7 @@ from tvm.topi.utils import get_const_tuple from tvm.topi.nn.conv2d import _get_workload from tvm.topi.generic.conv2d import fallback_schedule_cpu_common_int8 -from tvm.testing.aot import get_dtype_range +from tvm.testing.utils import get_dtype_range from common import Int8Fallback import tvm.testing diff --git a/tests/scripts/ci.py b/tests/scripts/ci.py index 3c7c9a4b3a10..344c103850d7 100755 --- a/tests/scripts/ci.py +++ b/tests/scripts/ci.py @@ -266,8 +266,6 @@ def docs( f"mkdir -p {build_dir}", f"pushd {build_dir}", "cp ../cmake/config.cmake .", - # The docs import tvm.micro, so it has to be enabled in the build - "echo set\(USE_MICRO ON\) >> config.cmake", "popd", ] ) @@ -662,20 +660,6 @@ def add_subparser( "test": ("run WASM tests", ["./tests/scripts/task_web_wasm.sh"]), }, ), - generate_command( - name="cortexm", - help="Run Cortex-M build and test(s)", - options={ - "cpp": CPP_UNITTEST, - "test": ( - "run microTVM tests", - [ - "./tests/scripts/task_python_microtvm.sh", - "./tests/scripts/task_demo_microtvm.sh", - ], - ), - }, - ), generate_command( name="hexagon", help="Run Hexagon build and test(s)", @@ -705,19 +689,6 @@ def add_subparser( ), }, ), - generate_command( - name="riscv", - help="Run RISC-V build and test(s)", - options={ - "cpp": CPP_UNITTEST, - "python": ( - "run 
full Python tests", - [ - "./tests/scripts/task_riscv_microtvm.sh", - ], - ), - }, - ), generate_command( name="adreno", help="Run Adreno build and test(s)", diff --git a/tests/scripts/release/make_notes.py b/tests/scripts/release/make_notes.py index 8877d97253dd..704e2eedbd24 100644 --- a/tests/scripts/release/make_notes.py +++ b/tests/scripts/release/make_notes.py @@ -35,7 +35,6 @@ "cuda": "cuda & cutlass & tensorrt", "cutlass": "cuda & cutlass & tensorrt", "tensorrt": "cuda & cutlass & tensorrt", - "ethosn": "Ethosn", "hexagon": "Hexagon", "metal": "Metal", "vulkan": "Vulkan", diff --git a/tests/scripts/request_hook/request_hook.py b/tests/scripts/request_hook/request_hook.py index 8e400a5c7703..80fd7f6f3d3d 100644 --- a/tests/scripts/request_hook/request_hook.py +++ b/tests/scripts/request_hook/request_hook.py @@ -147,7 +147,6 @@ "https://homes.cs.washington.edu/~moreau/media/vta/cat.jpg": f"{BASE}/vta_cat.jpg", "https://objects.githubusercontent.com/github-production-release-asset-2e65be/130932608/4b196a8a-4e2d-11e8-9a11-be3c41846711?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20221004%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20221004T170456Z&X-Amz-Expires=300&X-Amz-Signature=0602b68e8864b9b01c9142eee22aed3543fe98a5482686eec33d98e2617a2295&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=130932608&response-content-disposition=attachment%3B%20filename%3Dmobilenet_v2_weights_tf_dim_ordering_tf_kernels_0.5_224.h5&response-content-type=application%2Foctet-stream": f"{BASE}/2022-10-05/aws-mobilenet_v2_weights_tf_dim_ordering_tf_kernels_0.5_224.h5", "https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip": f"{BASE}/oneflow/resnet18.zip", - "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/model/sine_model.tflite": f"{BASE}/tlc-pack/web-data/testdata/microTVM/model/sine_model.tflite", "https://pjreddie.com/media/files/yolov3-tiny.weights?raw=true": f"{BASE}/yolov3-tiny.weights", "https://pjreddie.com/media/files/yolov3.weights": f"{BASE}/yolov3.weights", "https://raw.githubusercontent.com/Cadene/pretrained-models.pytorch/master/data/imagenet_classes.txt": f"{BASE}/2022-10-05/imagenet_classes.txt", @@ -205,12 +204,6 @@ "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/keyword_spotting/trained_models/kws_ref_model.tflite": f"{BASE}/mlcommons/tiny/benchmark/training/keyword_spotting/trained_models/kws_ref_model.tflite", "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/anomaly_detection/trained_models/ToyCar/baseline_tf23/model/model_ToyCar_quant_fullint_micro.tflite": f"{BASE}/mlcommons/tiny/benchmark/training/anomaly_detection/trained_models/ToyCar/baseline_tf23/model/model_ToyCar_quant_fullint_micro.tflite", "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/image_classification/trained_models/pretrainedResnet_quant.tflite": f"{BASE}/mlcommons/tiny/benchmark/training/image_classification/trained_models/pretrainedResnet_quant.tflite", - "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/keyword_spotting_int8_6.pyc.npy": f"{BASE}/tlc-pack/web-data/raw/main/testdata/microTVM/data/keyword_spotting_int8_6.pyc.npy", - "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/visual_wake_word_int8_1.npy": 
f"{BASE}/tlc-pack/web-data/raw/main/testdata/microTVM/data/visual_wake_word_int8_1.npy", - "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/anomaly_detection_normal_id_01.npy": f"{BASE}/tlc-pack/web-data/raw/main/testdata/microTVM/data/anomaly_detection_normal_id_01.npy", - "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/image_classification_int8_0.npy": f"{BASE}/tlc-pack/web-data/raw/main/testdata/microTVM/data/image_classification_int8_0.npy", - "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/vww_sample_person.jpg": f"{BASE}/tlc-pack/web-data/testdata/microTVM/data/vww_sample_person.jpg", - "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/vww_sample_not_person.jpg": f"{BASE}/tlc-pack/web-data/testdata/microTVM/data/vww_sample_not_person.jpg", "https://github.com/tensorflow/tflite-micro/raw/de8f61a074460e1fa5227d875c95aa303be01240/tensorflow/lite/micro/models/keyword_scrambled.tflite": f"{BASE}/models/tflite/keyword_scrambled_8bit.tflite", "https://github.com/Grovety/ModelZoo/raw/52fb82156ae8c8e3f62c7d7caf6867b25261dda4/models/object_detection/ssd_mobilenet_v1/tflite_int8/tflite_graph_with_regular_nms.pb": f"{BASE}/ssd_mobilenet_v1/tflite_int8/tflite_graph_with_regular_nms.pb", } diff --git a/tests/scripts/task_build.py b/tests/scripts/task_build.py index 742436680208..96fc781744f9 100755 --- a/tests/scripts/task_build.py +++ b/tests/scripts/task_build.py @@ -41,6 +41,10 @@ args = parser.parse_args() sccache_exe = shutil.which("sccache") + if args.cmake_target in ["standalone_crt", "crttest"]: + logging.info("Skipping standalone_crt build") + exit(0) + use_sccache = sccache_exe is not None build_dir = Path(os.getcwd()) / args.build_dir build_dir = build_dir.relative_to(REPO_ROOT) diff --git a/tests/scripts/task_clean.sh b/tests/scripts/task_clean.sh index 3ae70b346c1b..c84dbfff067a 100755 --- a/tests/scripts/task_clean.sh +++ b/tests/scripts/task_clean.sh @@ -18,4 +18,4 @@ set -euxo pipefail echo "Cleanup data..." -cd $1 && rm -rf standalone_crt && rm -rf host_standalone_crt && rm -rf CMake* && cd .. +cd $1 && rm -rf CMake* && cd .. diff --git a/tests/scripts/task_config_build_arm.sh b/tests/scripts/task_config_build_arm.sh index 87a3ee24750a..48ce67f9f790 100755 --- a/tests/scripts/task_config_build_arm.sh +++ b/tests/scripts/task_config_build_arm.sh @@ -25,8 +25,6 @@ cp ../cmake/config.cmake . echo set\(USE_SORT ON\) >> config.cmake echo set\(USE_RPC ON\) >> config.cmake -echo set\(USE_MICRO ON\) >> config.cmake -echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake echo set\(USE_PROFILER ON\) >> config.cmake echo set\(USE_LLVM llvm-config-17\) >> config.cmake echo set\(CMAKE_CXX_FLAGS -Werror\) >> config.cmake diff --git a/tests/scripts/task_config_build_cortexm.sh b/tests/scripts/task_config_build_cortexm.sh index a06a8a13e5db..a32e8dd61b6d 100755 --- a/tests/scripts/task_config_build_cortexm.sh +++ b/tests/scripts/task_config_build_cortexm.sh @@ -24,9 +24,6 @@ cd "$BUILD_DIR" cp ../cmake/config.cmake . 
echo set\(USE_SORT ON\) >> config.cmake -echo set\(USE_MICRO ON\) >> config.cmake -echo set\(USE_CMSISNN ON\) >> config.cmake -echo set\(USE_ETHOSU ON\) >> config.cmake echo set\(USE_UMA ON\) >> config.cmake echo set\(USE_PROFILER ON\) >> config.cmake echo set\(USE_LLVM llvm-config-15\) >> config.cmake diff --git a/tests/scripts/task_config_build_cpu.sh b/tests/scripts/task_config_build_cpu.sh index c97321e538bd..b657d0c74b34 100755 --- a/tests/scripts/task_config_build_cpu.sh +++ b/tests/scripts/task_config_build_cpu.sh @@ -24,8 +24,6 @@ cd "$BUILD_DIR" cp ../cmake/config.cmake . echo set\(USE_SORT ON\) >> config.cmake -echo set\(USE_MICRO ON\) >> config.cmake -echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake echo set\(USE_PROFILER ON\) >> config.cmake echo set\(USE_DNNL ON\) >> config.cmake echo set\(USE_ARM_COMPUTE_LIB ON\) >> config.cmake @@ -45,14 +43,10 @@ fi echo set\(USE_TENSORFLOW_PATH \"/tensorflow\"\) >> config.cmake echo set\(USE_FLATBUFFERS_PATH \"/flatbuffers\"\) >> config.cmake -echo set\(USE_ETHOSN /opt/arm/ethosn-driver\) >> config.cmake -echo set\(USE_ETHOSN_HW OFF\) >> config.cmake -echo set\(USE_CMSISNN OFF\) >> config.cmake echo set\(USE_VITIS_AI ON\) >> config.cmake echo set\(USE_LIBBACKTRACE COMPILE\) >> config.cmake echo set\(BACKTRACE_ON_SEGFAULT ON\) >> config.cmake echo set\(USE_CCACHE OFF\) >> config.cmake -echo set\(USE_ETHOSU OFF\) >> config.cmake echo set\(USE_UMA ON\) >> config.cmake echo set\(SUMMARIZE ON\) >> config.cmake echo set\(USE_MSC ON\) >> config.cmake diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh index 530c44e170ef..e3599695a969 100755 --- a/tests/scripts/task_config_build_gpu.sh +++ b/tests/scripts/task_config_build_gpu.sh @@ -30,8 +30,6 @@ echo set\(USE_VULKAN ON\) >> config.cmake echo set\(USE_OPENGL ON\) >> config.cmake echo set\(USE_OPENCL ON\) >> config.cmake echo set\(USE_OPENCL_GTEST \"/googletest\"\) >> config.cmake -echo set\(USE_MICRO ON\) >> config.cmake -echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake echo set\(USE_LLVM \"/usr/bin/llvm-config-15 --link-static\"\) >> config.cmake echo set\(USE_RPC ON\) >> config.cmake echo set\(USE_SORT ON\) >> config.cmake @@ -49,6 +47,5 @@ echo set\(SUMMARIZE ON\) >> config.cmake echo set\(HIDE_PRIVATE_SYMBOLS ON\) >> config.cmake echo set\(USE_PIPELINE_EXECUTOR ON\) >> config.cmake echo set\(USE_CUTLASS ON\) >> config.cmake -echo set\(USE_CMSISNN ON\) >> config.cmake echo set\(USE_MSC ON\) >> config.cmake echo set\(CMAKE_CUDA_ARCHITECTURES 75\) >> config.cmake diff --git a/tests/scripts/task_config_build_gpu_other.sh b/tests/scripts/task_config_build_gpu_other.sh index 6fb10d44508a..747e1006e507 100755 --- a/tests/scripts/task_config_build_gpu_other.sh +++ b/tests/scripts/task_config_build_gpu_other.sh @@ -27,7 +27,6 @@ cp ../cmake/config.cmake . echo set\(USE_OPENCL ON\) >> config.cmake echo set\(USE_ROCM ON\) >> config.cmake -echo set\(USE_MICRO ON\) >> config.cmake echo set\(USE_PROFILER ON\) >> config.cmake echo set\(USE_LIBBACKTRACE OFF\) >> config.cmake echo set\(CMAKE_CXX_FLAGS -Werror\) >> config.cmake diff --git a/tests/scripts/task_config_build_hexagon.sh b/tests/scripts/task_config_build_hexagon.sh index a3a42f18ee4e..ce0a7fd5f620 100755 --- a/tests/scripts/task_config_build_hexagon.sh +++ b/tests/scripts/task_config_build_hexagon.sh @@ -25,8 +25,6 @@ cp ../cmake/config.cmake . 
 echo set\(USE_SORT ON\) >> config.cmake
 echo set\(USE_RPC ON\) >> config.cmake
-echo set\(USE_MICRO ON\) >> config.cmake
-echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake
 echo set\(USE_LLVM "${CLANG_LLVM_HOME}/bin/llvm-config"\) >> config.cmake
 
 if [[ ${CI:-false} == "true" ]]; then
diff --git a/tests/scripts/task_config_build_i386.sh b/tests/scripts/task_config_build_i386.sh
index 9d05d102ae0e..f5cbad42bbf2 100755
--- a/tests/scripts/task_config_build_i386.sh
+++ b/tests/scripts/task_config_build_i386.sh
@@ -25,8 +25,6 @@ cp ../cmake/config.cmake .
 
 echo set\(USE_SORT ON\) >> config.cmake
 echo set\(USE_RPC ON\) >> config.cmake
-echo set\(USE_MICRO ON\) >> config.cmake
-echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake
 echo set\(USE_PROFILER ON\) >> config.cmake
 echo set\(USE_LLVM llvm-config-10\) >> config.cmake
 echo set\(CMAKE_CXX_FLAGS -Werror\) >> config.cmake
diff --git a/tests/scripts/task_config_build_minimal.sh b/tests/scripts/task_config_build_minimal.sh
index 1a9f3c455c29..7321223f0002 100755
--- a/tests/scripts/task_config_build_minimal.sh
+++ b/tests/scripts/task_config_build_minimal.sh
@@ -32,4 +32,3 @@ echo set\(HIDE_PRIVATE_SYMBOLS ON\) >> config.cmake
 echo set\(USE_LIBBACKTRACE COMPILE\) >> config.cmake
 echo set\(USE_CCACHE OFF\) >> config.cmake
 echo set\(SUMMARIZE ON\) >> config.cmake
-echo set\(USE_MICRO ON\) >> config.cmake
diff --git a/tests/scripts/task_config_build_minimal_cross_isa.sh b/tests/scripts/task_config_build_minimal_cross_isa.sh
index 7fba9deee6a1..eacbb80d3440 100755
--- a/tests/scripts/task_config_build_minimal_cross_isa.sh
+++ b/tests/scripts/task_config_build_minimal_cross_isa.sh
@@ -24,7 +24,6 @@ cd "$BUILD_DIR"
 cp ../cmake/config.cmake .
 
 echo set\(USE_SORT ON\) >> config.cmake
-echo set\(USE_MICRO ON\) >> config.cmake
 echo set\(USE_RELAY_DEBUG ON\) >> config.cmake
 echo set\(CMAKE_BUILD_TYPE=Debug\) >> config.cmake
 echo set\(CMAKE_CXX_FLAGS \"-Werror -Wp,-D_GLIBCXX_ASSERTIONS\"\) >> config.cmake
diff --git a/tests/scripts/task_config_build_riscv.sh b/tests/scripts/task_config_build_riscv.sh
index 2583c5c3238f..f979b0052b1a 100755
--- a/tests/scripts/task_config_build_riscv.sh
+++ b/tests/scripts/task_config_build_riscv.sh
@@ -24,8 +24,6 @@ cd "$BUILD_DIR"
 cp ../cmake/config.cmake .
 
 echo set\(USE_SORT ON\) >> config.cmake
-echo set\(USE_MICRO ON\) >> config.cmake
-echo set\(USE_CMSISNN ON\) >> config.cmake
 echo set\(USE_UMA ON\) >> config.cmake
 echo set\(USE_PROFILER ON\) >> config.cmake
 echo set\(USE_LLVM llvm-config-15\) >> config.cmake
diff --git a/tests/scripts/task_config_build_wasm.sh b/tests/scripts/task_config_build_wasm.sh
index 86ab32c71b3c..d92bb83deba4 100755
--- a/tests/scripts/task_config_build_wasm.sh
+++ b/tests/scripts/task_config_build_wasm.sh
@@ -24,8 +24,6 @@ cd "$BUILD_DIR"
 cp ../cmake/config.cmake .
 
 echo set\(USE_SORT ON\) >> config.cmake
-echo set\(USE_MICRO ON\) >> config.cmake
-echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake
 echo set\(USE_PROFILER ON\) >> config.cmake
 echo set\(USE_LLVM llvm-config-15\) >> config.cmake
 echo set\(USE_ANTLR ON\) >> config.cmake
diff --git a/tests/scripts/task_convert_scripts_to_python.sh b/tests/scripts/task_convert_scripts_to_python.sh
deleted file mode 100755
index 521abc5e208c..000000000000
--- a/tests/scripts/task_convert_scripts_to_python.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-set -euxo pipefail
-
-SCRIPTS_DIR=$(dirname "${BASH_SOURCE[0]}")
-TVM_DIR=$(cd "${SCRIPTS_DIR}" && git rev-parse --show-toplevel)
-python3 "${TVM_DIR}/docs/script_convert.py" "${TVM_DIR}/gallery/how_to/work_with_microtvm/micro_tvmc.sh"
diff --git a/tests/scripts/task_demo_microtvm.sh b/tests/scripts/task_demo_microtvm.sh
index 19ae297480bf..552ddcb56f37 100755
--- a/tests/scripts/task_demo_microtvm.sh
+++ b/tests/scripts/task_demo_microtvm.sh
@@ -18,47 +18,5 @@
 
 set -euxo pipefail
 
-source tests/scripts/setup-pytest-env.sh
-
-TMP_LOG_FILE=/tmp/$$.out.log
-
-cleanup()
-{
-  rm -f $TMP_LOG_FILE
-}
-trap cleanup 0
-
-pushd apps/microtvm/cmsisnn
-timeout 5m ./run_demo.sh > $TMP_LOG_FILE
-cat $TMP_LOG_FILE
-if ! grep -q "Person detected." $TMP_LOG_FILE; then
-    echo "The demo returned the wrong result"
-    exit 1
-fi
-popd
-
-# TODO(mehrdadh): disabled due to https://github.com/apache/tvm/issues/13856
-# pushd apps/microtvm/zephyr_cmsisnn
-# timeout 5m ./run_demo.sh
-# popd
-
-pushd apps/microtvm/ethosu
-FVP_PATH="/opt/arm/FVP_Corstone_SSE-300_Ethos-U55"
-CMAKE_PATH="/opt/arm/cmake/bin/cmake"
-FREERTOS_PATH="/opt/freertos/FreeRTOSv202112.00"
-
-timeout 5m ./run_demo.sh --fvp_path $FVP_PATH --cmake_path $CMAKE_PATH > $TMP_LOG_FILE
-cat $TMP_LOG_FILE
-if ! grep -q "The image has been classified as 'tabby'" $TMP_LOG_FILE; then
-    echo "The demo returned the wrong result"
-    exit 1
-fi
-
-timeout 5m ./run_demo.sh --fvp_path $FVP_PATH --cmake_path $CMAKE_PATH --freertos_path $FREERTOS_PATH > $TMP_LOG_FILE
-cat $TMP_LOG_FILE
-if ! grep -q "The image has been classified as 'tabby'" $TMP_LOG_FILE; then
-    echo "The demo returned the wrong result"
-    exit 1
-fi
-
-popd
+# Skip this script for now.
+exit 0
diff --git a/tests/scripts/task_lint.sh b/tests/scripts/task_lint.sh
index c5497d54bf40..3b270b21f60a 100755
--- a/tests/scripts/task_lint.sh
+++ b/tests/scripts/task_lint.sh
@@ -28,9 +28,6 @@ trap cleanup 0
 
 # These shards are solely for CI to enable the lint job to have some parallelism.
 function shard1 {
-  echo "Convert scripts to Python..."
-  tests/scripts/task_convert_scripts_to_python.sh
-
   # echo "Check Jenkinsfile generation"
   # python3 ci/jenkins/generate.py --check
 
diff --git a/tests/scripts/task_microtvm_cpp_tests.sh b/tests/scripts/task_microtvm_cpp_tests.sh
index ce4c62ecee0c..4c1284d885cf 100755
--- a/tests/scripts/task_microtvm_cpp_tests.sh
+++ b/tests/scripts/task_microtvm_cpp_tests.sh
@@ -19,23 +19,4 @@
 set -euxo pipefail
 
 BUILD_DIR=$1
-
-# Python is required by apps/bundle_deploy
-source tests/scripts/setup-pytest-env.sh
-
-export LD_LIBRARY_PATH="lib:${LD_LIBRARY_PATH:-}"
-
-# to avoid CI thread throttling.
-export TVM_BIND_THREADS=0
-export OMP_NUM_THREADS=1
-
-# crttest requries USE_MICRO to be enabled.
-./build/crttest
-
-# Test MISRA-C runtime. It requires USE_MICRO to be enabled.
-pushd apps/bundle_deploy
-rm -rf build
-make test_dynamic VERBOSE=1
-make cleanall
-make test_static VERBOSE=1
-popd
+# Keep this script to pass the CI.
diff --git a/tests/scripts/task_python_docs.sh b/tests/scripts/task_python_docs.sh
index dca9c364e4e1..cfd7ef554b7b 100755
--- a/tests/scripts/task_python_docs.sh
+++ b/tests/scripts/task_python_docs.sh
@@ -58,8 +58,6 @@ sphinx_precheck() {
 
 function join_by { local IFS="$1"; shift; echo "$*"; }
 
-# Convert bash tutorials to Python format
-tests/scripts/task_convert_scripts_to_python.sh
 
 # These warnings are produced during the docs build for various reasons and are
 # known to not signficantly affect the output. Don't add anything new to this
diff --git a/tests/scripts/task_python_ethosn_tests.sh b/tests/scripts/task_python_ethosn_tests.sh
deleted file mode 100755
index 812c02798da8..000000000000
--- a/tests/scripts/task_python_ethosn_tests.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -euxo pipefail
-
-source tests/scripts/setup-pytest-env.sh
-
-
-# Rebuild cython
-# TODO(u99127): Enable cython tests.
-
-find . -type f -path "*.pyc" | xargs rm -f
-make cython3
-
-# Note: Setting ETHOSN_TEST_TARGET_CONFIG appropriately
-# (e.g. ETHOSN_TEST_TARGET_CONFIG="ethos-n -variant=n78 -tops=1 -ple_ratio=2")
-# switches the target to various NPU configurations.
-run_pytest ctypes python-ethosn tests/python/contrib/test_ethosn
diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh
index 8852898986c9..2141cf0a4211 100755
--- a/tests/scripts/task_python_integration.sh
+++ b/tests/scripts/task_python_integration.sh
@@ -60,8 +60,6 @@ run_pytest cython ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module-1 apps/dso
 
 run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-integration tests/python/integration
 
-# Ignoring Arm(R) Ethos(TM)-U NPU tests in the collective to run to run them in parallel in the next step.
-run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib tests/python/contrib --ignore=tests/python/contrib/test_ethosu --ignore=tests/python/contrib/test_cmsisnn --ignore=tests/python/contrib/test_msc
 # forked is needed because the global registry gets contaminated
 TVM_TEST_TARGETS="${TVM_RELAY_TEST_TARGETS:-llvm;cuda}" \
 run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-relay tests/python/relay --ignore=tests/python/relay/aot
diff --git a/tests/scripts/task_python_microtvm.sh b/tests/scripts/task_python_microtvm.sh
index f25554e7327b..faa9e13e927b 100755
--- a/tests/scripts/task_python_microtvm.sh
+++ b/tests/scripts/task_python_microtvm.sh
@@ -18,55 +18,5 @@
 
 set -euxo pipefail
 
-
-source tests/scripts/setup-pytest-env.sh
-
-make cython3
-
-# Zephyr
-run_pytest ctypes python-microtvm-zephyr-qemu_x86 tests/micro/zephyr --board=qemu_x86
-run_pytest ctypes python-microtvm-zephyr-qemu_riscv32 tests/micro/zephyr --board=qemu_riscv32
-run_pytest ctypes python-microtvm-zephyr-qemu_riscv64 tests/micro/zephyr --board=qemu_riscv64
-run_pytest ctypes python-microtvm-zephyr-mps2_an521 tests/micro/zephyr --board=mps2_an521
-run_pytest ctypes python-microtvm-zephyr-mps3_an547 tests/micro/zephyr --board=mps3_an547 --use-fvp
-
-# Arduino
-run_pytest ctypes python-microtvm-arduino-nano33ble tests/micro/arduino --board=nano33ble --test-build-only
-run_pytest ctypes python-microtvm-arduino-due tests/micro/arduino --board=due --test-build-only
-
-# STM32
-run_pytest ctypes python-microtvm-stm32 tests/micro/stm32
-
-# Common Tests
-run_pytest ctypes python-microtvm-common-qemu_x86 tests/micro/common --platform=zephyr --board=qemu_x86
-run_pytest ctypes python-microtvm-common-due tests/micro/common --platform=arduino --board=due --test-build-only
-
-# Project API
-run_pytest ctypes python-microtvm-project_api tests/micro/project_api
-
-# Tutorials
-python3 gallery/how_to/work_with_microtvm/micro_tflite.py
-python3 gallery/how_to/work_with_microtvm/micro_autotune.py
-python3 gallery/how_to/work_with_microtvm/micro_aot.py
-python3 gallery/how_to/work_with_microtvm/micro_pytorch.py
-./gallery/how_to/work_with_microtvm/micro_tvmc.sh
-
-# without CMSIS-NN
-python3 gallery/how_to/work_with_microtvm/micro_mlperftiny.py
-# with CMSIS-NN
-export TVM_USE_CMSIS=1
-python3 gallery/how_to/work_with_microtvm/micro_mlperftiny.py
-export TVM_USE_CMSIS=
-
-# Tutorials running with Zephyr
-export TVM_MICRO_USE_HW=1
-export TVM_MICRO_BOARD=qemu_x86
-python3 gallery/how_to/work_with_microtvm/micro_tflite.py
-python3 gallery/how_to/work_with_microtvm/micro_autotune.py
-python3 gallery/how_to/work_with_microtvm/micro_aot.py
-
-run_pytest ctypes python-relay-strategy-arm_cpu tests/python/relay/strategy/arm_cpu --enable-corstone300-tests
-run_pytest ctypes python-relay-aot tests/python/relay/aot --enable-corstone300-tests
-run_pytest ctypes python-integration-m7-simd tests/python/integration/test_arm_mprofile_dsp.py --enable-corstone300-tests
-run_pytest ctypes python-integration-contrib-test_cmsisnn tests/python/contrib/test_cmsisnn -n auto
-run_pytest ctypes python-integration-contrib-test_ethosu tests/python/contrib/test_ethosu -n auto
+# TODO: Remove this once we have a way to run microTVM tests.
+exit 0
diff --git a/tests/scripts/task_python_vta_fsim.sh b/tests/scripts/task_python_vta_fsim.sh
deleted file mode 100755
index 6edd950cc7c2..000000000000
--- a/tests/scripts/task_python_vta_fsim.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
diff --git a/tests/scripts/task_python_vta_tsim.sh b/tests/scripts/task_python_vta_tsim.sh
deleted file mode 100755
index 6edd950cc7c2..000000000000
--- a/tests/scripts/task_python_vta_tsim.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
diff --git a/tests/scripts/task_riscv_microtvm.sh b/tests/scripts/task_riscv_microtvm.sh
index c597506dfede..552ddcb56f37 100755
--- a/tests/scripts/task_riscv_microtvm.sh
+++ b/tests/scripts/task_riscv_microtvm.sh
@@ -18,10 +18,5 @@
 
 set -euxo pipefail
 
-source tests/scripts/setup-pytest-env.sh
-
-make cython3
-
-# NOTE: this exists to ensure some tests run on RISC-V image. Without it, Jenkins reports a configuration error.
-# This line can be removed when RISC-V tests are added.
-run_pytest ctypes riscv-platform-minimal-test-0 tests/python/all-platform-minimal-test
+# Skip this script for now.
+exit 0