This repository was archived by the owner on Sep 23, 2024. It is now read-only.
54 commits
ae336d5  init library (luo-cheng2021, Jun 28, 2023)
2f8cd88  strict N tail handling (luo-cheng2021, Jun 28, 2023)
2cacaab  apply review comments (luo-cheng2021, Jun 28, 2023)
a544031  add mha support (luo-cheng2021, Jun 28, 2023)
3f18a48  move to gcc11; add mha_gpt bf16 support (luo-cheng2021, Jun 28, 2023)
d2ea598  restructure directory layout (luo-cheng2021, Jun 28, 2023)
3cfc8de  use add_subdir to simplify cmake (luo-cheng2021, Jun 28, 2023)
2a64a5a  add clang12 support & apply review comments (luo-cheng2021, Jun 28, 2023)
4aa2441  mha int8 support (luo-cheng2021, Jun 28, 2023)
7ed062a  rotary bf16 gpt initial support (luo-cheng2021, Jun 28, 2023)
28ba2d5  add chatglm support (luo-cheng2021, Jun 28, 2023)
0590c6d  rotary embedding supports contiguous pastkv (luo-cheng2021, Jun 28, 2023)
67aabdc  fc support int8 weight compression (luo-cheng2021, Jun 28, 2023)
f7723dc  support K<32 (pad zero to 32) (luo-cheng2021, Jun 28, 2023)
ae5fc0c  import && rename (luo-cheng2021, Jun 28, 2023)
9725e59  workaround MatmulVector K<=6*32 (luo-cheng2021, Jun 30, 2023)
aac4116  export cmake target (luo-cheng2021, Jun 30, 2023)
323fa5f  rotary use external cos/sin lookup table (luo-cheng2021, Jul 3, 2023)
6d5971c  qkv of emb changes to q, k, v (luo-cheng2021, Jul 3, 2023)
a741a8b  gelu tanh support (luo-cheng2021, Jul 4, 2023)
eb427a6  add git commit id to package (luo-cheng2021, Jul 6, 2023)
16ea565  add rotary avx2 kernel (luo-cheng2021, Jul 6, 2023)
73f335d  add bloom mha support (luo-cheng2021, Jul 6, 2023)
48e01f4  use target_compile_options to change cxx flags (luo-cheng2021, Jul 10, 2023)
f906cdd  mha uses tensor to support different strides (luo-cheng2021, Jul 11, 2023)
6205ab7  remove chatglm dependency (luo-cheng2021, Jul 12, 2023)
27f324a  remove warning (luo-cheng2021, Jul 12, 2023)
03b7b37  remove shared_ptr from interface (luo-cheng2021, Jul 13, 2023)
106402c  refactor: emb/mha use tensor as input parameter (luo-cheng2021, Jul 13, 2023)
0db2ce8  wa gcc9 could not find 'std::__throw_bad_array_new_length()' (luo-cheng2021, Jul 15, 2023)
ebecf77  wa gcc 7.5 does not like newer stringstream (luo-cheng2021, Jul 17, 2023)
7872bad  use custom allocator for map (luo-cheng2021, Jul 19, 2023)
11833d8  support external causal mask[opt] (luo-cheng2021, Jul 24, 2023)
7beabd6  fix coverity scan errors (luo-cheng2021, Jul 24, 2023)
2c0ae5b  remove ov namespace (luo-cheng2021, Jul 25, 2023)
e8455b7  remove c++ global vars (luo-cheng2021, Jul 25, 2023)
31faf59  fix simple_parallel_for type (luo-cheng2021, Jul 26, 2023)
2326d60  optimize mha_gpt_impl_amx::create (usstq, Jul 27, 2023)
04b8483  add security.md and fix warnings (luo-cheng2021, Jul 27, 2023)
77ac0bc  apply review comments (luo-cheng2021, Jul 31, 2023)
6de45ce  apply review comments (luo-cheng2021, Aug 1, 2023)
ca79cd9  fc weight support f32, add weight pack api (luo-cheng2021, Aug 3, 2023)
cbfe9ff  opt f32 weight pack (luo-cheng2021, Aug 4, 2023)
3f31227  remove writeable bufferC (luo-cheng2021, Aug 10, 2023)
76d5f0f  use numa to alloc mem (luo-cheng2021, Aug 11, 2023)
2dd181a  add falcon broadcast support before rotary (luo-cheng2021, Aug 16, 2023)
3a7432c  fix int8 compress (luo-cheng2021, Aug 17, 2023)
4a4e48c  cache temp mem when K<32 (luo-cheng2021, Aug 18, 2023)
c29681c  add numa as the first level task partition basis (luo-cheng2021, Aug 29, 2023)
d30b5b5  dlopen libnuma, remove compilation phase dependency (luo-cheng2021, Aug 30, 2023)
0d7dac4  avoid usage of vector (luo-cheng2021, Aug 30, 2023)
581a3cf  support perchannel u8 compress weight (luo-cheng2021, Sep 15, 2023)
4f3ba52  opt for multi query (luo-cheng2021, Sep 18, 2023)
aa2c57b  fc M dimension splitting (luo-cheng2021, Sep 22, 2023)
56 changes: 56 additions & 0 deletions .gitignore
@@ -0,0 +1,56 @@
# Compiled Object files
**/.DS_Store
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Fortran module files
*.mod
*.smod

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app

**/cmake-build-debug
**/CMakeCache.txt
**/cmake_install.cmake
**/install_manifest.txt
**/CMakeFiles/
**/CTestTestfile.cmake
**/Makefile
**/*.cbp
**/CMakeScripts
**/compile_commands.json


## Local

build/**/*
**/build/**/*
out/*
lib/*
bin/*
test/test_runner
.vs
.cache
__pycache__
dist
*.egg-info
59 changes: 59 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,59 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

cmake_minimum_required(VERSION 3.13)

project(root)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
option(CPU_EXTENSIONS_BUILD_TESTS "Build with tests" ON)
option(CPU_EXTENSIONS_ENABLE_LOG "Enable log" ON)

message(STATUS "--------------------------------")
message(STATUS "Build with tests: ${CPU_EXTENSIONS_BUILD_TESTS}")
message(STATUS "--------------------------------")

if(MSVC)
# TODO: validate
if(MSVC_VERSION VERSION_LESS 1928)
message(FATAL_ERROR "Insufficient msvc compiler version, current ${MSVC_VERSION}, minimum 1928.")
endif()
elseif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "11.2")
message(FATAL_ERROR "Insufficient gcc compiler version, current ${CMAKE_CXX_COMPILER_VERSION}, minimum 11.2.")
endif()
set(EXTRA_CXX_FLAGS -march=sapphirerapids -flax-vector-conversions)
elseif(OV_COMPILER_IS_CLANG)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "12")
message(FATAL_ERROR "Insufficient clang compiler version, current ${CMAKE_CXX_COMPILER_VERSION}, minimum 12.")
endif()
set(EXTRA_CXX_FLAGS -march=sapphirerapids -flax-vector-conversions)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "2023.0")
message(FATAL_ERROR "Insufficient intel compiler version, current ${CMAKE_CXX_COMPILER_VERSION}, minimum 2023.0.")
endif()
set(EXTRA_CXX_FLAGS -march=sapphirerapids)
endif()

if (NOT CMAKE_RUNTIME_OUTPUT_DIRECTORY)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
endif()
add_subdirectory(src)
if (CPU_EXTENSIONS_BUILD_TESTS)
add_subdirectory(tests)
endif()

# Get the latest commit hash
execute_process(
COMMAND git rev-parse HEAD
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
OUTPUT_VARIABLE GIT_HASH
OUTPUT_STRIP_TRAILING_WHITESPACE
)
file(WRITE ${CMAKE_BINARY_DIR}/git-state.txt ${GIT_HASH})
install(FILES
${CMAKE_BINARY_DIR}/git-state.txt
DESTINATION ${CMAKE_INSTALL_PREFIX})
9 changes: 9 additions & 0 deletions README.md
@@ -0,0 +1,9 @@
# About CPU_Extensions
CPU_Extensions is a compute library containing processor-optimized kernel code.

# Unit tests for CPU_Extensions
## Tests for kernels
Tests for kernels are written with gtest under tests/src; run them with ./cpu_extensions_tests.

## Tests for complex features
Some features involve many steps, and their references cannot easily be written with gtest. For these features, Python is used to generate the reference. The directory tests/script contains these tests; see [test in python](./tests/script/README.md).
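
As a rough illustration (not an actual test from this PR), a kernel test under tests/src would follow the usual gtest pattern; the data_type_t enumerator names below are assumptions, since llm_types.hpp is not part of this diff:

```cpp
// Hypothetical sketch of a kernel test; data_type_t enumerator names are assumed.
#include <gtest/gtest.h>
#include "llm_mm.hpp"

TEST(mm_kernel, create_and_destroy) {
    llmdnn::mm_create_param param{};
    param.dt_a = llmdnn::data_type_t::bf16;  // assumed enumerator name
    param.dt_b = llmdnn::data_type_t::bf16;  // assumed enumerator name
    param.b_is_gemv = false;
    param.b_is_trans = false;

    llmdnn::mm_kernel* kernel = nullptr;
    // Returned status is not checked here: status_t enumerators are not shown in this diff.
    llmdnn::mm_kernel_create(&kernel, &param);
    EXPECT_NE(kernel, nullptr);
    llmdnn::mm_kernel_destroy(kernel);
}
```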
12 changes: 12 additions & 0 deletions SECURITY.md
@@ -0,0 +1,12 @@
# Security Policy

## Report a Vulnerability

Please report security issues or vulnerabilities to the [Intel® Security Center].

For more information on how Intel® works to resolve security issues, see
[Vulnerability Handling Guidelines].

[Intel® Security Center]:https://www.intel.com/security

[Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html
31 changes: 31 additions & 0 deletions include/llm_emb_gpt.hpp
@@ -0,0 +1,31 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <string>
#include <vector>
#include "llm_types.hpp"
#include "llm_tensor.hpp"

namespace llmdnn {

status_t emb_gpt(const tensor& q_src, // q shape: [batch, query_seq_len, head_num, head_size] or
// [batch, query_seq_len, num_kv_heads, head_num/num_kv_heads, head_size]
const tensor& k_src, // k shape: [batch, query_seq_len, head_num, head_size] or
// [batch, query_seq_len, num_kv_heads, 1, head_size]
const tensor& v_src, // v shape: [batch, query_seq_len, head_num, head_size] or
// [batch, query_seq_len, num_kv_heads, 1, head_size]
const tensor& k_past, // k_past shape: [batch, num_heads, past_seq_len, head_size]
const tensor& v_past, // v_past shape: [batch, num_heads, past_seq_len, head_size]
const tensor& q_dst, // q_dst, shape: [batch, num_heads, query_seq_len, head_size]
const tensor& k_dst, // k_dst shape: [batch, num_heads, query_seq_len+past_seq_len, head_size]
// if k_past != k_dst, k_past is copied into k_dst
const tensor& v_dst, // v_dst shape: [batch, num_heads, query_seq_len+past_seq_len, head_size]
const tensor& cos, // cos lookup table, shape: [1, 1, max_seq_len, rotary_dims]
const tensor& sin, // sin lookup table, shape: [1, 1, max_seq_len, rotary_dims]
const tensor& position2d_ids); // shape: [batch, 2, query_seq_len]

} // namespace llmdnn
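
For orientation only, the cos/sin lookup tables above imply a per-position rotary step roughly like the scalar reference below. This is not code from the library; the pairing convention (half-split here rather than interleaved) is an assumption that the header itself does not pin down.

```cpp
// Naive scalar reference of one rotary step for a single head vector.
// Assumes the half-split pairing convention (x[i] paired with x[i + rotary_dims/2]);
// the actual kernel may use a different pairing.
#include <cstddef>

void rotary_reference(float* x,             // one q or k head vector, length >= rotary_dims
                      const float* cos_row, // cos table row for this position, length rotary_dims
                      const float* sin_row, // sin table row for this position, length rotary_dims
                      size_t rotary_dims) {
    const size_t half = rotary_dims / 2;
    for (size_t i = 0; i < half; ++i) {
        const float x0 = x[i];
        const float x1 = x[i + half];
        x[i]        = x0 * cos_row[i]        - x1 * sin_row[i];
        x[i + half] = x1 * cos_row[i + half] + x0 * sin_row[i + half];
    }
}
```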
98 changes: 98 additions & 0 deletions include/llm_fc.hpp
@@ -0,0 +1,98 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "llm_types.hpp"
#include "llm_tensor.hpp"

namespace llmdnn {

typedef enum {
NONE = 0,
DEQUANT = 1 << 0,
BIAS = 1 << 1,
GELU_ERF = 1 << 2,
GELU_TANH = 1 << 3,
QUANT = 1 << 4,
GELU = GELU_ERF, // default is ERF

BIAS_GELU = BIAS | GELU,
DEQUANT_BIAS_GELU = DEQUANT | BIAS_GELU,
DEQUANT_BIAS_GELU_QUANT = DEQUANT_BIAS_GELU | QUANT,
DEQUANT_BIAS_QUANT = DEQUANT | BIAS | QUANT,
DEQUANT_GELU_QUANT = DEQUANT | GELU | QUANT,
DEQUANT_QUANT = DEQUANT | QUANT,

DEQUANT_GELU = DEQUANT | GELU,
DEQUANT_BIAS = DEQUANT | BIAS,

BIAS_GELU_TANH = BIAS | GELU_TANH,
DEQUANT_BIAS_GELU_TANH = DEQUANT | BIAS_GELU_TANH,
DEQUANT_BIAS_GELU_TANH_QUANT = DEQUANT_BIAS_GELU_TANH | QUANT,
DEQUANT_GELU_TANH_QUANT = DEQUANT | GELU_TANH | QUANT,

DEQUANT_GELU_TANH = DEQUANT | GELU_TANH,
} postops_types;

struct fc_create_param {
data_type_t dt_a;
data_type_t dt_b;
data_type_t dt_c;
bool b_is_trans;
postops_types postops_type;
// for weight compression
float* scale;
float* zp;
int scale_zp_size;
};

struct fc_kernel;

/// Generates an fc kernel based on param
///
/// @param mm Output kernel
/// @param param kernel parameters, supported:
/// fc: (s8,s8,s8),dq,[bias],[gelu],q
/// fc: (s8,s8,bf16),dq,[bias],[gelu]
/// fc: (s8,s8,f32),dq,[bias],[gelu]
/// fc: (bf16,bf16,bf16),[bias],[gelu]
/// fc: (bf16,bf16,f32),[bias],[gelu]
/// fc: (bf16,u8,f32),dq,[bias],[gelu]
/// fc: (bf16,u8,bf16),dq,[bias],[gelu]
///
status_t fc_kernel_create(fc_kernel** mm, const fc_create_param* param);
void fc_kernel_destroy(fc_kernel* mm);
// when fc_create_param.dt_b==bf16, the dt_b passed here may be bf16 or f32
// when fc_create_param.dt_b==u8, the dt_b passed here may be bf16 or f32
void fc_kernel_pack_weight(fc_kernel* mm, void* ptr_b, data_type_t dt_b, size_t N, size_t K, size_t stride_b, size_t n_start, size_t n_end);
void fc_kernel_pack_weight_to_dst(fc_kernel* mm, void* src_b, void* dst_b, data_type_t dt_b, size_t N, size_t K, size_t stride_b, size_t n_start, size_t n_end);
// ptr_b may be null if using fc_kernel_pack_weight to pack into internal buffer
// if ptr_b is not null, its layout is [N/32, 32*rndup(K,32|64)]
void fc_kernel_execute(fc_kernel* mm,
void* ptr_a, void* ptr_b, void* ptr_c, size_t stride_a, size_t stride_c,
size_t M, size_t N, size_t K, size_t n_start, size_t n_end,
float* dq=nullptr, float* q=nullptr, float* bias=nullptr);

/// Generates a fc based on param
class fc {
public:
fc();
~fc();

bool init(const fc_create_param& param);
void pack_weight(const tensor& w);
status_t exec(const tensor& input, const tensor& output, const tensor& dq, const tensor& q, const tensor& bias);

struct impl {
virtual ~impl() {}
virtual bool init(const fc_create_param& param) = 0;
virtual void pack_weight(const tensor& w) = 0;
virtual status_t exec(const tensor& input, const tensor& output, const tensor& dq, const tensor& q, const tensor& bias) = 0;
};
protected:
impl* _impl;
};

} // namespace llmdnn
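
A minimal usage sketch of the C-style fc_kernel API follows, for a bf16 in, bf16 weight, bf16 out configuration with BIAS_GELU post-ops. The data_type_t enumerator names and the byte-based stride convention are assumptions, since llm_types.hpp and the implementation are not part of this diff.

```cpp
// Hypothetical usage sketch of the fc_kernel API; not taken from the library's tests.
#include <cstdint>
#include "llm_fc.hpp"

using namespace llmdnn;

void run_fc_bf16(void* src, void* weight_bf16, void* dst,
                 size_t M, size_t N, size_t K, float* bias) {
    fc_create_param param{};
    param.dt_a = data_type_t::bf16;  // assumed enumerator name
    param.dt_b = data_type_t::bf16;  // assumed enumerator name
    param.dt_c = data_type_t::bf16;  // assumed enumerator name
    param.b_is_trans = false;
    param.postops_type = BIAS_GELU;  // bias followed by erf-based GELU

    fc_kernel* kernel = nullptr;
    // Returned status is not checked here: status_t enumerators are not shown in this diff.
    fc_kernel_create(&kernel, &param);
    if (!kernel) return;

    // Pack the weight into the kernel's internal buffer; an f32 weight could be
    // passed instead with dt_b = f32, per the comment on fc_kernel_pack_weight.
    fc_kernel_pack_weight(kernel, weight_bf16, data_type_t::bf16,
                          N, K, /*stride_b=*/N * sizeof(uint16_t), /*n_start=*/0, /*n_end=*/N);

    // ptr_b may be null because the weight was packed into the internal buffer.
    // Strides are assumed to be in bytes.
    fc_kernel_execute(kernel, src, /*ptr_b=*/nullptr, dst,
                      /*stride_a=*/K * sizeof(uint16_t), /*stride_c=*/N * sizeof(uint16_t),
                      M, N, K, /*n_start=*/0, /*n_end=*/N,
                      /*dq=*/nullptr, /*q=*/nullptr, bias);

    fc_kernel_destroy(kernel);
}
```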
53 changes: 53 additions & 0 deletions include/llm_mha_gpt.hpp
@@ -0,0 +1,53 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <string>
#include <vector>
#include "llm_types.hpp"
#include "llm_tensor.hpp"

namespace llmdnn {

class mha_gpt {
public:
mha_gpt();
~mha_gpt();

status_t exec(const tensor& q, // q shape: [batch, num_heads, query_seq_len, head_size]
const tensor& k, // k shape: [batch, num_heads, key_seq_len, head_size]
const tensor& v, // v shape: [batch, num_heads, value_seq_len, head_size]
const tensor& output, // output, compact, shape: [batch, query_seq_len, num_heads * head_size]
const tensor& attn_mask, // attention mask[opt], shape:
// [batch, 1, 1, key_seq_len],
// [batch, 1, query_seq_len, key_seq_len]
const tensor& alibi, // alibi[opt] shape: [batch, num_heads, 1, key_seq_len]
const tensor& causal_mask, // causal mask[opt], u8; when provided, use_causal_mask must be false. shape:
// [1, 1, query_seq_len, key_seq_len]
// [batch, 1, query_seq_len, key_seq_len]
bool select_nfltmax_at_0, // used when causal_mask is not null. true: positions with causal_mask[i]==0 get -FLT_MAX
// false: positions with causal_mask[i]==1 get -FLT_MAX
float normal_factor,
bool use_causal_mask = false);// add causal mask

struct impl {
virtual ~impl() {}
virtual status_t exec(const tensor& q,
const tensor& k,
const tensor& v,
const tensor& output,
const tensor& attn_mask,
const tensor& alibi,
const tensor& causal_mask,
bool select_nfltmax_at_0,
float normal_factor,
bool use_causal_mask = false) = 0;
};
protected:
impl* _impl;
};

} // namespace llmdnn
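
To make the parameters above concrete, here is a naive single-batch, single-head reference of what exec computes: softmax(normal_factor * q * k^T + attn_mask [+ alibi]) * v, with an optional causal restriction. This is an illustration written for this description, not library code; it omits batch/head loops and the causal_mask/select_nfltmax_at_0 path.

```cpp
// Naive reference of one attention head; alibi would be added to the scores the
// same way as attn_mask. Not taken from the library.
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstddef>
#include <vector>

void mha_reference(const float* q,         // [q_len, head_size]
                   const float* k,         // [k_len, head_size]
                   const float* v,         // [k_len, head_size]
                   const float* attn_mask, // [k_len] additive mask, may be null
                   float* out,             // [q_len, head_size]
                   size_t q_len, size_t k_len, size_t head_size,
                   float normal_factor, bool use_causal_mask) {
    std::vector<float> score(k_len);
    for (size_t i = 0; i < q_len; ++i) {
        // scaled dot-product scores with optional additive mask / causal clipping
        float max_score = -FLT_MAX;
        for (size_t j = 0; j < k_len; ++j) {
            float s = 0.f;
            for (size_t d = 0; d < head_size; ++d)
                s += q[i * head_size + d] * k[j * head_size + d];
            s *= normal_factor;
            if (attn_mask) s += attn_mask[j];
            if (use_causal_mask && j > i + (k_len - q_len)) s = -FLT_MAX;
            score[j] = s;
            max_score = std::max(max_score, s);
        }
        // softmax over the key dimension
        float sum = 0.f;
        for (size_t j = 0; j < k_len; ++j) {
            score[j] = std::exp(score[j] - max_score);
            sum += score[j];
        }
        // weighted sum of values
        for (size_t d = 0; d < head_size; ++d) {
            float acc = 0.f;
            for (size_t j = 0; j < k_len; ++j)
                acc += (score[j] / sum) * v[j * head_size + d];
            out[i * head_size + d] = acc;
        }
    }
}
```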
35 changes: 35 additions & 0 deletions include/llm_mm.hpp
@@ -0,0 +1,35 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "llm_types.hpp"

namespace llmdnn {

struct mm_create_param {
data_type_t dt_a;
data_type_t dt_b;
bool b_is_gemv; // true if matrix b is vector. Shape: a[M,K], b[K,1], c[M,1]
bool b_is_trans;
};

struct mm_kernel;

/// Generates a mm kernel based on param
///
/// @param mm Output kernel
/// @param param kernel parameters, supported:
/// matmul: (u8/s8,s8,f32)
/// gemv: (s8,s8,f32)
/// matmul: (bf16,bf16,f32)
/// gemv: (bf16,bf16,f32)
///
status_t mm_kernel_create(mm_kernel** mm, const mm_create_param* param);
void mm_kernel_destroy(const mm_kernel* mm);

status_t mm_kernel_execute(const mm_kernel* mm, void* ptr_a, void* ptr_b, void* ptr_c, size_t lda, size_t ldb, size_t ldc,
size_t M, size_t N, size_t K);

} // namespace llmdnn
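
A minimal usage sketch of this API, for a (bf16, bf16) -> f32 matmul, might look like the following. The data_type_t enumerator names and the assumption that lda/ldb/ldc are byte strides are mine, not the header's.

```cpp
// Hypothetical usage sketch of the mm_kernel API; not taken from the library's tests.
#include <cstdint>
#include "llm_mm.hpp"

using namespace llmdnn;

void run_matmul_bf16(void* a, void* b, float* c, size_t M, size_t N, size_t K) {
    mm_create_param param{};
    param.dt_a = data_type_t::bf16;  // assumed enumerator name
    param.dt_b = data_type_t::bf16;  // assumed enumerator name
    param.b_is_gemv = false;         // full matmul, not the gemv path
    param.b_is_trans = false;

    mm_kernel* kernel = nullptr;
    // Returned status is not checked here: status_t enumerators are not shown in this diff.
    mm_kernel_create(&kernel, &param);
    if (!kernel) return;

    // Leading dimensions assumed to be in bytes.
    mm_kernel_execute(kernel, a, b, c,
                      /*lda=*/K * sizeof(uint16_t),
                      /*ldb=*/N * sizeof(uint16_t),
                      /*ldc=*/N * sizeof(float),
                      M, N, K);

    mm_kernel_destroy(kernel);
}
```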