diff --git a/.clang-tidy b/.clang-tidy index 4824fc2f2ed..c14023d04d0 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -7,6 +7,9 @@ HeaderFilterRegex: '^.*/(base|src|programs|utils)/.*(h|hpp)$' +# We don't want to use clang-tidy diagnostics in the "contrib" folder. +ExcludeHeaderFilterRegex: '^.*/contrib/.*' + Checks: [ '*', @@ -28,6 +31,7 @@ Checks: [ '-bugprone-unchecked-optional-access', '-bugprone-crtp-constructor-accessibility', '-bugprone-not-null-terminated-result', + '-bugprone-forward-declaration-namespace', '-cert-dcl16-c', '-cert-err58-cpp', diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index c2ff2e056b1..d3b4dc74a29 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -17,7 +17,7 @@ tests/ci/cancel_and_rerun_workflow_lambda/app.py - Not for changelog (changelog entry is not required) -### Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md): +### Changelog entry (a [user-readable short description](https://github.com/ClickHouse/ClickHouse/blob/master/docs/changelog_entry_guidelines.md) of the changes that goes into CHANGELOG.md): ... 
### Documentation entry for user-facing changes diff --git a/.github/workflows/build_linux_arm64_wheels-gh.yml b/.github/workflows/build_linux_arm64_wheels-gh.yml index 356faeebfb0..fe9e78277a5 100644 --- a/.github/workflows/build_linux_arm64_wheels-gh.yml +++ b/.github/workflows/build_linux_arm64_wheels-gh.yml @@ -106,6 +106,13 @@ jobs: python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel pyenv shell --unset done + - name: Upgrade Rust toolchain + run: | + rustup toolchain install nightly-2025-07-07 + rustup default nightly-2025-07-07 + rustup component add rust-src + rustc --version + cargo --version - name: Install clang++ for Ubuntu run: | pwd @@ -116,6 +123,13 @@ jobs: which clang++-19 clang++-19 --version sudo apt-get install -y make cmake ccache ninja-build yasm gawk wget + # Install WebAssembly linker (wasm-ld) + sudo apt-get install -y lld-19 + # Create symlink for wasm-ld + if ! command -v wasm-ld &> /dev/null; then + sudo ln -sf /usr/bin/wasm-ld-19 /usr/bin/wasm-ld || true + fi + which wasm-ld || echo "wasm-ld not found in PATH" ccache -s - name: Update git run: | @@ -148,6 +162,7 @@ jobs: run: | export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" + source ~/.cargo/env pyenv shell 3.8 export CC=/usr/bin/clang export CXX=/usr/bin/clang++ @@ -161,6 +176,7 @@ jobs: export CC=/usr/bin/clang export CXX=/usr/bin/clang++ export PATH="$HOME/.pyenv/bin:$PATH" + source ~/.cargo/env eval "$(pyenv init -)" pyenv shell 3.8 bash ./chdb/build/build_static_lib.sh diff --git a/.github/workflows/build_linux_x86_wheels.yml b/.github/workflows/build_linux_x86_wheels.yml index 665a8a3f2b8..5a35668ead9 100644 --- a/.github/workflows/build_linux_x86_wheels.yml +++ b/.github/workflows/build_linux_x86_wheels.yml @@ -106,6 +106,13 @@ jobs: python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel pyenv shell --unset done + - name: Upgrade Rust toolchain + run: | + rustup toolchain install nightly-2025-07-07 + rustup 
default nightly-2025-07-07 + rustup component add rust-src + rustc --version + cargo --version - name: Install clang++ for Ubuntu run: | pwd @@ -116,6 +123,13 @@ jobs: which clang++-19 clang++-19 --version sudo apt-get install -y make cmake ccache ninja-build yasm gawk wget + # Install WebAssembly linker (wasm-ld) + sudo apt-get install -y lld-19 + # Create symlink for wasm-ld + if ! command -v wasm-ld &> /dev/null; then + sudo ln -sf /usr/bin/wasm-ld-19 /usr/bin/wasm-ld || true + fi + which wasm-ld || echo "wasm-ld not found in PATH" ccache -s - name: Update git run: | @@ -148,6 +162,7 @@ jobs: run: | export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" + source ~/.cargo/env pyenv shell 3.8 export CC=/usr/bin/clang export CXX=/usr/bin/clang++ @@ -161,6 +176,7 @@ jobs: export CC=/usr/bin/clang export CXX=/usr/bin/clang++ export PATH="$HOME/.pyenv/bin:$PATH" + source ~/.cargo/env eval "$(pyenv init -)" pyenv shell 3.8 bash ./chdb/build/build_static_lib.sh diff --git a/.github/workflows/build_macos_arm64_wheels.yml b/.github/workflows/build_macos_arm64_wheels.yml index a7d7fb454e8..61c895f7601 100644 --- a/.github/workflows/build_macos_arm64_wheels.yml +++ b/.github/workflows/build_macos_arm64_wheels.yml @@ -79,61 +79,37 @@ jobs: brew install ca-certificates lz4 mpdecimal readline sqlite xz z3 zstd brew install openssl@3 || echo "OpenSSL install failed, continuing..." brew install --ignore-dependencies llvm@19 - brew install git ninja libtool gettext binutils grep findutils nasm - # brew install gcc || echo "GCC install failed, continuing..." 
- # brew install ccache || echo "ccache installation failed, continuing without it" + brew install git ninja libtool gettext binutils grep findutils nasm lld@19 libiconv brew install go cd /usr/local/opt/ && sudo rm -f llvm && sudo ln -sf llvm@19 llvm - export PATH=$(brew --prefix llvm@19)/bin:$PATH + export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:$PATH which clang++ clang++ --version + which wasm-ld || echo "wasm-ld not found in PATH" which go go version - ccache -s || echo "ccache not available yet" - - name: Scan SQLite vulnerabilities with grype + ccache -s || echo "ccache not available yet" + - name: Upgrade Rust toolchain run: | - # Install grype - curl -sSfL https://raw.githubusercontent.com/anchore/grype/main/install.sh | sh -s -- -b /usr/local/bin - - # Update grype vulnerability database - grype db update - - # Check SQLite vulnerabilities in Homebrew packages - echo "Scanning SQLite packages for vulnerabilities..." - GRYPE_RAW_OUTPUT=$(grype dir:/opt/homebrew --scope all-layers 2>/dev/null || true) - echo "Raw grype output:" - echo "$GRYPE_RAW_OUTPUT" - - SQLITE_SCAN_OUTPUT=$(echo "$GRYPE_RAW_OUTPUT" | grep -i sqlite || true) - - if [ -n "$SQLITE_SCAN_OUTPUT" ]; then - echo "❌ SQLite vulnerabilities found in packages! Build should be reviewed." 
- echo "SQLite vulnerability details:" - echo "$SQLITE_SCAN_OUTPUT" - exit 1 - else - echo "✅ No SQLite vulnerabilities found" - fi - continue-on-error: false + rustup toolchain install nightly-2025-07-07 + rustup default nightly-2025-07-07 + rustup component add rust-src + rustc --version + cargo --version - uses: actions/checkout@v3 with: fetch-depth: 0 - name: Update submodules run: | git submodule update --init --recursive --jobs 4 - # - name: ccache - # uses: hendrikmuhs/ccache-action@v1.2 - # with: - # key: macos-13-xlarge - # max-size: 5G - # append-timestamp: true - name: Run chdb/build.sh timeout-minutes: 600 run: | export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" + source ~/.cargo/env pyenv shell 3.8 - export PATH=$(brew --prefix llvm@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin + export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin export CC=$(brew --prefix llvm@19)/bin/clang export CXX=$(brew --prefix llvm@19)/bin/clang++ bash gen_manifest.sh @@ -143,9 +119,10 @@ jobs: - name: Run chdb/build/build_static_lib.sh timeout-minutes: 600 run: | - export PATH=$HOME/.pyenv/bin:$(brew --prefix llvm@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin + export PATH=$HOME/.pyenv/bin:$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin export CC=$(brew --prefix llvm@19)/bin/clang export CXX=$(brew --prefix llvm@19)/bin/clang++ + source ~/.cargo/env eval "$(pyenv init -)" pyenv shell 3.8 bash ./chdb/build/build_static_lib.sh @@ -168,28 +145,23 @@ jobs: - name: Scan chdb libraries with grype run: | echo "Scanning chdb libraries for vulnerabilities..." 
- # Files to scan FILES_TO_SCAN="" [ -f libchdb.so ] && FILES_TO_SCAN="$FILES_TO_SCAN libchdb.so" [ -f libchdb.a ] && FILES_TO_SCAN="$FILES_TO_SCAN libchdb.a" FILES_TO_SCAN="$FILES_TO_SCAN $(find chdb/ \( -name "*.dylib" -o -name "*.so" \) 2>/dev/null || true)" - SQLITE_VULNERABILITIES_FOUND=false - for file in $FILES_TO_SCAN; do if [ -f "$file" ]; then echo "=== Scanning $file ===" SCAN_OUTPUT=$(grype "$file" 2>/dev/null || true) echo "$SCAN_OUTPUT" - if echo "$SCAN_OUTPUT" | grep -qi sqlite; then echo "❌ SQLite vulnerability found in $file" SQLITE_VULNERABILITIES_FOUND=true fi fi done - if [ "$SQLITE_VULNERABILITIES_FOUND" = true ]; then echo "❌ SQLite vulnerabilities detected in chdb libraries!" exit 1 @@ -200,26 +172,11 @@ jobs: - name: Run libchdb stub in examples dir run: | bash -x ./examples/runStub.sh - # - name: Keep killall ccache and wait for ccache to finish - # if: always() - # run: | - # sleep 60 - # while ps -ef | grep ccache | grep -v grep; do \ - # killall ccache || true; \ - # sleep 10; \ - # done - # - name: Check ccache statistics - # run: | - # ccache -s || echo "ccache not available" - # ls -lh chdb - # df -h - # env: - # CIBW_ENVIRONMENT_MACOS: "PATH=$(brew --prefix llvm@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin CC=$(brew --prefix llvm@19)/bin/clang CXX=$(brew --prefix llvm@19)/bin/clang++" - name: Build wheels run: | export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" - export PATH=$(brew --prefix llvm@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin + export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin export CC=$(brew --prefix llvm@19)/bin/clang export CXX=$(brew --prefix llvm@19)/bin/clang++ pyenv shell 3.8 diff --git a/.github/workflows/build_macos_x86_wheels.yml 
b/.github/workflows/build_macos_x86_wheels.yml index 6390a6849fc..8217b1ad033 100644 --- a/.github/workflows/build_macos_x86_wheels.yml +++ b/.github/workflows/build_macos_x86_wheels.yml @@ -78,32 +78,28 @@ jobs: brew update brew install ca-certificates lz4 mpdecimal openssl@3 readline sqlite xz z3 zstd brew install --ignore-dependencies llvm@19 - brew install git ninja libtool gettext gcc binutils grep findutils nasm + brew install git ninja libtool gettext gcc binutils grep findutils nasm lld@19 libiconv brew install ccache || echo "ccache installation failed, continuing without it" brew install go cd /usr/local/opt/ && sudo rm -f llvm && sudo ln -sf llvm@19 llvm - export PATH=$(brew --prefix llvm@19)/bin:$PATH + export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:$PATH which clang++ clang++ --version which go go version - ccache -s + ccache -s || echo "ccache not available yet" - name: Scan SQLite vulnerabilities with grype run: | # Install grype curl -sSfL https://raw.githubusercontent.com/anchore/grype/main/install.sh | sh -s -- -b /usr/local/bin - # Update grype vulnerability database grype db update - # Check SQLite vulnerabilities in Homebrew packages echo "Scanning SQLite packages for vulnerabilities..." - GRYPE_RAW_OUTPUT=$(grype dir:/usr/local --scope all-layers 2>/dev/null || true) + GRYPE_RAW_OUTPUT=$(grype dir:/opt/homebrew --scope all-layers 2>/dev/null || true) echo "Raw grype output:" echo "$GRYPE_RAW_OUTPUT" - SQLITE_SCAN_OUTPUT=$(echo "$GRYPE_RAW_OUTPUT" | grep -i sqlite || true) - if [ -n "$SQLITE_SCAN_OUTPUT" ]; then echo "❌ SQLite vulnerabilities found in packages! Build should be reviewed." 
echo "SQLite vulnerability details:" @@ -113,25 +109,27 @@ jobs: echo "✅ No SQLite vulnerabilities found" fi continue-on-error: false + - name: Upgrade Rust toolchain + run: | + rustup toolchain install nightly-2025-07-07 + rustup default nightly-2025-07-07 + rustup component add rust-src + rustc --version + cargo --version - uses: actions/checkout@v3 with: fetch-depth: 0 - name: Update submodules run: | git submodule update --init --recursive --jobs 4 - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2 - with: - key: macos-14-x86_64 - max-size: 5G - append-timestamp: true - name: Run chdb/build.sh timeout-minutes: 600 run: | export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" + source ~/.cargo/env pyenv shell 3.8 - export PATH=$(brew --prefix llvm@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin + export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin export CC=$(brew --prefix llvm@19)/bin/clang export CXX=$(brew --prefix llvm@19)/bin/clang++ bash gen_manifest.sh @@ -142,9 +140,10 @@ jobs: - name: Run chdb/build/build_static_lib.sh timeout-minutes: 600 run: | - export PATH=$HOME/.pyenv/bin:$(brew --prefix llvm@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin + export PATH=$HOME/.pyenv/bin:$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin export CC=$(brew --prefix llvm@19)/bin/clang export CXX=$(brew --prefix llvm@19)/bin/clang++ + source ~/.cargo/env eval "$(pyenv init -)" pyenv shell 3.8 bash ./chdb/build/build_static_lib.sh @@ -167,28 +166,23 @@ jobs: - name: Scan chdb libraries with grype run: | echo "Scanning chdb libraries for vulnerabilities..." 
- # Files to scan FILES_TO_SCAN="" [ -f libchdb.so ] && FILES_TO_SCAN="$FILES_TO_SCAN libchdb.so" [ -f libchdb.a ] && FILES_TO_SCAN="$FILES_TO_SCAN libchdb.a" FILES_TO_SCAN="$FILES_TO_SCAN $(find chdb/ \( -name "*.dylib" -o -name "*.so" \) 2>/dev/null || true)" - SQLITE_VULNERABILITIES_FOUND=false - for file in $FILES_TO_SCAN; do if [ -f "$file" ]; then echo "=== Scanning $file ===" SCAN_OUTPUT=$(grype "$file" 2>/dev/null || true) echo "$SCAN_OUTPUT" - if echo "$SCAN_OUTPUT" | grep -qi sqlite; then echo "❌ SQLite vulnerability found in $file" SQLITE_VULNERABILITIES_FOUND=true fi fi done - if [ "$SQLITE_VULNERABILITIES_FOUND" = true ]; then echo "❌ SQLite vulnerabilities detected in chdb libraries!" exit 1 @@ -199,26 +193,12 @@ jobs: - name: Run libchdb stub in examples dir run: | bash -x ./examples/runStub.sh - - name: Keep killall ccache and wait for ccache to finish - if: always() - run: | - sleep 60 - while ps -ef | grep ccache | grep -v grep; do \ - killall ccache; \ - sleep 10; \ - done - - name: Check ccache statistics - run: | - ccache -s - ls -lh chdb - df -h - env: - CIBW_ENVIRONMENT_MACOS: "PATH=$(brew --prefix llvm@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin CC=$(brew --prefix llvm@19)/bin/clang CXX=$(brew --prefix llvm@19)/bin/clang++" - name: Build wheels run: | export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" - export PATH=$(brew --prefix llvm@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin + source ~/.cargo/env + export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin export CC=$(brew --prefix llvm@19)/bin/clang export CXX=$(brew --prefix llvm@19)/bin/clang++ pyenv shell 3.8 diff --git a/.gitignore b/.gitignore index 35776ee9036..e63ce4ea900 100644 --- a/.gitignore +++ 
b/.gitignore @@ -19,6 +19,7 @@ *.log *.debuglog *.stderr +*.stderr-fatal* *.stdout # llvm-xray logs @@ -167,6 +168,7 @@ tests/queries/0_stateless/*.binary tests/queries/0_stateless/*.generated-expect tests/queries/0_stateless/*.expect.history tests/integration/**/_gen +tests/casa_del_dolor/_instances* # pytest --pdb history .pdb_history diff --git a/.gitmodules b/.gitmodules index a8902cdab9a..189a8de74fa 100644 --- a/.gitmodules +++ b/.gitmodules @@ -40,6 +40,9 @@ [submodule "contrib/boost"] path = contrib/boost url = https://github.com/ClickHouse/boost +[submodule "contrib/arrow"] + path = contrib/arrow + url = https://github.com/chdb-io/arrow.git [submodule "contrib/thrift"] path = contrib/thrift url = https://github.com/apache/thrift @@ -134,9 +137,6 @@ [submodule "contrib/fmtlib"] path = contrib/fmtlib url = https://github.com/fmtlib/fmt -[submodule "contrib/sentry-native"] - path = contrib/sentry-native - url = https://github.com/ClickHouse/sentry-native [submodule "contrib/krb5"] path = contrib/krb5 url = https://github.com/ClickHouse/krb5 @@ -296,9 +296,6 @@ [submodule "contrib/aws-c-compression"] path = contrib/aws-c-compression url = https://github.com/awslabs/aws-c-compression -[submodule "contrib/aws-s2n-tls"] - path = contrib/aws-s2n-tls - url = https://github.com/ClickHouse/s2n-tls [submodule "contrib/crc32-vpmsum"] path = contrib/crc32-vpmsum url = https://github.com/antonblanchard/crc32-vpmsum.git @@ -372,9 +369,27 @@ [submodule "contrib/SHA3IUF"] path = contrib/SHA3IUF url = https://github.com/brainhub/SHA3IUF.git -[submodule "contrib/arrow"] - path = contrib/arrow - url = https://github.com/chdb-io/arrow.git +[submodule "contrib/chdig"] + path = contrib/chdig + url = https://github.com/azat/chdig.git +[submodule "contrib/ai-sdk-cpp"] + path = contrib/ai-sdk-cpp + url = https://github.com/ClickHouse/ai-sdk-cpp +[submodule "contrib/nlohmann-json"] + path = contrib/nlohmann-json + url = https://github.com/nlohmann/json.git +[submodule 
"contrib/crc32c"] + path = contrib/crc32c + url = https://github.com/google/crc32c.git +[submodule "contrib/simde"] + path = contrib/simde + url = https://github.com/simd-everywhere/simde +[submodule "contrib/FastPFOR"] + path = contrib/FastPFOR + url = https://github.com/fast-pack/FastPFOR +[submodule "contrib/antlr4-cpp-runtime"] + path = contrib/antlr4-cpp-runtime + url = https://github.com/antlr/antlr4.git [submodule "contrib/utf8proc"] path = contrib/utf8proc url = https://github.com/JuliaStrings/utf8proc.git diff --git a/.snyk b/.snyk deleted file mode 100644 index 7acc6b9fbf5..00000000000 --- a/.snyk +++ /dev/null @@ -1,4 +0,0 @@ -# Snyk (https://snyk.io) policy file -exclude: - global: - - tests/** diff --git a/CMakeLists.txt b/CMakeLists.txt index 7406ad6008a..099ff907e9a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -234,6 +234,8 @@ if (SPLIT_DEBUG_SYMBOLS) set(SPLIT_DEBUG_SYMBOLS_DIR "stripped" CACHE STRING "A separate directory for stripped information") endif() +option(BUILD_STRIPPED_BINARY "Build stripped binary (clickhouse-stripped)" OFF) + cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd @@ -302,9 +304,8 @@ set (CMAKE_C_STANDARD_REQUIRED ON) # See https://reviews.llvm.org/D112921 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation") -# falign-functions=32 prevents from random performance regressions with the code change. Thus, providing more stable -# benchmarks. -set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=32") +# falign-functions=64 prevents from random performance regressions with the code change. Thus, providing more stable benchmarks. 
+set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=64") if (ARCH_AMD64) # align branches within a 32-Byte boundary to avoid the potential performance loss when code layout change, @@ -381,6 +382,15 @@ elseif (ENABLE_THINLTO) message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot enable ThinLTO") endif () +if(COMPILER_PIPE) + set(MAX_COMPILER_MEMORY 2500) +else() + set(MAX_COMPILER_MEMORY 1500) +endif() +set(MAX_LINKER_MEMORY 5000) +include(cmake/limit_jobs.cmake) + + # Turns on all external libs like s3, kafka, ODBC, ... option(ENABLE_LIBRARIES "Enable all external libraries by default" ON) @@ -402,15 +412,6 @@ endif () link_libraries(global-group) target_link_libraries(global-group INTERFACE $) -option (ENABLE_GWP_ASAN "Enable Gwp-Asan" OFF) -# We use mmap for allocations more heavily in debug builds, -# but GWP-ASan also wants to use mmap frequently, -# and due to a large number of memory mappings, -# it does not work together well. -# if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")) -# set(ENABLE_GWP_ASAN OFF) -# endif () - option (ENABLE_FIU "Enable Fiu" ON) option(WERROR "Enable -Werror compiler option" ON) @@ -517,13 +518,32 @@ macro (clickhouse_add_executable target) add_executable (${ARGV} $) endif () + # Wrap the malloc/free and other C-style functions with our own ones + # to inject memory tracking mechanism into them. + # Sanitizers have their own way of intercepting the + # allocations and deallocations, so we skip this step for them. 
+ if (NOT (SANITIZE OR SANITIZE_COVERAGE OR OS_DARWIN OR OS_FREEBSD)) + target_link_options(${target} PRIVATE + "LINKER:--wrap=malloc" + "LINKER:--wrap=free" + "LINKER:--wrap=calloc" + "LINKER:--wrap=realloc" + "LINKER:--wrap=aligned_alloc" + "LINKER:--wrap=posix_memalign" + "LINKER:--wrap=valloc" + "LINKER:--wrap=memalign" + "LINKER:--wrap=reallocarray" + ) + if (NOT USE_MUSL) + target_link_options(${target} PRIVATE + "LINKER:--wrap=pvalloc" + ) + endif() + endif() + get_target_property (type ${target} TYPE) if (${type} STREQUAL EXECUTABLE) - # Disabled if memory tracking is disabled - if (TARGET clickhouse_new_delete) - # operator::new/delete for executables (MemoryTracker stuff) - target_link_libraries (${target} PRIVATE clickhouse_new_delete) - endif() + target_link_libraries (${target} PUBLIC clickhouse_new_delete) # In case of static jemalloc, because zone_register() is located in zone.c and # is never used outside (it is declared as constructor) it is omitted @@ -595,6 +615,9 @@ option(CHECK_LARGE_OBJECT_SIZES "Check that there are no large object files afte add_subdirectory (base) add_subdirectory (src) +if (LEXER_STANDALONE_BUILD) + add_subdirectory(tests/lexer) +endif () add_subdirectory (programs) add_subdirectory (utils) diff --git a/SECURITY.md b/SECURITY.md index c3a45802fb3..c5eafb5d086 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -14,9 +14,12 @@ The following versions of ClickHouse server are currently supported with securit | Version | Supported | |:-|:-| -| 25.4 | ✔️ | +| 25.7 | ✔️ | +| 25.6 | ✔️ | +| 25.5 | ✔️ | +| 25.4 | ❌ | | 25.3 | ✔️ | -| 25.2 | ✔️ | +| 25.2 | ❌ | | 25.1 | ❌ | | 24.12 | ❌ | | 24.11 | ❌ | diff --git a/base/base/Decimal.cpp b/base/base/Decimal.cpp index 7e65c0eb8d1..2e026c33ca0 100644 --- a/base/base/Decimal.cpp +++ b/base/base/Decimal.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB @@ -72,13 +73,15 @@ template Decimal operator- (const Decimal & x, const Decimal< template Decimal operator* (const Decimal & x, 
const Decimal & y) { return x.value * y.value; } template Decimal operator/ (const Decimal & x, const Decimal & y) { return x.value / y.value; } template Decimal operator- (const Decimal & x) { return -x.value; } +template Decimal NO_SANITIZE_UNDEFINED negateOverflow (const Decimal & x) { return -x.value; } #define DISPATCH(TYPE) \ template Decimal operator+ (const Decimal & x, const Decimal & y); \ template Decimal operator- (const Decimal & x, const Decimal & y); \ template Decimal operator* (const Decimal & x, const Decimal & y); \ template Decimal operator/ (const Decimal & x, const Decimal & y); \ -template Decimal operator- (const Decimal & x); +template Decimal operator- (const Decimal & x); \ +template Decimal NO_SANITIZE_UNDEFINED negateOverflow (const Decimal & x); FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) #undef DISPATCH diff --git a/base/base/Decimal.h b/base/base/Decimal.h index 42f9e67c49d..fd7e74b8222 100644 --- a/base/base/Decimal.h +++ b/base/base/Decimal.h @@ -10,6 +10,7 @@ namespace DB { template struct Decimal; class DateTime64; +class Time64; #define FOR_EACH_UNDERLYING_DECIMAL_TYPE(M) \ M(Int32) \ @@ -118,13 +119,15 @@ template Decimal operator- (const Decimal & x, const Decimal< template Decimal operator* (const Decimal & x, const Decimal & y); template Decimal operator/ (const Decimal & x, const Decimal & y); template Decimal operator- (const Decimal & x); +template Decimal NO_SANITIZE_UNDEFINED negateOverflow (const Decimal & x); #define DISPATCH(TYPE) \ extern template Decimal operator+ (const Decimal & x, const Decimal & y); \ extern template Decimal operator- (const Decimal & x, const Decimal & y); \ extern template Decimal operator* (const Decimal & x, const Decimal & y); \ extern template Decimal operator/ (const Decimal & x, const Decimal & y); \ -extern template Decimal operator- (const Decimal & x); +extern template Decimal operator- (const Decimal & x); \ +extern template Decimal NO_SANITIZE_UNDEFINED negateOverflow (const Decimal 
& x); FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) #undef DISPATCH @@ -142,6 +145,16 @@ class DateTime64 : public Decimal64 constexpr DateTime64(const Base & v): Base(v) {} // NOLINT(google-explicit-constructor) }; + +class Time64 : public Decimal64 +{ +public: + using Base = Decimal64; + using Base::Base; + using NativeType = Base::NativeType; + + constexpr Time64(const Base & v): Base(v) {} // NOLINT(google-explicit-constructor) +}; } constexpr UInt64 max_uint_mask = std::numeric_limits::max(); @@ -173,6 +186,15 @@ namespace std } }; + template <> + struct hash + { + size_t operator()(const DB::Time64 & x) const + { + return std::hash()(x); + } + }; + template <> struct hash { diff --git a/base/base/Decimal_fwd.h b/base/base/Decimal_fwd.h index a11e13a479b..6983551c862 100644 --- a/base/base/Decimal_fwd.h +++ b/base/base/Decimal_fwd.h @@ -27,6 +27,7 @@ using Decimal128 = Decimal; using Decimal256 = Decimal; class DateTime64; +class Time64; template concept is_decimal = @@ -34,7 +35,8 @@ concept is_decimal = || std::is_same_v || std::is_same_v || std::is_same_v - || std::is_same_v; + || std::is_same_v + || std::is_same_v; template concept is_over_big_int = diff --git a/base/base/JSON.h b/base/base/JSON.h index 03dd62d639d..44d65abb4a9 100644 --- a/base/base/JSON.h +++ b/base/base/JSON.h @@ -6,35 +6,35 @@ #include -/** Очень простой класс для чтения JSON (или его кусочков). - * Представляет собой ссылку на кусок памяти, в котором содержится JSON (или его кусочек). - * Не создаёт никаких структур данных в оперативке. Не выделяет память (кроме std::string). - * Не парсит JSON до конца (парсит только часть, необходимую для выполнения вызванного метода). - * Парсинг необходимой части запускается каждый раз при вызове методов. - * Может работать с обрезанным JSON-ом. - * При этом, (в отличие от SAX-подобных парсеров), предоставляет удобные методы для работы. +/** A very simple class for reading JSON (or its fragments). 
+ * Represents a reference to a piece of memory that contains JSON (or its fragment). + * Does not create any data structures in memory. Does not allocate memory (except for std::string). + * Does not parse JSON to the end (parses only the part needed to execute the called method). + * The necessary part is parsed anew each time a method is called. + * Can work with truncated JSON. + * At the same time (unlike SAX-like parsers), it provides convenient methods for working with JSON. * - * Эта структура данных более оптимальна, если нужно доставать несколько элементов из большого количества маленьких JSON-ов. - * То есть, подходит для обработки "параметров визитов" и "параметров интернет магазинов" в Яндекс.Метрике. - * Если нужно много работать с одним большим JSON-ом, то этот класс может быть менее оптимальным. + * This data structure is more efficient if you need to extract several elements from a large number of small JSONs. + * That is, it is suitable for processing "visit parameters" and "online store parameters" in Yandex.Metrica. + * If you need to do a lot of work with one large JSON, then this class may be less efficient. * - * Имеются следующие соглашения: - * 1. Предполагается, что в JSON-е нет пробельных символов. - * 2. Предполагается, что строки в JSON в кодировке UTF-8; также могут использоваться \u-последовательности. - * Строки возвращаются в кодировке UTF-8, \u-последовательности переводятся в UTF-8. - * 3. Но суррогатная пара из двух \uXXXX\uYYYY переводится не в UTF-8, а в CESU-8. - * 4. Корректный JSON парсится корректно. - * При работе с некорректным JSON-ом, кидается исключение или возвращаются неверные результаты. - * (пример: считается, что если встретился символ 'n', то после него идёт 'ull' (null); - * если после него идёт ',1,', то исключение не кидается, и, таким образом, возвращается неверный результат) - * 5. Глубина вложенности JSON ограничена (см. MAX_JSON_DEPTH в cpp файле). 
- * При необходимости спуститься на большую глубину, кидается исключение. - * 6. В отличие от JSON, пользоволяет парсить значения вида 64-битное число, со знаком, или без. - * При этом, если число дробное - то дробная часть тихо отбрасывается. - * 7. Числа с плавающей запятой парсятся не с максимальной точностью. + * The following conventions apply: + * 1. It is assumed that there are no whitespace characters in JSON. + * 2. It is assumed that strings in JSON are in UTF-8 encoding; \u-sequences can also be used. + * Strings are returned in UTF-8 encoding, \u-sequences are converted to UTF-8. + * 3. However, a surrogate pair \uXXXX\uYYYY is converted to CESU-8 rather than UTF-8. + * 4. Correct JSON is parsed correctly. + * When working with incorrect JSON, an exception is thrown or incorrect results are returned. + * (example: when the character 'n' is encountered, it is assumed to be followed by 'ull' (null); + * if it is followed by ',1,' instead, no exception is thrown, and thus an incorrect result is returned) + * 5. The nesting depth of JSON is limited (see MAX_JSON_DEPTH in cpp file). + * If deeper nesting has to be traversed, an exception is thrown. + * 6. Unlike JSON, allows parsing 64-bit integer values, signed or unsigned. + * If the number is fractional, the fractional part is silently discarded. + * 7. Floating point numbers are not parsed with maximum precision. * - * Подходит только для чтения JSON, модификация не предусмотрена. - * Все методы immutable, кроме operator++. + * Suitable only for reading JSON; modification is not supported. + * All methods are immutable, except operator++. */ @@ -96,52 +96,52 @@ class JSON bool isNull() const { return getType() == TYPE_NULL; } bool isNameValuePair() const { return getType() == TYPE_NAME_VALUE_PAIR; } - /// Количество элементов в массиве или объекте; если элемент - не массив или объект, то исключение. 
+ /// Number of elements in array or object; if element is not array or object, then exception. size_t size() const; - /// Является ли массив или объект пустыми; если элемент - не массив или объект, то исключение. + /// Whether array or object is empty; if element is not array or object, then exception. bool empty() const; - /// Получить элемент массива по индексу; если элемент - не массив, то исключение. + /// Get array element by index; if element is not array, then exception. JSON operator[] (size_t n) const; - /// Получить элемент объекта по имени; если элемент - не объект, то исключение. + /// Get object element by name; if element is not object, then exception. JSON operator[] (const std::string & name) const; - /// Есть ли в объекте элемент с заданным именем; если элемент - не объект, то исключение. + /// Whether object has element with given name; if element is not object, then exception. bool has(const std::string & name) const { return has(name.data(), name.size()); } bool has(const char * data, size_t size) const; - /// Получить значение элемента; исключение, если элемент имеет неправильный тип. + /// Get element value; exception if element has wrong type. template T get() const; - /// если значения нет, или тип неверный, то возвращает дефолтное значение + /// if value is missing or type is wrong, then returns default value template T getWithDefault(const std::string & key, const T & default_ = T()) const; double getDouble() const; - Int64 getInt() const; /// Отбросить дробную часть. - UInt64 getUInt() const; /// Отбросить дробную часть. Если число отрицательное - исключение. + Int64 getInt() const; /// Discard fractional part. + UInt64 getUInt() const; /// Discard fractional part. If number is negative - exception. std::string getString() const; bool getBool() const; - std::string getName() const; /// Получить имя name-value пары. - JSON getValue() const; /// Получить значение name-value пары. 
+ std::string getName() const; /// Get name of name-value pair. + JSON getValue() const; /// Get value of name-value pair. std::string_view getRawString() const; std::string_view getRawName() const; - /// Получить значение элемента; если элемент - строка, то распарсить значение из строки; если не строка или число - то исключение. + /// Get element value; if element is string, then parse value from string; if not string or number - then exception. double toDouble() const; Int64 toInt() const; UInt64 toUInt() const; - /** Преобразовать любой элемент в строку. - * Для строки возвращается её значение, для всех остальных элементов - сериализованное представление. + /** Convert any element to string. + * For string returns its value, for all other elements - serialized representation. */ std::string toString() const; - /// Класс JSON одновременно является итератором по самому себе. + /// JSON class is simultaneously an iterator over itself. using iterator = JSON; using const_iterator = JSON; @@ -150,32 +150,32 @@ class JSON bool operator== (const JSON & rhs) const { return ptr_begin == rhs.ptr_begin; } bool operator!= (const JSON & rhs) const { return ptr_begin != rhs.ptr_begin; } - /** Если элемент - массив или объект, то begin() возвращает iterator, - * который указывает на первый элемент массива или первую name-value пару объекта. + /** If element is array or object, then begin() returns iterator, + * which points to the first element of array or first name-value pair of object. */ iterator begin() const; - /** end() - значение, которое нельзя использовать; сигнализирует о том, что элементы закончились. + /** end() - value that cannot be used; signals that elements are finished. */ iterator end() const; - /// Перейти к следующему элементу массива или следующей name-value паре объекта. + /// Move to next array element or next name-value pair of object. 
iterator & operator++(); iterator operator++(int); // NOLINT(cert-dcl21-cpp) - /// Есть ли в строке escape-последовательности + /// Whether string has escape sequences bool hasEscapes() const; - /// Есть ли в строке спец-символы из набора \, ', \0, \b, \f, \r, \n, \t, возможно, заэскейпленные. + /// Whether string has special characters from the set \, ', \0, \b, \f, \r, \n, \t, possibly escaped. bool hasSpecialChars() const; private: - /// Проверить глубину рекурсии, а также корректность диапазона памяти. + /// Check recursion depth and memory range correctness. void checkInit() const; - /// Проверить, что pos лежит внутри диапазона памяти. + /// Check that pos lies within memory range. void checkPos(Pos pos) const; - /// Вернуть позицию после заданного элемента. + /// Return position after given element. Pos skipString() const; Pos skipNumber() const; Pos skipBool() const; @@ -186,7 +186,7 @@ class JSON Pos skipElement() const; - /// Найти name-value пару с заданным именем в объекте. + /// Find name-value pair with given name in object. 
Pos searchField(const std::string & name) const { return searchField(name.data(), name.size()); } Pos searchField(const char * data, size_t size) const; diff --git a/base/base/TypeName.h b/base/base/TypeName.h index 1f4b475d653..a00af5db503 100644 --- a/base/base/TypeName.h +++ b/base/base/TypeName.h @@ -44,6 +44,7 @@ TN_MAP(Decimal64) TN_MAP(Decimal128) TN_MAP(Decimal256) TN_MAP(DateTime64) +TN_MAP(Time64) TN_MAP(Array) TN_MAP(Tuple) TN_MAP(Map) diff --git a/base/base/extended_types.h b/base/base/extended_types.h index ef36a5385a0..5a7b3e718ae 100644 --- a/base/base/extended_types.h +++ b/base/base/extended_types.h @@ -74,6 +74,7 @@ template concept is_floating_point = M(DataTypeDate) \ M(DataTypeDate32) \ M(DataTypeDateTime) \ + M(DataTypeTime) \ M(DataTypeInt8) \ M(DataTypeUInt8) \ M(DataTypeInt16) \ @@ -94,6 +95,7 @@ template concept is_floating_point = M(DataTypeDate, X) \ M(DataTypeDate32, X) \ M(DataTypeDateTime, X) \ + M(DataTypeTime, X) \ M(DataTypeInt8, X) \ M(DataTypeUInt8, X) \ M(DataTypeInt16, X) \ diff --git a/base/base/map.h b/base/base/map.h deleted file mode 100644 index 0de42ebfdf6..00000000000 --- a/base/base/map.h +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#include -#include - -namespace collections -{ - -/// \brief Strip type off top level reference and cv-qualifiers thus allowing storage in containers -template -using unqualified_t = std::remove_cv_t>; - -/** \brief Returns collection of the same container-type as the input collection, - * with each element transformed by the application of `mapper`. - */ -template