From 02af9945c71550c0c7c2d2db220e4556d70f7c68 Mon Sep 17 00:00:00 2001 From: firewave Date: Fri, 15 Aug 2025 11:13:46 +0200 Subject: [PATCH 1/6] added constructors with modern buffer wrappers to `TokenList` and made it possible to hide "unsafe" ones --- simplecpp.cpp | 9 +-------- simplecpp.h | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 22a45e7a..9c9d9846 100644 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -466,20 +466,13 @@ simplecpp::TokenList::TokenList(std::istream &istr, std::vector<std::string> &fi readfile(stream,filename,outputList); } -simplecpp::TokenList::TokenList(const unsigned char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename, OutputList *outputList) +simplecpp::TokenList::TokenList(const unsigned char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename, OutputList *outputList, int /*unused*/) : frontToken(nullptr), backToken(nullptr), files(filenames) { StdCharBufStream stream(data, size); readfile(stream,filename,outputList); } -simplecpp::TokenList::TokenList(const char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename, OutputList *outputList) - : frontToken(nullptr), backToken(nullptr), files(filenames) -{ - StdCharBufStream stream(reinterpret_cast<const unsigned char*>(data), size); - readfile(stream,filename,outputList); -} - simplecpp::TokenList::TokenList(const std::string &filename, std::vector<std::string> &filenames, OutputList *outputList) : frontToken(nullptr), backToken(nullptr), files(filenames) { diff --git a/simplecpp.h b/simplecpp.h index 05de07dd..f54baf92 100644 --- a/simplecpp.h +++ b/simplecpp.h @@ -21,6 +21,13 @@ #include #include +#if (__cplusplus >= 201703L) && (__cplusplus < 202002L) +#include <string_view> +#endif +#if __cplusplus >= 202002L +#include <span> +#endif + #ifdef _WIN32 # ifdef SIMPLECPP_EXPORT # define SIMPLECPP_LIB __declspec(dllexport) @@ -46,6 +53,15 @@ # pragma warning(disable : 4244) 
#endif +// provide unsafe (i.e. raw pointer) API for TokenList +// note: std::istream has an overhead compared to raw pointers +#ifndef SIMPLECPP_UNSAFE_API +// still provide the unsafe API for standards which lack the performant wrappers +# if __cplusplus < 201703L +# define SIMPLECPP_UNSAFE_API +# endif +#endif + namespace simplecpp { /** C code standard */ enum cstd_t { CUnknown=-1, C89, C99, C11, C17, C23 }; @@ -216,10 +232,34 @@ namespace simplecpp { explicit TokenList(std::vector<std::string> &filenames); /** generates a token list from the given std::istream parameter */ TokenList(std::istream &istr, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr); +#ifdef SIMPLECPP_UNSAFE_API /** generates a token list from the given buffer */ - TokenList(const unsigned char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr); + TokenList(const unsigned char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(data, size, filenames, filename, outputList, 0) + {} + /** generates a token list from the given buffer */ + TokenList(const char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(reinterpret_cast<const unsigned char*>(data), size, filenames, filename, outputList, 0) + {} +#endif +#if (__cplusplus >= 201703L) && (__cplusplus < 202002L) /** generates a token list from the given buffer */ - TokenList(const char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr); + TokenList(std::string_view data, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(reinterpret_cast<const unsigned char*>(data.data()), data.size(), filenames, filename, outputList, 0) + {} +#endif +#if __cplusplus >= 202002L + 
/** generates a token list from the given buffer */ + TokenList(std::span<const char> data, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(reinterpret_cast<const unsigned char*>(data.data()), data.size(), filenames, filename, outputList, 0) + {} + + /** generates a token list from the given buffer */ + TokenList(std::span<const unsigned char> data, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(data.data(), data.size(), filenames, filename, outputList, 0) + {} +#endif + /** generates a token list from the given filename parameter */ TokenList(const std::string &filename, std::vector<std::string> &filenames, OutputList *outputList = nullptr); TokenList(const TokenList &other); @@ -295,6 +335,8 @@ namespace simplecpp { } private: + TokenList(const unsigned char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename, OutputList *outputList, int unused); + void combineOperators(); void constFoldUnaryNotPosNeg(Token *tok); From 1aeef68b76e0470b86dab788ab586ccafcde9669 Mon Sep 17 00:00:00 2001 From: firewave Date: Fri, 15 Aug 2025 23:48:26 +0200 Subject: [PATCH 2/6] use feature-test macros --- simplecpp.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/simplecpp.h b/simplecpp.h index f54baf92..4c5f1d6d 100644 --- a/simplecpp.h +++ b/simplecpp.h @@ -20,11 +20,14 @@ #include #include #include +#if __cplusplus >= 202002L +# include <version> +#endif -#if (__cplusplus >= 201703L) && (__cplusplus < 202002L) +#if defined(__cpp_lib_string_view) && !defined(__cpp_lib_span) #include <string_view> #endif -#if __cplusplus >= 202002L +#ifdef __cpp_lib_span #include <span> #endif #ifdef _WIN32 # ifdef SIMPLECPP_EXPORT # define SIMPLECPP_LIB __declspec(dllexport) @@ -56,8 +59,8 @@ // provide unsafe (i.e. 
raw pointer) API for TokenList // note: std::istream has an overhead compared to raw pointers #ifndef SIMPLECPP_UNSAFE_API -// still provide the unsafe API for standards which lack the performant wrappers -# if __cplusplus < 201703L +// still provide the unsafe API in case we lack the performant wrappers +# if !defined(__cpp_lib_string_view) && !defined(__cpp_lib_span) # define SIMPLECPP_UNSAFE_API # endif #endif @@ -242,13 +245,13 @@ namespace simplecpp { : TokenList(reinterpret_cast<const unsigned char*>(data), size, filenames, filename, outputList, 0) {} #endif -#if (__cplusplus >= 201703L) && (__cplusplus < 202002L) +#if defined(__cpp_lib_string_view) && !defined(__cpp_lib_span) /** generates a token list from the given buffer */ TokenList(std::string_view data, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) : TokenList(reinterpret_cast<const unsigned char*>(data.data()), data.size(), filenames, filename, outputList, 0) {} #endif -#if __cplusplus >= 202002L +#ifdef __cpp_lib_span /** generates a token list from the given buffer */ TokenList(std::span<const char> data, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) : TokenList(reinterpret_cast<const unsigned char*>(data.data()), data.size(), filenames, filename, outputList, 0) From 5207325c3957b27dca6c31e30e9502e4991c8aed Mon Sep 17 00:00:00 2001 From: firewave Date: Tue, 19 Aug 2025 09:22:40 +0200 Subject: [PATCH 3/6] test.cpp: added compilation test for safe api --- .github/workflows/CI-unixish.yml | 10 ++++++++ test.cpp | 40 ++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index 60361389..35004004 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -61,6 +61,16 @@ jobs: run: | make -j$(nproc) selfcheck + - name: make testrunner (c++17) + run: | + make clean + make -j$(nproc) testrunner CXXOPTS="-std=c++17" + + - name: make testrunner (c++20) + 
run: | + make clean + make -j$(nproc) testrunner CXXOPTS="-std=c++20" + - name: Run CMake run: | cmake -S . -B cmake.output -DCMAKE_COMPILE_WARNING_AS_ERROR=On diff --git a/test.cpp b/test.cpp index 7b108638..0d73002d 100644 --- a/test.cpp +++ b/test.cpp @@ -3169,6 +3169,44 @@ static void preprocess_files() } } +static void safe_api() +{ + // this test is to make sure the safe APIs are compiling +#if defined(__cpp_lib_string_view) || defined(__cpp_lib_span) + std::vector<std::string> filenames; +# if defined(__cpp_lib_string_view) + { + const char input[] = "code"; + const std::string_view sv = input; + // std::string_view can be implicitly converted into a std::span + simplecpp::TokenList(sv,filenames,""); + } +# endif +# ifdef __cpp_lib_span + { + char input[] = "code"; + const std::span sp = input; + simplecpp::TokenList(sp,filenames,""); + } + { + const char input[] = "code"; + const std::span sp = input; + simplecpp::TokenList(sp,filenames,""); + } + { + unsigned char input[] = "code"; + const std::span sp = input; + simplecpp::TokenList(sp,filenames,""); + } + { + const unsigned char input[] = "code"; + const std::span sp = input; + simplecpp::TokenList(sp,filenames,""); + } +# endif +#endif +} + static void fuzz_crash() { { @@ -3435,6 +3473,8 @@ int main(int argc, char **argv) TEST_CASE(preprocess_files); + TEST_CASE(safe_api); + TEST_CASE(fuzz_crash); return numberOfFailedAssertions > 0 ? 
EXIT_FAILURE : EXIT_SUCCESS; From 809b6244d7861a092f1b14a3823ef952d917e68f Mon Sep 17 00:00:00 2001 From: firewave Date: Thu, 21 Aug 2025 12:56:31 +0200 Subject: [PATCH 4/6] added `TokenList` constructors for buffers which do not require a size argument --- simplecpp.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/simplecpp.h b/simplecpp.h index 4c5f1d6d..6e7bc6a4 100644 --- a/simplecpp.h +++ b/simplecpp.h @@ -236,6 +236,17 @@ namespace simplecpp { /** generates a token list from the given std::istream parameter */ TokenList(std::istream &istr, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr); #ifdef SIMPLECPP_UNSAFE_API + /** generates a token list from the given buffer */ + template<std::size_t size> + TokenList(const char (&data)[size], std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(reinterpret_cast<const unsigned char*>(data), size-1, filenames, filename, outputList, 0) + {} + /** generates a token list from the given buffer */ + template<std::size_t size> + TokenList(const unsigned char (&data)[size], std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(data, size-1, filenames, filename, outputList, 0) + {} + /** generates a token list from the given buffer */ TokenList(const unsigned char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) : TokenList(data, size, filenames, filename, outputList, 0) From 24fe556f262d63abc41d7aa098e45e891c88b9d6 Mon Sep 17 00:00:00 2001 From: firewave Date: Thu, 28 Aug 2025 22:27:46 +0200 Subject: [PATCH 5/6] renamed `SIMPLECPP_UNSAFE_API` to `SIMPLECPP_TOKENLIST_ALLOW_PTR` --- simplecpp.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/simplecpp.h b/simplecpp.h index 6e7bc6a4..a2b94b8c 100644 --- a/simplecpp.h +++ b/simplecpp.h @@ -56,12 +56,12 @@ # pragma warning(disable : 4244) #endif 
-// provide unsafe (i.e. raw pointer) API for TokenList +// provide legacy (i.e. raw pointer) API for TokenList // note: std::istream has an overhead compared to raw pointers -#ifndef SIMPLECPP_UNSAFE_API -// still provide the unsafe API in case we lack the performant wrappers +#ifndef SIMPLECPP_TOKENLIST_ALLOW_PTR +// still provide the legacy API in case we lack the performant wrappers # if !defined(__cpp_lib_string_view) && !defined(__cpp_lib_span) -# define SIMPLECPP_UNSAFE_API +# define SIMPLECPP_TOKENLIST_ALLOW_PTR # endif #endif @@ -235,7 +235,7 @@ namespace simplecpp { explicit TokenList(std::vector<std::string> &filenames); /** generates a token list from the given std::istream parameter */ TokenList(std::istream &istr, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr); -#ifdef SIMPLECPP_UNSAFE_API +#ifdef SIMPLECPP_TOKENLIST_ALLOW_PTR /** generates a token list from the given buffer */ template<std::size_t size> TokenList(const char (&data)[size], std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) From deeb23a880e3472e542b10a60a44907921c1675b Mon Sep 17 00:00:00 2001 From: firewave Date: Thu, 28 Aug 2025 22:36:23 +0200 Subject: [PATCH 6/6] do not spill `SIMPLECPP_TOKENLIST_ALLOW_PTR` define --- simplecpp.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/simplecpp.h b/simplecpp.h index a2b94b8c..f035ea95 100644 --- a/simplecpp.h +++ b/simplecpp.h @@ -561,6 +561,8 @@ namespace simplecpp { SIMPLECPP_LIB std::string getCppStdString(cppstd_t std); } +#undef SIMPLECPP_TOKENLIST_ALLOW_PTR + #if defined(_MSC_VER) # pragma warning(pop) #endif