diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index 07ee9731..290b4f98 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -145,7 +145,7 @@ jobs: wget https://github.com/danmar/simplecpp/archive/refs/tags/1.5.1.tar.gz tar xvf 1.5.1.tar.gz make clean - make -j$(nproc) CXXOPTS="-O2 -g3" + make -j$(nproc) CXXOPTS="-O3 -g3 -flto" LDOPTS="-flto" valgrind --tool=callgrind ./simplecpp -e simplecpp-1.5.1/simplecpp.cpp 2>callgrind.log || (cat callgrind.log && false) cat callgrind.log callgrind_annotate --auto=no > callgrind.annotated.log diff --git a/Makefile b/Makefile index 7489ec83..8a39db17 100644 --- a/Makefile +++ b/Makefile @@ -11,21 +11,32 @@ TEST_CPPFLAGS = -DSIMPLECPP_TEST_SOURCE_DIR=\"$(CURDIR)\" test.o: CPPFLAGS += $(TEST_CPPFLAGS) %.o: %.cpp simplecpp.h - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< $(LIB_FUZZING_ENGINE) + +fuzz_no.o: fuzz.cpp + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DNO_FUZZ -c -o $@ fuzz.cpp testrunner: test.o simplecpp.o - $(CXX) $(LDFLAGS) simplecpp.o test.o -o testrunner + $(CXX) $(LDFLAGS) -o $@ $^ test: testrunner simplecpp ./testrunner python3 run-tests.py python3 -m pytest integration_test.py -vv +fuzz: fuzz.o simplecpp.o + # TODO: use -stdlib=libc++ -lc++ + # make fuzz CXX=clang++ CXXOPTS="-O3 -flto -fno-omit-frame-pointer -g -gline-tables-only -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=address,undefined -fsanitize-address-use-after-scope -fno-sanitize=integer -fno-sanitize-recover=undefined" LDOPTS="-flto" LIB_FUZZING_ENGINE="-fsanitize=fuzzer" + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $^ $(LIB_FUZZING_ENGINE) + +no-fuzz: fuzz_no.o simplecpp.o + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $^ + selfcheck: simplecpp ./selfcheck.sh simplecpp: main.o simplecpp.o - $(CXX) $(LDFLAGS) main.o simplecpp.o -o simplecpp + $(CXX) $(LDFLAGS) -o $@ $^ clean: - rm -f testrunner simplecpp *.o + rm -f testrunner fuzz no-fuzz simplecpp *.o diff --git 
a/fuzz.cpp b/fuzz.cpp new file mode 100644 index 00000000..17813b58 --- /dev/null +++ b/fuzz.cpp @@ -0,0 +1,66 @@ +/* + * simplecpp - A simple and high-fidelity C/C++ preprocessor library + * Copyright (C) 2016-2024 simplecpp team + */ + +#include "simplecpp.h" + +#include <cstdint> + +#ifdef NO_FUZZ +#include <cstdlib> +#include <fstream> +#include <sstream> +#include <string> +#endif + +static void doProcess(const uint8_t *data, size_t dataSize) +{ + simplecpp::OutputList outputList; + std::vector<std::string> files; + simplecpp::TokenList rawtokens(data, dataSize, files, "test.cpp", &outputList); + + simplecpp::TokenList outputTokens(files); + simplecpp::FileDataCache filedata; + simplecpp::DUI dui; + dui.includePaths = { "/usr/include" }; + std::list<simplecpp::MacroUsage> macroUsage; + std::list<simplecpp::IfCond> ifCond; + simplecpp::preprocess(outputTokens, rawtokens, files, filedata, dui, &outputList, &macroUsage, &ifCond); + + simplecpp::cleanup(filedata); +} + +#ifndef NO_FUZZ +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t dataSize); + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t dataSize) +{ + doProcess(data, dataSize); + return 0; +} +#else +int main(int argc, char * argv[]) +{ + if (argc < 2 || argc > 3) + return EXIT_FAILURE; + + std::ifstream f(argv[1]); + if (!f.is_open()) + return EXIT_FAILURE; + + std::ostringstream oss; + oss << f.rdbuf(); + + if (!f.good()) + return EXIT_FAILURE; + + const int cnt = (argc == 3) ?
std::stoi(argv[2]) : 1; + + const std::string code = oss.str(); + for (int i = 0; i < cnt; ++i) + doProcess(reinterpret_cast(code.data()), code.size()); + + return EXIT_SUCCESS; +} +#endif diff --git a/simplecpp.cpp b/simplecpp.cpp index 0029aac0..2e309e86 100644 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -49,8 +49,16 @@ #ifdef _WIN32 # include +using mode_t = unsigned short; #else # include +# include +#endif + +#ifdef __GNUC__ +# define unlikely(x) __builtin_expect(!!(x), 0) +#else +# define unlikely(x) (x) #endif static bool isHex(const std::string &s) @@ -464,6 +472,77 @@ class FileStream : public simplecpp::TokenList::Stream { int lastStatus; }; +class FileStreamBuffered : public simplecpp::TokenList::Stream { +public: + FileStreamBuffered(const std::string &filename, std::vector &files) + : file(fopen(filename.c_str(), "rb")) + , lastStatus(0) + , buf_len(0) + , buf_idx(-1) + { + if (!file) { + files.push_back(filename); + throw simplecpp::Output(files, simplecpp::Output::FILE_NOT_FOUND, "File is missing: " + filename); + } + init(); + } + + ~FileStreamBuffered() { + fclose(file); + file = nullptr; + } + + virtual int get() { + read_internal(); + return buf[buf_idx++]; + } + virtual int peek() { + read_internal(); + return buf[buf_idx]; + } + virtual void unget() { + --buf_idx; + } + virtual bool good() { + return lastStatus != EOF; + } + +private: + void read_internal() { + // check if we are in the last chunk + if (unlikely(buf_idx >= buf_len)) { + if (buf_len != sizeof(buf)) { + lastStatus = EOF; + return; + } + } + + if (unlikely(buf_idx == -1 || buf_idx == buf_len)) + { + buf_idx = 0; + buf_len = fread(buf, 1, sizeof(buf), file); + if (buf_len == 0) { + lastStatus = EOF; + } + else if (buf_len != sizeof(buf)) { + if (ferror(file)) { + // TODO: is this correct? 
+ lastStatus = EOF; + } + } + } + } + + FileStreamBuffered(const FileStreamBuffered&); + FileStreamBuffered &operator=(const FileStreamBuffered&); + + FILE *file; + int lastStatus; + unsigned char buf[8192]; + int buf_len; + int buf_idx; +}; + simplecpp::TokenList::TokenList(std::vector &filenames) : frontToken(nullptr), backToken(nullptr), files(filenames) {} simplecpp::TokenList::TokenList(std::istream &istr, std::vector &filenames, const std::string &filename, OutputList *outputList) @@ -484,7 +563,7 @@ simplecpp::TokenList::TokenList(const std::string &filename, std::vectorpush_back(e); @@ -694,33 +773,55 @@ void simplecpp::TokenList::readfile(Stream &stream, const std::string &filename, if (oldLastToken != cback()) { oldLastToken = cback(); - if (!isLastLinePreprocessor()) + const Token * const llTok = isLastLinePreprocessor(); + if (!llTok) + continue; + const Token * const llNextToken = llTok->next; + if (!llTok->next) continue; - const std::string lastline(lastLine()); - if (lastline == "# file %str%") { - const Token *strtok = cback(); - while (strtok->comment) - strtok = strtok->previous; - loc.push(location); - location.fileIndex = fileIndex(strtok->str().substr(1U, strtok->str().size() - 2U)); - location.line = 1U; - } else if (lastline == "# line %num%") { - const Token *numtok = cback(); - while (numtok->comment) - numtok = numtok->previous; - lineDirective(location.fileIndex, std::atol(numtok->str().c_str()), &location); - } else if (lastline == "# %num% %str%" || lastline == "# line %num% %str%") { - const Token *strtok = cback(); - while (strtok->comment) - strtok = strtok->previous; - const Token *numtok = strtok->previous; - while (numtok->comment) - numtok = numtok->previous; - lineDirective(fileIndex(replaceAll(strtok->str().substr(1U, strtok->str().size() - 2U),"\\\\","\\")), - std::atol(numtok->str().c_str()), &location); + if (llNextToken->next) { + // #file "file.c" + if (llNextToken->str() == "file" && + llNextToken->next->str()[0] == 
'\"') + { + const Token *strtok = cback(); + while (strtok->comment) + strtok = strtok->previous; + loc.push(location); + location.fileIndex = fileIndex(strtok->str().substr(1U, strtok->str().size() - 2U)); + location.line = 1U; + } + // #3 "file.c" + // #line 3 "file.c" + else if ((llNextToken->number && + llNextToken->next->str()[0] == '\"') || + (llNextToken->str() == "line" && + llNextToken->next->number && + llNextToken->next->next && + llNextToken->next->next->str()[0] == '\"')) + { + const Token *strtok = cback(); + while (strtok->comment) + strtok = strtok->previous; + const Token *numtok = strtok->previous; + while (numtok->comment) + numtok = numtok->previous; + lineDirective(fileIndex(replaceAll(strtok->str().substr(1U, strtok->str().size() - 2U),"\\\\","\\")), + std::atol(numtok->str().c_str()), &location); + } + // #line 3 + else if (llNextToken->str() == "line" && + llNextToken->next->number) + { + const Token *numtok = cback(); + while (numtok->comment) + numtok = numtok->previous; + lineDirective(location.fileIndex, std::atol(numtok->str().c_str()), &location); + } } // #endfile - else if (lastline == "# endfile" && !loc.empty()) { + else if (llNextToken->str() == "endfile" && !loc.empty()) + { location = loc.top(); loc.pop(); } @@ -737,8 +838,8 @@ void simplecpp::TokenList::readfile(Stream &stream, const std::string &filename, TokenString currentToken; if (cback() && cback()->location.line == location.line && cback()->previous && cback()->previous->op == '#') { - const Token* const llTok = lastLineTok(); - if (llTok && llTok->op == '#' && llTok->next && (llTok->next->str() == "error" || llTok->next->str() == "warning")) { + const Token* const ppTok = cback()->previous; + if (ppTok->next && (ppTok->next->str() == "error" || ppTok->next->str() == "warning")) { char prev = ' '; while (stream.good() && (prev == '\\' || (ch != '\r' && ch != '\n'))) { currentToken += ch; @@ -1415,34 +1516,6 @@ std::string simplecpp::TokenList::readUntil(Stream &stream, 
const Location &loca return ret; } -std::string simplecpp::TokenList::lastLine(int maxsize) const -{ - std::string ret; - int count = 0; - for (const Token *tok = cback(); ; tok = tok->previous) { - if (!sameline(tok, cback())) { - break; - } - if (tok->comment) - continue; - if (++count > maxsize) - return ""; - if (!ret.empty()) - ret += ' '; - // add tokens in reverse for performance reasons - if (tok->str()[0] == '\"') - ret += "%rts%"; // %str% - else if (tok->number) - ret += "%mun%"; // %num% - else { - ret += tok->str(); - std::reverse(ret.end() - tok->str().length(), ret.end()); - } - } - std::reverse(ret.begin(), ret.end()); - return ret; -} - const simplecpp::Token* simplecpp::TokenList::lastLineTok(int maxsize) const { const Token* prevTok = nullptr; @@ -1459,10 +1532,12 @@ const simplecpp::Token* simplecpp::TokenList::lastLineTok(int maxsize) const return prevTok; } -bool simplecpp::TokenList::isLastLinePreprocessor(int maxsize) const +const simplecpp::Token* simplecpp::TokenList::isLastLinePreprocessor(int maxsize) const { const Token * const prevTok = lastLineTok(maxsize); - return prevTok && prevTok->op == '#'; + if (prevTok && prevTok->op == '#') + return prevTok; + return nullptr; } unsigned int simplecpp::TokenList::fileIndex(const std::string &filename) @@ -2019,7 +2094,13 @@ namespace simplecpp { tok = tok->next; if (tok == endToken2) { - output.push_back(new Token(*tok->previous)); + if (tok) { + output.push_back(new Token(*tok->previous)); + } + else { + output.push_back(new Token(*nameTokInst)); + output.back()->setstr("\"\""); + } break; } if (tok->op == '#') { @@ -3007,9 +3088,11 @@ static std::string openHeaderDirect(std::ifstream &f, const std::string &path) if (nonExistingFilesCache.contains(path)) return ""; // file is known not to exist, skip expensive file open call #endif - f.open(path.c_str()); - if (f.is_open()) - return path; + if (simplecpp::isFile(path)) { + f.open(path.c_str()); + if (f.is_open()) + return path; + } #ifdef 
SIMPLECPP_WINDOWS nonExistingFilesCache.add(path); #endif @@ -3129,6 +3212,9 @@ bool simplecpp::FileDataCache::getFileId(const std::string &path, FileID &id) if (stat(path.c_str(), &statbuf) != 0) return false; + if ((statbuf.st_mode & S_IFMT) != S_IFREG) + return false; + id.dev = statbuf.st_dev; id.ino = statbuf.st_ino; @@ -3873,3 +3959,21 @@ std::string simplecpp::getCppStdString(const std::string &std) { return getCppStdString(getCppStd(std)); } + +static mode_t file_type(const std::string &path) +{ + struct stat file_stat; + if (stat(path.c_str(), &file_stat) == -1) + return 0; + return file_stat.st_mode & S_IFMT; +} + +bool simplecpp::isFile(const std::string &path) +{ + return file_type(path) == S_IFREG; +} + +bool simplecpp::isDirectory(const std::string &path) +{ + return file_type(path) == S_IFDIR; +} diff --git a/simplecpp.h b/simplecpp.h index b461de47..9c287cc5 100644 --- a/simplecpp.h +++ b/simplecpp.h @@ -361,9 +361,8 @@ namespace simplecpp { std::string readUntil(Stream &stream, const Location &location, char start, char end, OutputList *outputList); void lineDirective(unsigned int fileIndex, unsigned int line, Location *location); - std::string lastLine(int maxsize=1000) const; const Token* lastLineTok(int maxsize=1000) const; - bool isLastLinePreprocessor(int maxsize=1000) const; + const Token* isLastLinePreprocessor(int maxsize=1000) const; unsigned int fileIndex(const std::string &filename); @@ -404,6 +403,20 @@ namespace simplecpp { bool removeComments; /** remove comment tokens from included files */ }; + /** + * @brief Checks if given path is a file + * @param path Path to be checked + * @return true if given path is a file + */ + SIMPLECPP_LIB bool isFile(const std::string &path); + + /** + * @brief Checks if a given path is a directory + * @param path Path to be checked + * @return true if given path is a directory + */ + SIMPLECPP_LIB bool isDirectory(const std::string &path); + struct SIMPLECPP_LIB FileData { /** The canonical filename 
associated with this data */ std::string filename; diff --git a/test.cpp b/test.cpp index d42d6570..1677c92e 100644 --- a/test.cpp +++ b/test.cpp @@ -1041,6 +1041,16 @@ static void define_va_opt_7() toString(outputList)); } +static void define_va_opt_8() +{ + const char code[] = "#define f(...) #__VA_OPT__(x)\n" + "const char* v1 = f();"; + + simplecpp::OutputList outputList; + ASSERT_EQUALS("\nconst char * v1 = \"\" ;", preprocess(code, &outputList)); + ASSERT_EQUALS("", toString(outputList)); +} + static void define_ifdef() { const char code[] = "#define A(X) X\n" @@ -2090,6 +2100,44 @@ static void missingHeader4() ASSERT_EQUALS("file0,1,syntax_error,No header in #include\n", toString(outputList)); } +#ifndef _WIN32 +static void missingHeader5() +{ + // this is a directory + const char code[] = "#include \"/\"\n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,missing_header,Header not found: \"/\"\n", toString(outputList)); +} + +static void missingHeader6() +{ + // this is a directory + const char code[] = "#include \"/usr\"\n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,missing_header,Header not found: \"/usr\"\n", toString(outputList)); +} + +static void missingHeader7() +{ + // this is a directory + const char code[] = "#include \n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,missing_header,Header not found: \n", toString(outputList)); +} + +static void missingHeader8() +{ + // this is a directory + const char code[] = "#include \n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,missing_header,Header not found: \n", toString(outputList)); +} +#endif + static void nestedInclude() { const char code[] = "#include \"test.h\"\n"; @@ -3310,6 +3358,16 @@ static void leak() } } +static void leak() 
+{ + const char code[] = "#include\n" + "#include\n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,missing_header,Header not found: \n" + "file0,2,missing_header,Header not found: \n", toString(outputList)); +} + int main(int argc, char **argv) { TEST_CASE(backslash); @@ -3383,6 +3441,7 @@ int main(int argc, char **argv) TEST_CASE(define_va_opt_5); TEST_CASE(define_va_opt_6); TEST_CASE(define_va_opt_7); + TEST_CASE(define_va_opt_8); TEST_CASE(pragma_backslash); // multiline pragma directive @@ -3483,6 +3542,12 @@ int main(int argc, char **argv) TEST_CASE(missingHeader2); TEST_CASE(missingHeader3); TEST_CASE(missingHeader4); +#ifndef _WIN32 + TEST_CASE(missingHeader5); + TEST_CASE(missingHeader6); + TEST_CASE(missingHeader7); + TEST_CASE(missingHeader8); +#endif TEST_CASE(nestedInclude); TEST_CASE(systemInclude); TEST_CASE(circularInclude);