diff --git a/.github/workflows/build-extras-cpp-eventanalyze.yml b/.github/workflows/build-extras-cpp-eventanalyze.yml new file mode 100644 index 00000000..48db3d70 --- /dev/null +++ b/.github/workflows/build-extras-cpp-eventanalyze.yml @@ -0,0 +1,88 @@ +name: Build extras/analyze + +on: + workflow_dispatch: + inputs: + python-version: + description: 'Python version to use' + required: true + default: '3.8-dev' + ref: + description: 'The OptSched git ref to checkout to build' + required: true + default: 'master' + build_type: + description: 'CMAKE_BUILD_TYPE' + required: true + default: 'Release' + +jobs: + build: + runs-on: ubuntu-20.04 + + steps: + - name: Install APT dependencies + run: | + # For parallel STL + sudo apt-get install libtbb-dev + + # For latest C++ features + sudo add-apt-repository ppa:ubuntu-toolchain-r/test + sudo apt-get update + + sudo apt-get install g++-11 + + # For pyenv python + sudo apt-get install make build-essential libssl-dev zlib1g-dev \ + libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \ + libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev + + - name: Install Python version + run: | + curl https://pyenv.run | bash + eval "$(pyenv init --path)" + echo "PYENV_ROOT=$HOME/.pyenv" >> $GITHUB_ENV + echo "$HOME/.pyenv/bin" >> $GITHUB_PATH + + export PYENV_ROOT="$HOME/.pyenv" + export PATH="$PYENV_ROOT/bin:$PATH" + + pyenv install ${{ github.event.inputs.python-version }} + pyenv global ${{ github.event.inputs.python-version }} + + python3 --version + + - uses: actions/checkout@v2 + with: + ref: ${{ github.event.inputs.ref }} + + - name: Configure + run: | + eval "$(pyenv init --path)" + + cmake -S extras/analyze -B build \ + -DCMAKE_BUILD_TYPE=${{ github.event.inputs.build_type }} \ + -DPython_FIND_UNVERSIONED_NAMES=FIRST \ + -DPYBIND11_FINDPYTHON=ON \ + -DCMAKE_CXX_COMPILER=g++-11 + + - name: Build + run: | + eval "$(pyenv init --path)" + + cmake --build build -j 2 + + - name: Bundle Shared 
Objects + run: | + cd build + + # Copy the shared object dependencies of this Python module to the current directory + ldd eventanalyze.*.so | sed -E 's/^.*=> (\S+).*$|(\S+) .*$/\1/g' | xargs -I {} cp {} . + chmod +x lib*.so* + + - name: Upload Artifact + uses: actions/upload-artifact@v2 + with: + name: Python-${{ github.event.inputs.python-version }} ${{ github.event.inputs.build_type }} Module + path: build/*.so* + if-no-files-found: error diff --git a/.gitignore b/.gitignore index be2ee7fd..6f35c36b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -/build -/.vscode +build/ +.vscode/ __pycache__ *.pyc diff --git a/extras/analyze/CMakeLists.txt b/extras/analyze/CMakeLists.txt new file mode 100644 index 00000000..9bcd8c81 --- /dev/null +++ b/extras/analyze/CMakeLists.txt @@ -0,0 +1,57 @@ +cmake_minimum_required(VERSION 3.20.3) + +project(EventAnalyze) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +include(FetchContent) + +FetchContent_Declare( + mio + GIT_REPOSITORY https://github.com/mandreyel/mio.git + GIT_TAG 3f86a95c0784d73ce6815237ec33ed25f233b643 +) +FetchContent_MakeAvailable(mio) + +FetchContent_Declare( + pybind11 + GIT_REPOSITORY https://github.com/pybind/pybind11.git + GIT_TAG v2.6.2 +) +FetchContent_MakeAvailable(pybind11) + +FetchContent_Declare( + abseil + GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git + GIT_TAG f39e6ad4753e06d4a0d6a9bf6310478757479984 +) +set(BUILD_TESTING OFF) +FetchContent_MakeAvailable(abseil) + +FetchContent_Declare( + FindTBB + GIT_REPOSITORY https://github.com/justusc/FindTBB.git + GIT_TAG 25ecdea817b3af4a26d74ddcd439642dbd706acb +) +FetchContent_GetProperties(FindTBB) +if(NOT findtbb_POPULATED) + FetchContent_Populate(FindTBB) + list(APPEND CMAKE_MODULE_PATH "${findtbb_SOURCE_DIR}" ${CMAKE_CURRENT_SOURCE_DIR}/cmake) +endif() + +find_package(TBB) +if(TBB_FOUND) + add_definitions(-DHAS_TBB) + link_libraries(tbb) +endif() + +if(CMAKE_CXX_COMPILER_ID STREQUAL GNU) + 
add_compile_options(-Wall -Wextra) +endif() + +file(GLOB_RECURSE sources CONFIGURE_DEPENDS "src/*.cpp") +pybind11_add_module(eventanalyze ${sources}) +target_include_directories(eventanalyze PUBLIC include) +target_compile_features(eventanalyze PUBLIC cxx_std_20) +target_link_libraries(eventanalyze PRIVATE mio::mio absl::base absl::flat_hash_map) diff --git a/extras/analyze/include/parse.hpp b/extras/analyze/include/parse.hpp new file mode 100644 index 00000000..e3a4f0b2 --- /dev/null +++ b/extras/analyze/include/parse.hpp @@ -0,0 +1,11 @@ +#pragma once + +#include "py.hpp" + +namespace ev { +void defParse(pybind11::module &Mod); + +struct EventSchema; + +const EventSchema *getSchema(std::string_view Id); +} // namespace ev diff --git a/extras/analyze/include/py.hpp b/extras/analyze/include/py.hpp new file mode 100644 index 00000000..e445e080 --- /dev/null +++ b/extras/analyze/include/py.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +#include +#include +#include + +#include + +namespace pybind11::detail { +template <> struct type_caster { +public: + PYBIND11_TYPE_CASTER(std::filesystem::path, _("pathlib.Path | str")); + + // Python -> C++ + bool load(handle Src, bool) { + // If !isinstance(Src, str): + if (!PyUnicode_Check(Src.ptr())) { + object PyPath = module::import("pathlib").attr("Path"); + + if (!PyObject_IsInstance(Src.ptr(), PyPath.ptr())) + return false; + } + this->value = std::filesystem::path(std::string(str(Src))); + return true; + } + + static handle cast(const std::filesystem::path &Path, return_value_policy, + handle) { + object PyPath = module::import("pathlib").attr("Path"); + return PyPath(str(Path.string())); + } +}; +} // namespace pybind11::detail diff --git a/extras/analyze/include/types.hpp b/extras/analyze/include/types.hpp new file mode 100644 index 00000000..fdb8997d --- /dev/null +++ b/extras/analyze/include/types.hpp @@ -0,0 +1,133 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include + +#include "py.hpp" + +#include +#include +#include + +namespace ev { +using Number = std::variant; + +struct EventId { + std::string_view Value; + + bool operator==(const EventId &) const = default; +}; + +enum class Type { + Number, + String, + Bool, +}; + +union Value { + Number Num; + std::string_view Str; + bool Bool; +}; + +struct EventSchema { + EventId Id; + std::vector Parameters; + std::vector ParamTypes; + + bool operator==(const EventSchema &) const = default; +}; + +struct Event { + EventId Id; + std::vector Values; +}; + +inline EventId getId(EventId Id) { return Id; } + +// clang-format off +template +requires requires(const T &It) { + { It.Id } -> std::convertible_to; +} +EventId getId(const T &It) { return It.Id; } +// clang-format on + +template EventId getId(const std::vector &Vec) { + assert(!Vec.empty()); + return getId(Vec.front()); +} + +struct EventIdHash { + using is_transparent = void; + + std::size_t operator()(std::string_view Id) const noexcept { + return std::hash()(Id); + } + + std::size_t operator()(EventId Id) const noexcept { + return (*this)(Id.Value); + } + + template std::size_t operator()(const T &It) const noexcept { + return (*this)(getId(It)); + } +}; + +struct EventIdEq { + using is_transparent = void; + + bool operator()(EventId Lhs, EventId Rhs) const { return Lhs == Rhs; } + + template + bool operator()(const T &Lhs, const U &Rhs) const { + return getId(Lhs) == getId(Rhs); + } +}; + +using BlockEventMap = + absl::flat_hash_set, EventIdHash, EventIdEq>; + +struct Logs; +struct Benchmark; + +struct Block { + std::string_view Name; + BlockEventMap Events; + std::string_view RawLog; + + std::string UniqueId; + + ev::Benchmark *Bench; + + std::string File; // Which file was compiled for this block +}; + +struct Benchmark { + std::string Name; + std::vector Blocks; + std::string_view RawLog; + + // Keep the memory around so that we can detect if the Logs object was + // destroyed, 
giving the Python user a good error message. + std::weak_ptr Logs; +}; + +struct Logs { + std::filesystem::path LogFile; + mio::mmap_source MMap; + std::string_view RawLog; + std::vector> Benchmarks; +}; + +void defTypes(pybind11::module &Mod); +} // namespace ev diff --git a/extras/analyze/src/module.cpp b/extras/analyze/src/module.cpp new file mode 100644 index 00000000..c0a0418f --- /dev/null +++ b/extras/analyze/src/module.cpp @@ -0,0 +1,14 @@ +#include "parse.hpp" +#include "py.hpp" +#include "types.hpp" + +namespace py = pybind11; + +PYBIND11_MODULE(eventanalyze, Mod) { + Mod.doc() = "C++-accelerated event logging types and parser"; + + Mod.attr("VERSION") = std::tuple(1, 0, 0); + + ev::defTypes(Mod); + ev::defParse(Mod); +} diff --git a/extras/analyze/src/parse.cpp b/extras/analyze/src/parse.cpp new file mode 100644 index 00000000..7ffc250d --- /dev/null +++ b/extras/analyze/src/parse.cpp @@ -0,0 +1,372 @@ +#include "parse.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "py.hpp" +#include "types.hpp" + +using namespace std::literals; +using namespace ev; +namespace py = pybind11; +namespace fs = std::filesystem; + +static constexpr std::string_view RegionNameEv = + R"("event_id": "ProcessDag", "name": ")"; +static const std::boyer_moore_horspool_searcher + BlockNameSearcher(RegionNameEv.begin(), RegionNameEv.end()); + +// Extracts the name of the block; returns an empty name if the block has no ProcessDag event +static std::string_view parseName(const std::string_view BlockLog) { + auto It = std::search(BlockLog.begin(), BlockLog.end(), BlockNameSearcher); + if (It == BlockLog.end()) return std::string_view(); // marker not found: do not advance the iterator past end() + It += RegionNameEv.size(); // skip the marker itself; the name runs up to the next quote + auto End = std::find(It, BlockLog.end(), '"'); + return std::string_view(It, End); +} + +// Parses out an EventSchema, which is shared for all events of that EventId. 
+static EventSchema parseEventSchema( + EventId Id, + const std::vector> &Init) { + EventSchema Result; + Result.Id = EventId(Id.Value); + Result.ParamTypes.reserve(Init.size() - 1); + Result.Parameters.reserve(Init.size() - 1); + + for (std::size_t Index = 0; Index < Init.size() - 1; ++Index) { + Result.Parameters.push_back(Init[Index + 1].first); + assert(!Init[Index + 1].second.empty()); + if (Init[Index + 1].second.front() == '"') { + Result.ParamTypes.push_back(Type::String); + } else if (Init[Index + 1].second == "true"sv || + Init[Index + 1].second == "false"sv) { + Result.ParamTypes.push_back(Type::Bool); + } else { + Result.ParamTypes.push_back(Type::Number); + } + } + + return Result; +} + +#include + +// Schemas are globally loaded. +// This static/thread_local dance is to make it appropriately thread safe but +// still fast. +static absl::flat_hash_set MasterSchemas; +static std::mutex MasterSchemaMutex; +thread_local absl::flat_hash_set Schemas; + +static void updateSchemaStructures(EventId Id, EventSchema schema) { + std::scoped_lock Lock(MasterSchemaMutex); + if (MasterSchemas.find(Id) == MasterSchemas.end()) + MasterSchemas.emplace_hint(MasterSchemas.end(), std::move(schema)); + Schemas = MasterSchemas; +} + +static Event parseEvent(const std::string_view Event) { + const auto End = Event.rfind('}'); + auto Begin = Event.find('{'); + + std::vector> Result; + + while (Begin < End) { // each iteration consumes one "key": value pair + const auto KeyQ = Event.find('"', Begin + 1); // opening quote of the key; tested before adding 1 so npos is not wrapped to 0 + if (KeyQ == std::string_view::npos) + break; + const auto KeyE = Event.find('"', KeyQ + 1); + if (KeyE == std::string_view::npos) + break; + const std::string_view Key = Event.substr(KeyQ + 1, KeyE - KeyQ - 1); + const auto Colon = Event.find(':', KeyE + 1); // a key without a ':' ends the parse instead of restarting the search at offset 0 + const auto ValF = Colon == std::string_view::npos ? Colon : Event.find_first_not_of(" \t\n", Colon + 1); + if (ValF == std::string_view::npos) + break; + const auto ValE = [&] { + if (Event[ValF] == '"') { + // Find the closing quote, searching from just after the opening quote. + const auto Close = Event.find('"', ValF + 1); + // Step past the closing quote; keep npos intact (npos + 1 would wrap to 0). + return Close == std::string_view::npos ? Close : Close + 1; + 
} else { + // Find the end of the number/bool/etc; either the next whitespace, the + // separating comma, or the end of the JSON object: + return Event.find_first_of(",} \t\n", ValF + 1); + } + }(); + if (ValE == std::string_view::npos) + break; + std::string_view Val = Event.substr(ValF, ValE - ValF); + + Result.emplace_back(Key, Val); + // Find the start of the next element (if there is a next) + Begin = Event.find_first_of(",}", ValE); + if (Begin == std::string_view::npos) + break; + Begin += 1; + } + + assert(Result[0].first == "event_id"sv); + EventId Id(Result[0].second.substr(1, Result[0].second.size() - 2)); + + auto It = Schemas.find(Id); + if (It == Schemas.end()) { + auto Sch = ::parseEventSchema(Id, Result); + ::updateSchemaStructures(Id, std::move(Sch)); + It = Schemas.find(Id); + } + + Id = It->Id; // Update to the non-dangling Id. + + assert(It->ParamTypes.size() == Result.size() - 1); + std::vector Values; + + for (std::size_t Index = 0; Index < Result.size() - 1; ++Index) { + const std::string_view Data = Result[Index + 1].second; + Values.push_back([&]() -> Value { + switch (It->ParamTypes[Index]) { + case Type::Number: { + std::int64_t I64; + [[maybe_unused]] const auto Ri64 = + std::from_chars(Data.data(), Data.data() + Data.size(), I64); + if (Ri64.ec == std::errc() && Ri64.ptr == Data.data() + Data.size()) { + return Value{.Num = Number(I64)}; + } + + std::uint64_t U64; + [[maybe_unused]] const auto Ru64 = + std::from_chars(Data.data(), Data.data() + Data.size(), U64); + if (Ru64.ec == std::errc() && Ru64.ptr == Data.data() + Data.size()) { + return Value{.Num = Number(U64)}; + } + + double Fl; + [[maybe_unused]] const auto Rfl = + std::from_chars(Data.data(), Data.data() + Data.size(), Fl); + if (Rfl.ec == std::errc() && Rfl.ptr == Data.data() + Data.size()) { + return Value{.Num = Number(Fl)}; + } + std::abort(); + } + case Type::String: + return Value{.Str = Data.substr(1, Data.size() - 2)}; + case Type::Bool: + return Value{.Bool = Data 
== "true"sv}; + } + std::abort(); + }()); + } + + return ev::Event{.Id = Id, .Values = std::move(Values)}; +} + +static constexpr std::string_view EventTag = R"(EVENT: {)"; +static const std::boyer_moore_horspool_searcher + EventTagSearcher(EventTag.begin(), EventTag.end()); + +static BlockEventMap parseEvents(const std::string_view BlockLog) { + absl::flat_hash_map, EventIdHash, EventIdEq> + Result; + + const auto E = BlockLog.end(); + auto B = std::search(BlockLog.begin(), E, EventTagSearcher); + while (B != E) { + auto line_end = std::find(B + EventTag.size() - 1, E, '\n'); + + std::string_view Event(B, line_end); + + ev::Event Ev = ::parseEvent(Event); + Result[Ev.Id].push_back(Ev); + + B = std::search(line_end, E, EventTagSearcher); + } + + auto Vals = std::ranges::views::values(Result); + + return BlockEventMap(std::make_move_iterator(Vals.begin()), + std::make_move_iterator(Vals.end())); +} + +static Block parseBlock(ev::Benchmark *Bench, const std::string_view BlockLog) { + std::string_view Name = ::parseName(BlockLog); + BlockEventMap Events = ::parseEvents(BlockLog); + std::string UniqueId = Bench->Name + ':' + std::string(Name); + auto PF = Events.find(EventId("PassFinished")); + if (PF != Events.end()) { + UniqueId += + ",pass=" + + std::to_string(std::get(PF->front().Values.front().Num)); + } + return Block{ + .Name = std::move(Name), + .Events = std::move(Events), + .RawLog = BlockLog, + .UniqueId = std::move(UniqueId), + .Bench = Bench, + // Extracting file info costs quite a bit of time, and we never use it + // anyway. 
+ .File = "", + }; +} + +static std::vector splitBlocks(const std::string_view file) { + static constexpr std::string_view RegionDelimiter = + "********** Opt Scheduling **********"; + const std::boyer_moore_horspool_searcher searcher(RegionDelimiter.begin(), + RegionDelimiter.end()); + + std::vector Result; + + const auto E = file.end(); + auto B = std::search(file.begin(), E, searcher); + while (B != E) { + auto It = std::search(B + RegionDelimiter.size(), E, searcher); + Result.emplace_back(file.data() + std::distance(file.begin(), B), + std::distance(B, It)); + B = It; + } + + return Result; +} + +namespace { +struct BenchmarkRegion { + std::string BenchmarkName; + // The offset in the file + std::size_t Offset; +}; + +enum class BenchmarkRE : int { + Spec, +}; +} // namespace + +static std::shared_ptr parse(std::weak_ptr Logs, + const std::string_view File, + BenchmarkRegion Bench) { + const auto RawBlocks = ::splitBlocks(File); + std::vector Blocks(RawBlocks.size()); + + auto Result = std::make_shared(); + Result->Name = Bench.BenchmarkName; + Result->Logs = std::move(Logs); + Result->RawLog = File; + + std::transform( +#if HAS_TBB + std::execution::par, // not par_unseq: parsing a block locks MasterSchemaMutex, and unsequenced execution forbids taking locks +#endif + RawBlocks.begin(), RawBlocks.end(), Blocks.begin(), + [Bench = Result.get()](std::string_view Blk) { + return ::parseBlock(Bench, Blk); + }); + + Result->Blocks = std::move(Blocks); + + return Result; +} + +static constexpr std::string_view SpecBenchmarkRegion = R"( Building )"; +static const std::boyer_moore_horspool_searcher + SpecBenchmarkSearcher(SpecBenchmarkRegion.begin(), + SpecBenchmarkRegion.end()); +static std::vector splitSpecBenchmarks(std::string_view File) { + std::vector Result; + + auto B = File.begin(); + auto E = File.end(); + while (B != E) { + auto It = std::search(B, E, SpecBenchmarkSearcher); + if (It == E) + break; + It += SpecBenchmarkRegion.size(); + auto EndOfName = std::find(It, E, ' '); + + const auto Name = std::string_view(It, EndOfName); + const std::size_t 
Offset = It - File.begin(); + + Result.emplace_back(std::string(Name), Offset); + + B = It; + } + + return Result; +} + +void ev::defParse(py::module &Mod) { + // static constexpr std::string_view BenchmarkRE + Mod.attr("SPEC_BENCH_RE") = (int)BenchmarkRE::Spec; + + Mod.def("parse_blocks", [](const fs::path &Path, + // One of the RE types. + int REChoice) { + if (REChoice != (int)BenchmarkRE::Spec) { + throw py::value_error("Unknown regular expression number " + + std::to_string(REChoice)); + } + auto Logs = std::make_shared(); + Logs->LogFile = std::move(Path); + Logs->MMap = mio::mmap_source(Logs->LogFile.string()); + Logs->RawLog = std::string_view(Logs->MMap.data(), Logs->MMap.size()); + const std::string_view File = Logs->RawLog; + + const std::vector BenchmarkSections = + [&]() -> std::vector { + switch ((BenchmarkRE)REChoice) { + case BenchmarkRE::Spec: + return splitSpecBenchmarks(File); + } + std::abort(); + }(); + + Logs->Benchmarks.reserve(BenchmarkSections.size()); + for (std::size_t Index = 0; Index < BenchmarkSections.size(); ++Index) { + const std::size_t Offset = BenchmarkSections[Index].Offset; + const std::size_t OffsetEnd = Index + 1 < BenchmarkSections.size() + ? BenchmarkSections[Index + 1].Offset + : File.size(); + + const std::string_view Section = File.substr(Offset, OffsetEnd - Offset); + + Logs->Benchmarks.push_back( + ::parse(Logs, Section, std::move(BenchmarkSections[Index]))); + } + + return Logs; + }); + Mod.def("parse_blocks", [](const fs::path &Path, + // A single benchmark name for the whole logs. 
+ std::string_view BenchmarkName) { + auto Logs = std::make_shared(); + Logs->LogFile = std::move(Path); + Logs->MMap = mio::mmap_source(Logs->LogFile.string()); + Logs->RawLog = std::string_view(Logs->MMap.data(), Logs->MMap.size()); + const std::string_view File = Logs->RawLog; + + Logs->Benchmarks.push_back( + ::parse(Logs, File, BenchmarkRegion{std::string(BenchmarkName), 0})); + + return Logs; + }); +} + +const EventSchema *ev::getSchema(std::string_view Id) { + auto It = MasterSchemas.find(EventId(Id)); + if (It == MasterSchemas.end()) + return nullptr; + return &*It; +} diff --git a/extras/analyze/src/types.cpp b/extras/analyze/src/types.cpp new file mode 100644 index 00000000..b849f5b9 --- /dev/null +++ b/extras/analyze/src/types.cpp @@ -0,0 +1,201 @@ +#include "types.hpp" + +#include +#include + +#include "parse.hpp" +#include "py.hpp" + +using namespace std::literals; +namespace py = pybind11; + +namespace { +template +const T &index_into(std::span Span, std::int64_t index) { + if (index < 0) { + // Negative index indexes from the end + index += Span.size(); + } + if (index < 0 || static_cast(index) >= Span.size()) { + throw py::index_error("Index out of bounds: " + std::to_string(index) + + "/" + std::to_string(Span.size())); + } + return Span[index]; +} +} // namespace + +void ev::defTypes(py::module &Mod) { + py::class_(Mod, "_Event") + .def("__getitem__", + [](const Event &Event, std::string_view Property) -> py::object { + const EventSchema *Schema = ev::getSchema(Event.Id.Value); + if (!Schema) { + throw py::key_error("Unknown event " + + std::string(Event.Id.Value)); + } + const auto Param = std::find(Schema->Parameters.begin(), + Schema->Parameters.end(), Property); + if (Param == Schema->Parameters.end()) // unknown property: raise KeyError rather than index past the end of Values + throw py::key_error(std::string(Property)); + auto Index = std::distance(Schema->Parameters.begin(), Param); + const Value Val = Event.Values[Index]; + switch (Schema->ParamTypes[Index]) { + case Type::Number: + return std::visit( + // clang-format off + [](T x) -> py::object { + // clang-format on + if constexpr (std::same_as) + return 
py::float_(x); + else + return py::int_(x); + }, + Val.Num); + case Type::String: + return py::str(std::string(Val.Str)); + case Type::Bool: + return py::bool_(Val.Bool); + } + std::abort(); + }) + .def("__repr__", [](const Event &Event) { + const EventSchema *Schema = ev::getSchema(Event.Id.Value); + if (!Schema) { + throw py::key_error("Unknown event " + std::string(Event.Id.Value)); + } + + std::ostringstream out; + out << '{'; + for (std::size_t Index = 0; Index < Schema->Parameters.size(); + ++Index) { + if (Index != 0) + out << ", "; + out << '\'' << Schema->Parameters[Index] << "': "; + const Value Val = Event.Values[Index]; + switch (Schema->ParamTypes[Index]) { + case Type::Number: + std::visit([&out](auto x) { out << x; }, Val.Num); + break; + case Type::String: + out << '\'' << Val.Str << '\''; + break; + case Type::Bool: + out << std::boolalpha << Val.Bool; + break; + } + } + out << '}'; + + return out.str(); + }); + + py::class_(Mod, "Block") + .def_readonly("name", &Block::Name) + .def_readonly("raw_log", &Block::RawLog) + .def("__getitem__", + [](const Block &Blk, + std::string_view EvId) -> const std::vector & { + auto It = Blk.Events.find(EventId(EvId)); + if (It != Blk.Events.end()) { + return *It; + } else { + throw py::key_error(std::string(EvId)); + } + }) + .def("_event_names", + [](const Block &Blk) { + std::vector Names; + Names.reserve(Blk.Events.size()); + + for (const auto &Events : Blk.Events) { + Names.push_back(ev::getId(Events).Value); + } + + return Names; + }) + .def_readonly("uniqueid", &Block::UniqueId) + .def("__contains__", + [](const Block &Blk, std::string_view EvId) { + return Blk.Events.contains(EventId(EvId)); + }) + .def("__repr__", [](const Block &Blk) { + return "Name + ", file="s + Blk.File + + ", "s + std::to_string(Blk.Events.size()) + " events)>"; + }); + + struct BenchmarkBlocks { + std::span Blocks; + }; + + py::class_(Mod, "_Blocks") + .def("__getitem__", + [](const BenchmarkBlocks &Blocks, std::int64_t index) { + 
return ::index_into(Blocks.Blocks, index); + }) + .def("__len__", + [](const BenchmarkBlocks &Blocks) { return Blocks.Blocks.size(); }); + + py::class_>(Mod, "Benchmark") + .def_readonly("name", &Benchmark::Name) + .def_readonly("raw_log", &Benchmark::RawLog) + .def_property_readonly( + "blocks", + [](const Benchmark &Bench) { return BenchmarkBlocks{Bench.Blocks}; }) + .def_property_readonly("info", + [](const Benchmark &Bench) -> py::dict { + using namespace pybind11::literals; + + return py::dict("name"_a = Bench.Name); + }) + .def("__repr__", [](const Benchmark &Bench) { + return ""; + }); + + struct LogsBenchmarks { + std::span> Benchmarks; + }; + py::class_(Mod, "_Benchmarks") + .def("__getitem__", + [](const LogsBenchmarks &Benchmarks, std::int64_t index) { + return ::index_into(Benchmarks.Benchmarks, index); + }) + .def("__len__", [](const LogsBenchmarks &Benchmarks) { + return Benchmarks.Benchmarks.size(); + }); + + py::class_>(Mod, "Logs") + .def_property_readonly( + "benchmarks", + [](const ev::Logs &Logs) { return LogsBenchmarks{Logs.Benchmarks}; }) + .def_readonly("raw_log", &Logs::RawLog) + .def("benchmark", + [](const ev::Logs &Logs, const std::string_view BenchName) { + auto It = + std::find_if(Logs.Benchmarks.begin(), Logs.Benchmarks.end(), + [BenchName](const auto &Bench) { + return Bench->Name == BenchName; + }); + + if (It == Logs.Benchmarks.end()) { + throw py::key_error("No benchmark `" + std::string(BenchName) + + "` in this Logs"); + } else { + return *It; // return the owning smart pointer held by Logs; a raw pointer would be adopted by pybind11 as a second, independent owner + } + }) + .def("__iter__", [](py::handle Logs) { return Logs.attr("benchmarks"); }) + .def("__repr__", [](const ev::Logs &Logs) { + std::string Result = "Name; + } + + return Result + ")>"; + }); +} diff --git a/util/analyze/_cpp_types.py b/util/analyze/_cpp_types.py new file mode 100644 index 00000000..24593bc1 --- /dev/null +++ b/util/analyze/_cpp_types.py @@ -0,0 +1,138 @@ +import sys +from typing import Iterator, Union + +from . 
import _types + + +def _make_classes(cpp): + class Block(_types.Block): + def __init__(self, block: cpp.Block): + self.__cpp = block + self.name = self.__cpp.name + + @property + def raw_log(self): + return self.__cpp.raw_log + + # uses inherited single(...) + + def __getitem__(self, event_name): + return self.__cpp[event_name] + + def get(self, event_name, default=None): + if event_name in self.__cpp: + return self.__cpp[event_name] + return default + + def __contains__(self, event_name) -> bool: + return event_name in self.__cpp + + def __iter__(self) -> Iterator[str]: + return iter(self.__cpp._event_names()) + + def __repr__(self): + return repr(self.__cpp) + + def uniqueid(self): + return self.__cpp.uniqueid + + class _BenchmarkBlocks: + def __init__(self, blocks: cpp._Blocks): + self.__cpp = blocks + + def __getitem__(self, index: int) -> _types.Block: + return Block(self.__cpp[index]) + + def __len__(self) -> int: + return len(self.__cpp) + + def __repr__(self): + return repr(self.__cpp) + + class Benchmark(_types.Benchmark): + def __init__(self, benchmark: cpp.Benchmark): + self.__cpp = benchmark + self.name = self.__cpp.name + + @property + def blocks(self): + return _BenchmarkBlocks(self.__cpp.blocks) + + @property + def raw_log(self): + return self.__cpp.raw_log + + @property + def info(self): + return self.__cpp.info + + # Inherit __iter__ + + # Inherit .benchmarks + + def __repr__(self): + return repr(self.__cpp) + + def keep_blocks_if(self, p): + return _types.Benchmark( + {'name': self.name}, + list(filter(p, self)), + ) + + class Logs(_types.Logs): + def __init__(self, logs: cpp.Logs): + self.__cpp = logs + self.benchmarks = list(Benchmark(bench) for bench in logs.benchmarks) + + @property + def raw_log(self): + return self.__cpp.raw_log + + def benchmark(self, name: str) -> _types.Benchmark: + for bench in self.benchmarks: + if bench.name == name: + return bench + + raise KeyError(f'No benchmark `{name}` in this Logs') + + def __iter__(self): + for 
bench in self.benchmarks: + yield from bench + + def __repr__(self): + return repr(self.__cpp) + + def keep_blocks_if(self, p): + return _types.Logs([b.keep_blocks_if(p) for b in self.benchmarks]) + + return { + 'Logs': Logs, + 'Benchmark': Benchmark, + 'Block': Block, + } + + +class _M: + def __init__(self): + self.__cpp = None + + @property + def VERSION(self): + return self.__cpp.VERSION + + @property + def __doc__(self): + return self.__cpp.__doc__ + + def load_module(self, cpp): + self.__cpp = cpp + classes = _make_classes(self.__cpp) + self.Logs = classes['Logs'] + self.Benchmark = classes['Benchmark'] + self.Block = classes['Block'] + + def parse_blocks(self, file, benchspec: Union[str, int]) -> _types.Logs: + return self.Logs(self.__cpp.parse_blocks(file, benchspec)) + + +sys.modules[__name__] = _M() diff --git a/util/analyze/_main.py b/util/analyze/_main.py index e34f92ed..be88bd93 100644 --- a/util/analyze/_main.py +++ b/util/analyze/_main.py @@ -1,11 +1,14 @@ -import pickle import argparse -import json import fnmatch +import json +from pathlib import Path +import pickle +import sys from typing import Callable -from .imports import * from ._types import Block, Logs +from . import _cpp_types +from .imports import * def __load_file(file): @@ -66,12 +69,47 @@ def parse_args(parser: argparse.ArgumentParser, *names, args=None): type=json.loads, help='Keep blocks matching (JSON format)', ) + parser.add_argument( + '--use-c++', + dest='use_cpp', + action='store_true', + help='Use the accelerated C++ parser. The eventanalyze module is expected to be on the PYTHONPATH', + ) + parser.add_argument( + '--c++-module', + dest='cpp_module', + type=Path, + default=None, + help='The path to the accelerated C++ parser module. 
--use-c++ is unnecessary if this is supplied.', + ) args = parser.parse_args(args) + use_cpp = bool(args.use_cpp or args.cpp_module) + + if use_cpp and args.benchsuite != 'spec': + print(f'WARNING: Unable to use the C++-accelerated parser for {args.benchsuite}', file=sys.stderr) + + def cpp_parse_blocks_fn(): + if args.cpp_module: + import importlib + import importlib.util + spec = importlib.util.spec_from_file_location('eventanalyze', args.cpp_module) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + else: + mod = __import__('eventanalyze') + + _cpp_types.load_module(mod) + + def parse(file): + return _cpp_types.parse_blocks(file, mod.SPEC_BENCH_RE) + + return parse + FILE_PARSERS = { 'pickle': __load_filepath, - 'spec': import_cpu2006.parse, + 'spec': cpp_parse_blocks_fn() if use_cpp else import_cpu2006.parse, 'plaidml': import_plaidml.parse, 'shoc': import_shoc.parse, } diff --git a/util/analyze/_types.py b/util/analyze/_types.py index 8151bdc6..01339f16 100644 --- a/util/analyze/_types.py +++ b/util/analyze/_types.py @@ -1,3 +1,17 @@ +def merge_logs(lhs, rhs): + ''' + Merges the logs from the rhs into the lhs. + + The rhs must have different benchmarks from the lhs + ''' + in_both = set(lhs.benchmarks) & set(rhs.benchmarks) + if in_both: + raise ValueError( + 'Cannot merge Logs which share common benchmarks', in_both) + + lhs.benchmarks += rhs.benchmarks + + class Logs: ''' Abstracts a log file as a collection of benchmarks @@ -9,21 +23,6 @@ class Logs: def __init__(self, benchmarks): self.benchmarks = benchmarks - def merge(self, rhs): - ''' - Merges the logs from the rhs into this. 
- - The rhs must have different benchmarks from this Logs - ''' - in_both = set(self.benchmarks) & set(rhs.benchmarks) - if in_both: - raise ValueError( - 'Cannot merge Logs which share common benchmarks', in_both) - - self.benchmarks += rhs.benchmarks - - return self - def benchmark(self, name): ''' Gets the benchmark with the specified name @@ -103,7 +102,7 @@ def single(self, event_name): raises AssertionError if there is not exactly one event with the specified name ''' - result = self.events[event_name] + result = self[event_name] if len(result) != 1: raise AssertionError(f'Multiple events for {event_name}') diff --git a/util/analyze/imports/import_plaidml.py b/util/analyze/imports/import_plaidml.py index dd890d75..b14af55a 100644 --- a/util/analyze/imports/import_plaidml.py +++ b/util/analyze/imports/import_plaidml.py @@ -3,7 +3,7 @@ import os import pathlib -from .._types import Logs +from .._types import Logs, merge_logs from . import import_utils @@ -22,7 +22,8 @@ def parse(path): with logfiles[0].open('r') as f: benchname = benchmark_dir.stem - result.merge( + merge_logs( + result, import_utils.parse_single_bench_file( f.read(), benchname=benchname) ) diff --git a/util/analyze/imports/import_shoc.py b/util/analyze/imports/import_shoc.py index 5174724d..874d2b10 100644 --- a/util/analyze/imports/import_shoc.py +++ b/util/analyze/imports/import_shoc.py @@ -4,7 +4,7 @@ import re import pathlib -from .._types import Logs +from .._types import Logs, merge_logs from . import import_utils @@ -25,7 +25,8 @@ def parse(path): for benchmark in benchmarks: with benchmark.open('r') as f: benchname = benchname_re.search(benchmark.stem).group(1) - result.merge( + merge_logs( + result, import_utils.parse_single_bench_file( f.read(), benchname=benchname) )