Skip to content

Commit 86022e4

Browse files
❇️ Started to port the parsing code for gcc
No API yet
1 parent e837258 commit 86022e4

File tree

9 files changed

+345
-16
lines changed

9 files changed

+345
-16
lines changed

.clang-format

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,5 @@ SpacesBeforeTrailingComments: 2
99
ColumnLimit: 80
1010
IndentWidth: 4
1111
TabWidth: 4
12+
AlignOperands: Align
13+
BreakBeforeBinaryOperators: All

.vscode/settings.json

Lines changed: 0 additions & 3 deletions
This file was deleted.

CMakeLists.txt

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,11 @@ set(CMAKE_CXX_STANDARD 23)
44
set(CMAKE_CXX_STANDARD_REQUIRED ON)
55
set(CMAKE_CXX_EXTENSIONS OFF) # Ensure only standard C++ features
66

7-
# TODO: Maybe get rid of? Don't remember why its here and I think libhal's cmake utils handles this
8-
# set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
9-
# add_custom_target(copy_compile_commands ALL
10-
# COMMAND ${CMAKE_COMMAND} -E copy_if_different
11-
# ${CMAKE_BINARY_DIR}/compile_commands.json
12-
# ${CMAKE_SOURCE_DIR}/compile_commands.json
13-
# DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json)
7+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
8+
9+
find_package(ctre REQUIRED)
10+
11+
1412

1513
# Find package dependencies
1614
#TODO: Remove due to this being a temporary patch for libraries to work
@@ -26,6 +24,11 @@ target_link_libraries(${PROJECT_NAME} libelf::libelf)
2624

2725
#Linking Libraries
2826
target_link_libraries(${PROJECT_NAME} libelf::libelf)
27+
add_executable(${PROJECT_NAME} src/main.cpp src/gcc_parse.cpp)
28+
29+
# Add include directories
30+
target_include_directories(${PROJECT_NAME} PUBLIC include/)
31+
# Use the plain signature here: the libelf link line for this target uses the
# plain form, and CMake rejects mixing plain and keyword (PUBLIC/PRIVATE)
# target_link_libraries() calls on the same target.
target_link_libraries(${PROJECT_NAME} ctre::ctre)
2932

3033
# Add compile options
3134
list(APPEND COMPILER_BUILD_FLAGS

conanfile.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def requirements(self):
3030
self.requires("libelf/0.8.13")
3131

3232
self.requires("tl-function-ref/1.0.0")
33+
self.requires("ctre/[^3.9.0]")
3334

3435
def generate(self):
3536
c = CMake(self)

include/gcc_parse.hpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#pragma once
2+
#include <algorithm>
3+
#include <cctype>
4+
#include <cstddef>
5+
#include <ctll/fixed_string.hpp>
6+
#include <ctre.hpp>
7+
#include <ctre/wrapper.hpp>
8+
#include <ranges>
9+
#include <string>
10+
11+
namespace rng = std::ranges;
12+
namespace views = rng::views;
13+
14+
using string = std::string;
15+
using std::operator""sv;
16+
17+
struct Node;
18+
19+
/**
 * @brief Parses the symbol table section of the dump file at @p file_path.
 *
 * Lines are skipped until one equal to "Symbol table:" is found; the entries
 * that follow are parsed and their fields are printed with std::println.
 * Nothing is returned to the caller.
 *
 * @param file_path Path of the file to read.
 * @throws std::runtime_error if the file cannot be opened.
 */
void parse(std::string file_path);
20+
21+
constexpr bool is_word_in_str(std::string_view const word,
22+
std::string_view const full_str)
23+
{
24+
constexpr auto pattern = ctll::fixed_string{ "\\n| |\\t" };
25+
return rng::any_of(ctre::split<pattern>(full_str),
26+
[word](auto&& c) { return word == c; });
27+
}

src/gcc_parse.cpp

Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
#include "gcc_parse.hpp"
2+
#include <algorithm>
3+
#include <cstddef>
4+
#include <format>
5+
#include <fstream>
6+
#include <print>
7+
#include <ranges>
8+
#include <string>
9+
#include <string_view>
10+
#include <unordered_map>
11+
#include <vector>
12+
13+
namespace rng = std::ranges;
14+
namespace views = rng::views;
15+
16+
using string = std::string;
17+
using std::operator""sv;
18+
19+
/**
 * @brief Record describing one function together with its call edges.
 *
 * Every neighbour in @ref callees and @ref callers is held by value, so each
 * element is an independent copy of a Node rather than a reference to one.
 */
struct Node
{
    int id;                      ///< Numeric identifier of the function.
    std::string fn_name;         ///< Function name as it appears in the input.
    std::string demangled_name;  ///< Human readable form of the name.
    std::string visibility;      ///< Visibility text of the entry.
    std::string availability;    ///< Availability text of the entry.
    std::string flags;           ///< Flags text of the entry.
    std::vector<Node> callees;   ///< Copies of the nodes this one calls.
    std::vector<Node> callers;   ///< Copies of the nodes calling this one.

    /**
     * @brief Builds a node without any edges.
     *
     * All string arguments are taken by value and moved into the members;
     * @ref callees and @ref callers start out empty.
     */
    Node(int p_nid,
         std::string p_fn_name,
         std::string p_demangled_name,
         std::string p_visibility,
         std::string p_avaliablity,
         std::string p_flags)
      : id(p_nid)
      , fn_name(std::move(p_fn_name))
      , demangled_name(std::move(p_demangled_name))
      , visibility(std::move(p_visibility))
      , availability(std::move(p_avaliablity))
      , flags(std::move(p_flags))
    {
    }

    // A node is freely copyable and movable. There is deliberately no
    // default constructor: a node always needs an id and its descriptive
    // strings.
    Node(Node const&) = default;
    Node(Node&&) = default;
    Node& operator=(Node const&) = default;
    Node& operator=(Node&&) = default;
};
49+
50+
/**
 * @brief Collects the fn_name of every node in @p v, preserving order.
 *
 * The span is over const nodes so the helper can be used with const
 * containers as well as mutable ones (a std::vector<Node> or a
 * std::span<Node> still converts implicitly).
 *
 * @param v Nodes whose names are wanted.
 * @return A std::vector<std::string> with one name per node.
 */
inline auto get_names(std::span<Node const> v)
{
    return v | views::transform([](Node const& n) { return n.fn_name; })
           | rng::to<std::vector<string>>();
}

/**
 * @brief std::format support for Node.
 *
 * The node is rendered as a single line of "key: value" pairs separated by
 * ", ". Callers and callees are shown by function name only. Format
 * specifiers are parsed and applied by the inherited std::string formatter.
 */
template<>
struct std::formatter<Node> : std::formatter<std::string>
{
    // Takes the node by const reference so const objects and temporaries can
    // be formatted as well; formatting never modifies the node.
    auto format(Node const& n, format_context& ctx) const
    {
        return formatter<std::string>::format(
          std::format("id: {}, "
                      "func_name: {}, "
                      "demangled_name: {}, "
                      "visibility: {}, "
                      "availability: {}, "
                      "flags: {}, "
                      "callers: {}, "
                      "callees: {}",
                      n.id,
                      n.fn_name,
                      n.demangled_name,
                      n.visibility,
                      n.availability,
                      n.flags,
                      get_names(n.callers),
                      get_names(n.callees)),
          ctx);
    }
};
80+
81+
/**
 * @brief Returns a copy of @p str without leading and trailing whitespace.
 *
 * Whitespace is whatever std::isspace reports for the current C locale.
 * Interior whitespace is left untouched.
 *
 * The function is `inline` rather than `constexpr`: std::isspace is not a
 * constexpr function, so the body could never be evaluated at compile time.
 *
 * @param str Text to trim.
 * @return The trimmed text; empty if @p str is empty or all whitespace.
 */
inline std::string trim(std::string_view str)
{
    // std::isspace has undefined behaviour for negative values, which a plain
    // char can hold for non-ASCII bytes, so go through unsigned char first.
    auto const is_space = [](char c) {
        return std::isspace(static_cast<unsigned char>(c)) != 0;
    };

    auto const first = std::find_if_not(str.begin(), str.end(), is_space);
    // Scan backwards, but never past `first`, so an all-whitespace input
    // yields an empty [first, last) range.
    auto const last
      = std::find_if_not(str.rbegin(),
                         std::string_view::const_reverse_iterator(first),
                         is_space)
          .base();

    return std::string(first, last);
}
91+
92+
constexpr bool is_whitespace(std::string_view sv)
93+
{
94+
95+
return rng::all_of(sv, [](char c) { return std::isspace(c); });
96+
}
97+
98+
std::vector<string> parse_fn_list(std::string_view inp)
99+
{
100+
auto split_vec
101+
= inp | views::split(' ')
102+
| views::transform([](auto&& s) { return std::string_view(s); })
103+
| rng::to<std::vector<string>>();
104+
105+
std::vector<string> res;
106+
for (string& s : split_vec) {
107+
string name = *(s | views::split('/') | views::take(1)
108+
| views::transform(
109+
[](auto&& ss) { return trim(std::string_view(ss)); }))
110+
.begin();
111+
112+
res.push_back(name);
113+
}
114+
return res;
115+
}
116+
117+
namespace {

/// Key/value fields collected for one symbol table entry.
using RawFields = std::unordered_map<string, string>;

/// Returns the value stored under `key`, or an empty string if it is absent.
/// Unlike operator[], this never inserts into the map.
string const& field_or_empty(RawFields const& fields, string const& key)
{
    static string const empty;
    auto const it = fields.find(key);
    return it != fields.end() ? it->second : empty;
}

/// Reads the text following the "Symbol table:" line and groups it into one
/// newline separated string per entry, keeping only entries that were not
/// marked by a "Type" line lacking the word "function".
std::vector<string> read_function_entries(std::ifstream& file)
{
    string line;
    // Everything in front of the symbol table is ignored.
    while (std::getline(file, line)) {
        if (line == "Symbol table:") {
            break;
        }
    }

    bool in_entry = false;
    bool is_function = true;
    std::vector<string> raw_entries;
    string cur_raw_entry;
    while (std::getline(file, line)) {
        if (line.empty()) {
            continue;
        }

        // A line that does not start with whitespace opens a new entry.
        if (!std::isspace(static_cast<unsigned char>(line.front()))) {
            in_entry = false;
        }

        auto const trimmed_line = trim(line);

        if (trimmed_line.starts_with("Type")
            && !is_word_in_str("function", trimmed_line)) {
            is_function = false;
            continue;
        }

        if (!in_entry) {
            // Flush the entry that just ended before starting the next one.
            if (is_function && !cur_raw_entry.empty()) {
                raw_entries.push_back(cur_raw_entry);
            }
            cur_raw_entry.clear();
            is_function = true;  // We assume the next entry will be a function
            in_entry = true;
        }

        cur_raw_entry += trimmed_line;
        cur_raw_entry += '\n';
    }

    if (is_function) {
        raw_entries.push_back(cur_raw_entry);
    }
    return raw_entries;
}

/// Parses the first line of an entry, "<fn_name>/<id> (<demangled name>)",
/// and stores "fn_name", "id" and "demangled_name" in `fields`.
/// Throws std::runtime_error when the "<fn_name>/<id>" part is missing.
void parse_header_line(std::string_view header, RawFields& fields)
{
    auto const name_match = ctre::search<".+[0-9]+">(header);
    std::string_view const head
      = name_match ? name_match.get<0>().to_view() : std::string_view{};
    auto const slash = head.find('/');
    if (slash == std::string_view::npos) {
        throw std::runtime_error(std::format(
          "Malformed symbol table entry, expected <name>/<id>: {}", header));
    }

    // The id is the text between the first and (if any) second '/'.
    auto const after_name = head.substr(slash + 1);
    fields["fn_name"] = trim(head.substr(0, slash));
    fields["id"] = trim(after_name.substr(0, after_name.find('/')));

    // The demangled name is the parenthesised text without the parentheses.
    auto const paren_match = ctre::search<"\\(.*\\)">(header);
    std::string_view const paren
      = paren_match ? paren_match.get<0>().to_view() : std::string_view{};
    fields["demangled_name"]
      = paren.size() >= 2 ? string(paren.substr(1, paren.size() - 2))
                          : string();
}

/// Parses a "Key name: value" line. The key is lower-cased with spaces turned
/// into underscores; the value is everything after the first ':' (trimmed),
/// so values that themselves contain ':' are kept whole. Lines without ':'
/// are ignored.
void parse_field_line(std::string_view line, RawFields& fields)
{
    // TODO: Track misc strings
    auto const colon = line.find(':');
    if (colon == std::string_view::npos) {
        return;
    }

    string key(line.substr(0, colon));
    rng::transform(key, key.begin(), [](unsigned char c) {
        return static_cast<char>(std::tolower(c));
    });
    rng::replace(key, ' ', '_');

    string value = trim(line.substr(colon + 1));
    std::println("{}: {}", key, value);
    fields[std::move(key)] = std::move(value);
}

}  // namespace

/**
 * @brief Parses the symbol table of the dump file at @p file_path.
 *
 * Entries following the "Symbol table:" line are split into key/value
 * fields (printed with std::println while parsing), turned into Node
 * objects keyed by function name, and finally linked through their
 * "called_by" and "calls" lists. The resulting graph is currently local to
 * this function; nothing is returned.
 *
 * @param file_path Path of the file to read.
 * @throws std::runtime_error if the file cannot be opened, if an entry's
 *         first line lacks the "<name>/<id>" part, or if an id is not a
 *         valid integer.
 */
void parse(string file_path)
{
    std::ifstream file(file_path);
    if (!file.is_open()) {
        throw std::runtime_error(
          std::format("Cannot open file: {}", file_path));
    }
    auto const raw_entries = read_function_entries(file);
    file.close();

    std::vector<RawFields> table_entries;
    for (auto const& raw_entry : raw_entries) {
        std::println("\nNEW ENTRY\n");
        if (raw_entry.empty() || is_whitespace(raw_entry)) {
            continue;
        }

        std::vector<string> lines;
        for (auto&& piece : raw_entry | views::split('\n')) {
            string text = trim(std::string_view(piece));
            if (!text.empty()) {
                lines.push_back(std::move(text));
            }
        }
        if (lines.empty()) {
            continue;
        }

        // FIXME: Hacky solution to remove personality functions, in future we
        // should ignore all functions that are disjoint with the callgraph.
        if (lines.front().find("__gxx_personality") != string::npos) {
            continue;
        }

        RawFields parsed_entry;
        parse_header_line(lines.front(), parsed_entry);
        std::println("fn_name: {}, id: {}, demangled_name: {}",
                     parsed_entry["fn_name"],
                     parsed_entry["id"],
                     parsed_entry["demangled_name"]);

        for (auto const& field_line : lines | views::drop(1)) {
            parse_field_line(field_line, parsed_entry);
        }
        table_entries.push_back(std::move(parsed_entry));
    }

    // Create all nodes. The map keys are views into the strings owned by
    // table_entries, which is not modified from here on.
    std::unordered_map<std::string_view, Node> all_nodes;
    for (auto const& entry : table_entries) {
        string const& fn_name = field_or_empty(entry, "fn_name");
        string const& id_text = field_or_empty(entry, "id");

        int id = 0;
        try {
            id = std::stoi(id_text);
        } catch (std::logic_error const&) {
            // std::stoi reports failures as invalid_argument/out_of_range;
            // surface them with the same exception type as other parse
            // errors.
            throw std::runtime_error(std::format(
              "Invalid node id \"{}\" for function {}", id_text, fn_name));
        }

        // Keys are derived from the lower-cased field headings, hence
        // "visibility" and "availability".
        all_nodes.emplace(std::string_view(fn_name),
                          Node(id,
                               fn_name,
                               field_or_empty(entry, "demangled_name"),
                               field_or_empty(entry, "visibility"),
                               field_or_empty(entry, "availability"),
                               field_or_empty(entry, "function_flags")));
    }

    // Create Edges
    // Neighbours are stored as copies without their own edges, so the result
    // does not depend on the order in which entries are processed and the
    // copies do not nest.
    auto const without_edges = [](Node const& src) {
        return Node(src.id,
                    src.fn_name,
                    src.demangled_name,
                    src.visibility,
                    src.availability,
                    src.flags);
    };
    // Names without a node (for example the filtered personality functions)
    // are skipped.
    auto const resolve = [&](std::vector<string> const& names) {
        std::vector<Node> linked;
        linked.reserve(names.size());
        for (auto const& name : names) {
            if (auto const it = all_nodes.find(name); it != all_nodes.end()) {
                linked.push_back(without_edges(it->second));
            }
        }
        return linked;
    };

    for (auto const& entry : table_entries) {
        auto const self
          = all_nodes.find(std::string_view(field_or_empty(entry, "fn_name")));
        if (self == all_nodes.end()) {
            continue;
        }
        self->second.callers
          = resolve(parse_fn_list(field_or_empty(entry, "called_by")));
        self->second.callees
          = resolve(parse_fn_list(field_or_empty(entry, "calls")));
    }
}

src/main.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@
88
* @copyright Copyright (c) 2025
99
*
1010
*/
11+
#include "gcc_parse.hpp"
12+
#include <print>
13+
#include <stdexcept>
14+
15+
namespace rng = std::ranges;
16+
namespace views = rng::views;
17+
18+
using string = std::string;
19+
using std::operator""sv;
1120

1221
#include <fstream>
1322
#include <print>
@@ -17,6 +26,13 @@
1726
/**
 * @brief Program entry point: parses the demo dump file.
 *
 * @return 0 on success, 1 if parsing failed with an exception.
 */
int main(int argc, char* argv[])
{
    std::println("yeet: {}", __cplusplus);
    try {
        parse("./"
              "demo_class.wpa.081i.whole-program");
    } catch (std::exception const& e) {
        // Catch the common base class: parsing can fail with more than
        // std::runtime_error (e.g. std::invalid_argument from number
        // conversion), and an uncaught exception would terminate the program
        // without a message.
        std::println("Error: {}", e.what());
        return 1;
    }

    return 0;
}

0 commit comments

Comments
 (0)