Skip to content

Commit 622a54d

Browse files
committed
[Picojson] Keep the keys of JSON objects in insertion order by default
Previously, picojson defined `object` as an alias of `std::unordered_map`. That means that when parsing JSON, the order of the keys in an object is unspecified and implementation-dependent. This is inconvenient for certain applications; e.g. for LLM generation output, we want the order of the keys to be the same as their order in the JSON file. This PR implements an ordered hash map, `ordered_hashmap`, that 1) maintains the order in which the elements are inserted, and 2) has the same interface as `std::unordered_map`. Picojson now defines `object` as an alias of `ordered_hashmap`, so the key order of the input JSON is maintained when parsing. The macro `PICOJSON_USE_ORDERED_OBJECT` controls whether `object` uses the ordered version or the unordered version. It is set by default.
1 parent 4d4f050 commit 622a54d

File tree

2 files changed

+324
-0
lines changed

2 files changed

+324
-0
lines changed

3rdparty/picojson/picojson.h

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,21 @@
2626
* POSSIBILITY OF SUCH DAMAGE.
2727
*/
2828
#pragma once
29+
2930
#ifndef PICOJSON_USE_INT64
3031
#define PICOJSON_USE_INT64
3132
#define __STDC_FORMAT_MACROS 1
3233
#endif
3334

35+
// If PICOJSON_USE_ORDERED_OBJECT is set, picojson uses an insertion-ordered map for objects.
36+
// When iterating over an object, keys are visited in the order they appear in the JSON string.
37+
// This macro is set by default.
38+
#ifndef PICOJSON_USE_ORDERED_OBJECT
39+
#define PICOJSON_USE_ORDERED_OBJECT 1
40+
#endif
41+
3442
#include <algorithm>
43+
#include <cassert>
3544
#include <cstddef>
3645
#include <cstdio>
3746
#include <cstdlib>
@@ -137,10 +146,177 @@ enum { INDENT_WIDTH = 2 };
137146

138147
// Empty tag type with no members; presumably stands for the JSON `null` value -- confirm against its uses.
struct null {};
139148

149+
// Insertion-ordered hash map.
//
// Elements live in a privately inherited std::unordered_map; a side vector
// (`order`) records the keys in the order they were first inserted. Iteration
// walks that vector, so elements are visited in insertion order.
//
// The public interface mirrors the subset of std::unordered_map implemented
// below. Cost notes:
//   * at(), count(), operator[] and failed find() calls are hash lookups.
//   * A successful find() and every erase() scan/shift the order vector, so
//     they are linear in the number of elements.
//   * Dereferencing an iterator performs one hash lookup.
// Iterators are (index, container) pairs: erasing an element shifts the
// positions of all elements inserted after it.
template <typename Key, typename T>
class ordered_hashmap : private std::unordered_map<Key, T> {
  using base_type = std::unordered_map<Key, T>;

 public:
  using key_type = Key;
  using mapped_type = T;
  using value_type = std::pair<const Key, T>;

 private:
  // Forward iterator over the elements in insertion order.
  // IsConst selects between `iterator` (false) and `const_iterator` (true).
  template <bool IsConst>
  struct iterator_base {
   public:
    // Standard iterator typedefs so std::iterator_traits and the standard
    // algorithms (std::distance, std::find_if, ...) accept this iterator.
    using iterator_category = std::forward_iterator_tag;
    using difference_type = std::ptrdiff_t;
    using value_type = typename ordered_hashmap::value_type;
    using pointer = std::conditional_t<IsConst, const value_type*, value_type*>;
    using reference = std::conditional_t<IsConst, const value_type&, value_type&>;

    // A default-constructed iterator is not attached to any container and
    // must not be dereferenced.
    iterator_base() = default;
    iterator_base(const iterator_base&) = default;
    iterator_base& operator=(const iterator_base&) = default;

    // Converting constructor iterator -> const_iterator. It is removed from
    // overload resolution for the non-const iterator, where it would clash
    // with the copy constructor.
    template <bool EnableConversion = IsConst, typename = std::enable_if_t<EnableConversion>>
    iterator_base(const iterator_base<false>& other)
        : order_index(other.order_index), obj_ptr(other.obj_ptr) {}

    iterator_base& operator++() {
      ++order_index;
      return *this;
    }
    iterator_base operator++(int) {
      iterator_base previous = *this;
      ++order_index;
      return previous;
    }
    // Resolves the key stored at the current position through the hash map.
    pointer operator->() const {
      assert(obj_ptr != nullptr && order_index < obj_ptr->order.size());
      return obj_ptr->std::unordered_map<Key, T>::find(obj_ptr->order[order_index]).operator->();
    }
    reference operator*() const { return *operator->(); }
    bool operator==(const iterator_base& other) const {
      return order_index == other.order_index && obj_ptr == other.obj_ptr;
    }
    bool operator!=(const iterator_base& other) const { return !(*this == other); }

    friend class ordered_hashmap;

   private:
    using obj_pointer_type = std::conditional_t<IsConst, const ordered_hashmap*, ordered_hashmap*>;
    iterator_base(std::size_t order_index, obj_pointer_type obj_ptr)
        : order_index(order_index), obj_ptr(obj_ptr) {}

    // Position inside obj_ptr->order; equal to order.size() for end().
    std::size_t order_index = 0;
    obj_pointer_type obj_ptr = nullptr;
  };

 public:
  using iterator = iterator_base<false>;
  using const_iterator = iterator_base<true>;

  ordered_hashmap() = default;
  ordered_hashmap(const ordered_hashmap&) = default;
  ordered_hashmap(ordered_hashmap&&) = default;
  // Inserts the listed elements one by one. When a key appears more than
  // once only its first occurrence is kept, so the order vector never holds
  // duplicate keys and always matches size().
  ordered_hashmap(std::initializer_list<value_type> init) {
    order.reserve(init.size());
    for (const auto& entry : init) {
      insert(entry);
    }
  }
  ordered_hashmap& operator=(const ordered_hashmap&) = default;
  ordered_hashmap& operator=(ordered_hashmap&&) = default;

  iterator begin() { return iterator(0, this); }
  iterator end() { return iterator(order.size(), this); }
  const_iterator begin() const { return const_iterator(0, this); }
  const_iterator end() const { return const_iterator(order.size(), this); }
  const_iterator cbegin() const { return const_iterator(0, this); }
  const_iterator cend() const { return const_iterator(order.size(), this); }

  using base_type::at;
  using base_type::count;
  using base_type::empty;
  using base_type::size;

  // Returns the value mapped to `key`, default-constructing it (and
  // appending `key` to the insertion order) when it is not present yet.
  T& operator[](const Key& key) {
    if (count(key) == 0) {
      order.push_back(key);
    }
    return base_type::operator[](key);
  }

  // Returns an iterator to the element with the given key, or end().
  iterator find(const Key& key) { return iterator(index_of(key), this); }
  const_iterator find(const Key& key) const { return const_iterator(index_of(key), this); }

  void clear() {
    base_type::clear();
    order.clear();
  }

  // Inserts `value` unless its key already exists. Returns the position of
  // the element with that key and whether an insertion took place.
  std::pair<iterator, bool> insert(const value_type& value) {
    if (count(value.first) != 0) {
      return {find(value.first), false};
    }
    order.push_back(value.first);
    base_type::insert(value);
    return {iterator(order.size() - 1, this), true};
  }

  // Builds a value_type from `args` and forwards it to insert().
  template <class... Args>
  std::pair<iterator, bool> emplace(Args&&... args) {
    return insert(value_type(std::forward<Args>(args)...));
  }

  // Removes the element at `pos` and returns an iterator to the element
  // that followed it (end() if it was the last one).
  iterator erase(const_iterator pos) {
    assert(pos.obj_ptr == this);
    return erase_at(pos.order_index);
  }

  iterator erase(iterator pos) {
    assert(pos.obj_ptr == this);
    return erase_at(pos.order_index);
  }

  // Removes the element with the given key. Returns the number of removed
  // elements (0 or 1).
  std::size_t erase(const Key& key) {
    if (count(key) == 0) {
      return 0;
    }
    // Locate the slot before touching the hash map: `key` may refer to the
    // key stored inside the element being erased (e.g. m.erase(it->first))
    // and must not be read once that element is gone.
    const auto slot = std::find(order.begin(), order.end(), key);
    base_type::erase(key);
    order.erase(slot);
    return 1;
  }

  template <typename K, typename V>
  friend bool operator==(const ordered_hashmap<K, V>& lhs, const ordered_hashmap<K, V>& rhs);

 private:
  // Position of `key` in the insertion order, or order.size() when absent.
  // The hash map is consulted first so that misses do not scan the vector.
  std::size_t index_of(const Key& key) const {
    if (count(key) == 0) {
      return order.size();
    }
    const auto slot = std::find(order.begin(), order.end(), key);
    return static_cast<std::size_t>(std::distance(order.begin(), slot));
  }

  // Removes the element stored at position `index` of the insertion order.
  iterator erase_at(std::size_t index) {
    assert(index < order.size());
    base_type::erase(order[index]);
    order.erase(order.begin() + static_cast<std::ptrdiff_t>(index));
    return iterator(index, this);
  }

  // Keys in insertion order; always holds exactly the keys of the base map.
  std::vector<Key> order;
};
303+
304+
template <typename Key, typename T>
305+
bool operator==(const ordered_hashmap<Key, T>& lhs, const ordered_hashmap<Key, T>& rhs) {
306+
return static_cast<const std::unordered_map<Key, T>&>(lhs) ==
307+
static_cast<const std::unordered_map<Key, T>&>(rhs) &&
308+
lhs.order == rhs.order;
309+
}
310+
140311
class value {
141312
public:
142313
typedef std::vector<value> array;
314+
#ifdef PICOJSON_USE_ORDERED_OBJECT
315+
typedef ordered_hashmap<std::string, value> object;
316+
#else
143317
typedef std::unordered_map<std::string, value> object;
318+
#endif
319+
144320
union _storage {
145321
bool boolean_;
146322
double number_;
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
#include <cassert>
20+
#include <sstream>
21+
22+
#include "picojson.h"
23+
24+
using picojson::ordered_hashmap;
25+
26+
std::string print_ordered_hashmap(const ordered_hashmap<std::string, int>& obj) {
27+
std::ostringstream oss;
28+
int idx = 0;
29+
for (const auto& [key, value] : obj) {
30+
if (idx != 0) {
31+
oss << " ";
32+
}
33+
++idx;
34+
oss << "(" << key << ", " << value << ")";
35+
}
36+
return oss.str();
37+
}
38+
39+
void test_constructor() {
40+
ordered_hashmap<std::string, int> obj;
41+
obj["foo"] = 1;
42+
assert(print_ordered_hashmap(obj) == "(foo, 1)");
43+
44+
ordered_hashmap<std::string, int> obj1{{"foo", 1}, {"bar", 2}};
45+
assert(print_ordered_hashmap(obj1) == "(foo, 1) (bar, 2)");
46+
47+
ordered_hashmap<std::string, int> obj2(obj1);
48+
assert(print_ordered_hashmap(obj2) == "(foo, 1) (bar, 2)");
49+
50+
ordered_hashmap<std::string, int> obj3(std::move(obj2));
51+
assert(print_ordered_hashmap(obj3) == "(foo, 1) (bar, 2)");
52+
53+
obj = obj3;
54+
assert(print_ordered_hashmap(obj) == "(foo, 1) (bar, 2)");
55+
56+
const ordered_hashmap<std::string, int> obj4{{"foo", 1}, {"bar", 2}};
57+
assert(print_ordered_hashmap(obj4) == "(foo, 1) (bar, 2)");
58+
}
59+
60+
void test_iterator() {
61+
ordered_hashmap<std::string, int> obj{{"foo", 1}, {"bar", 2}};
62+
auto it = obj.begin();
63+
auto past_it = it++;
64+
assert(past_it == obj.begin());
65+
assert(it->first == "bar");
66+
assert(it->second == 2);
67+
it->second = 3;
68+
assert(print_ordered_hashmap(obj) == "(foo, 1) (bar, 3)");
69+
(*it).second = 4;
70+
assert(print_ordered_hashmap(obj) == "(foo, 1) (bar, 4)");
71+
++it;
72+
assert(it == obj.end());
73+
auto cit = obj.cbegin();
74+
assert(cit->first == "foo");
75+
assert(cit->second == 1);
76+
++cit;
77+
assert(cit->first == "bar");
78+
assert(cit->second == 4);
79+
cit = it;
80+
assert(cit == obj.cend());
81+
}
82+
83+
void test_visiter() {
84+
ordered_hashmap<std::string, int> obj{{"foo", 1}, {"bar", 2}};
85+
auto it = obj.find("foo");
86+
assert(it->first == "foo");
87+
assert(it->second == 1);
88+
assert(obj.find("abc") == obj.end());
89+
obj["foo"] = 3;
90+
assert(print_ordered_hashmap(obj) == "(foo, 3) (bar, 2)");
91+
obj["abc"] = 4;
92+
assert(print_ordered_hashmap(obj) == "(foo, 3) (bar, 2) (abc, 4)");
93+
assert(obj.count("abc") == 1);
94+
assert(obj.count("def") == 0);
95+
assert(obj.at("bar") == 2);
96+
assert(obj.size() == 3);
97+
98+
ordered_hashmap<std::string, int> obj1{{"foo", 3}, {"bar", 2}, {"abc", 4}};
99+
assert(obj == obj1);
100+
}
101+
102+
void test_modifier() {
103+
ordered_hashmap<std::string, int> obj{{"foo", 1}, {"bar", 2}};
104+
105+
auto [it, successful] = obj.insert({"abc", 3});
106+
assert(it->first == "abc");
107+
assert(it->second == 3);
108+
assert(it == obj.find("abc"));
109+
++it;
110+
assert(it == obj.end());
111+
assert(successful);
112+
113+
std::tie(it, successful) = obj.emplace("def", 4);
114+
assert(it == obj.find("def"));
115+
++it;
116+
assert(it == obj.end());
117+
assert(successful);
118+
119+
std::tie(it, successful) = obj.insert({"abc", 4});
120+
assert(it->second == 3);
121+
++it;
122+
++it;
123+
assert(it == obj.end());
124+
assert(!successful);
125+
126+
it = obj.find("abc");
127+
it = obj.erase(it);
128+
assert(it->first == "def");
129+
assert(it->second == 4);
130+
++it;
131+
assert(it == obj.end());
132+
assert(print_ordered_hashmap(obj) == "(foo, 1) (bar, 2) (def, 4)");
133+
134+
assert(obj.erase("foo") == 1);
135+
assert(print_ordered_hashmap(obj) == "(bar, 2) (def, 4)");
136+
137+
obj.clear();
138+
assert(print_ordered_hashmap(obj) == "");
139+
assert(obj.empty());
140+
}
141+
142+
// Runs every test group in sequence; a failed assert aborts the process.
int main() {
  using test_fn = void (*)();
  const test_fn groups[] = {test_constructor, test_iterator, test_visiter, test_modifier};
  for (test_fn run_group : groups) {
    run_group();
  }
  return 0;
}

0 commit comments

Comments
 (0)