Skip to content

Commit c0f8f9a

Browse files
committed
[Picojson] Let the keys of objects in JSON be ordered by default
Previously, picojson defined `object` as an alias of `std::unordered_map`. That means that, when parsing JSON, the order of keys in objects is unspecified and implementation-dependent. This is inconvenient for certain applications; e.g., for LLM generation output, we want the order of keys to be the same as their order in the JSON file. This PR implements an ordered hashmap, `ordered_hashmap`, that 1) maintains the order in which elements are inserted, and 2) has the same interface as `std::unordered_map`. Picojson defines `object` as an alias of `ordered_hashmap`, so the key order of the input JSON is maintained when parsing. The macro `PICOJSON_USE_ORDERED_OBJECT` controls whether `object` uses the ordered version or the unordered version. It is set by default.
1 parent 4d4f050 commit c0f8f9a

File tree

2 files changed

+167
-0
lines changed

2 files changed

+167
-0
lines changed

3rdparty/picojson/picojson.h

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,21 @@
2626
* POSSIBILITY OF SUCH DAMAGE.
2727
*/
2828
#pragma once
29+
2930
#ifndef PICOJSON_USE_INT64
3031
#define PICOJSON_USE_INT64
3132
#define __STDC_FORMAT_MACROS 1
3233
#endif
3334

35+
// If PICOJSON_USE_ORDERED_OBJECT is set, picojson uses ordered dict for objects.
36+
// When iterating the object, the order of keys is the order they appear in the json string.
37+
// This macro is set by default.
38+
#ifndef PICOJSON_USE_ORDERED_OBJECT
39+
#define PICOJSON_USE_ORDERED_OBJECT 1
40+
#endif
41+
3442
#include <algorithm>
43+
#include <cassert>
3544
#include <cstddef>
3645
#include <cstdio>
3746
#include <cstdlib>
@@ -137,10 +146,17 @@ enum { INDENT_WIDTH = 2 };
137146

138147
struct null {};
139148

149+
class object_with_ordered_keys;
150+
140151
class value {
141152
public:
142153
typedef std::vector<value> array;
154+
#ifdef PICOJSON_USE_ORDERED_OBJECT
155+
typedef object_with_ordered_keys object;
156+
#else
143157
typedef std::unordered_map<std::string, value> object;
158+
#endif
159+
144160
union _storage {
145161
bool boolean_;
146162
double number_;
@@ -220,6 +236,92 @@ class value {
220236
void clear();
221237
};
222238

239+
// The ordered version of hashmap. It has the same interface as std::unordered_map, but provides
240+
// ordered_keys() to return the keys in the order they were inserted.
241+
class object_with_ordered_keys : private std::unordered_map<std::string, value> {
242+
public:
243+
using typename std::unordered_map<std::string, value>::value_type;
244+
using typename std::unordered_map<std::string, value>::iterator;
245+
using typename std::unordered_map<std::string, value>::const_iterator;
246+
247+
object_with_ordered_keys() = default;
248+
object_with_ordered_keys(const object_with_ordered_keys&) = default;
249+
object_with_ordered_keys(object_with_ordered_keys&&) = default;
250+
object_with_ordered_keys(std::initializer_list<value_type> init)
251+
: std::unordered_map<std::string, value>(init) {
252+
for (const auto& pair : init) {
253+
ordered_keys_.push_back(pair.first);
254+
}
255+
}
256+
object_with_ordered_keys& operator=(const object_with_ordered_keys&) = default;
257+
object_with_ordered_keys& operator=(object_with_ordered_keys&&) = default;
258+
259+
using std::unordered_map<std::string, value>::begin;
260+
using std::unordered_map<std::string, value>::end;
261+
using std::unordered_map<std::string, value>::cbegin;
262+
using std::unordered_map<std::string, value>::cend;
263+
using std::unordered_map<std::string, value>::empty;
264+
using std::unordered_map<std::string, value>::size;
265+
using std::unordered_map<std::string, value>::at;
266+
using std::unordered_map<std::string, value>::count;
267+
using std::unordered_map<std::string, value>::find;
268+
269+
value& operator[](const std::string& key) {
270+
if (count(key) == 0) {
271+
ordered_keys_.push_back(key);
272+
}
273+
return std::unordered_map<std::string, value>::operator[](key);
274+
}
275+
276+
void clear() {
277+
std::unordered_map<std::string, value>::clear();
278+
ordered_keys_.clear();
279+
}
280+
281+
std::pair<iterator, bool> insert(const value_type& kv) {
282+
if (!count(kv.first)) {
283+
ordered_keys_.push_back(kv.first);
284+
}
285+
return std::unordered_map<std::string, value>::insert(kv);
286+
}
287+
288+
template <class... Args>
289+
std::pair<iterator, bool> emplace(Args&&... args) {
290+
return insert(value_type(std::forward<Args>(args)...));
291+
}
292+
293+
iterator erase(const_iterator it) {
294+
ordered_keys_.erase(std::find(ordered_keys_.begin(), ordered_keys_.end(), it->first));
295+
return std::unordered_map<std::string, value>::erase(it);
296+
}
297+
298+
iterator erase(iterator it) {
299+
ordered_keys_.erase(std::find(ordered_keys_.begin(), ordered_keys_.end(), it->first));
300+
return std::unordered_map<std::string, value>::erase(it);
301+
}
302+
303+
size_t erase(const std::string& key) {
304+
if (std::unordered_map<std::string, value>::erase(key)) {
305+
ordered_keys_.erase(std::find(ordered_keys_.begin(), ordered_keys_.end(), key));
306+
return 1;
307+
} else {
308+
return 0;
309+
}
310+
}
311+
312+
const std::vector<std::string>& ordered_keys() const { return ordered_keys_; }
313+
314+
friend bool operator==(const object_with_ordered_keys& lhs, const object_with_ordered_keys& rhs);
315+
316+
private:
317+
std::vector<std::string> ordered_keys_;
318+
};
319+
320+
// Two objects are equal when their underlying key/value maps are equal; the insertion
// order of the keys is not part of the comparison.
inline bool operator==(const object_with_ordered_keys& lhs, const object_with_ordered_keys& rhs) {
  using map_type = std::unordered_map<std::string, value>;
  const map_type& left_map = static_cast<const map_type&>(lhs);
  const map_type& right_map = static_cast<const map_type&>(rhs);
  return left_map == right_map;
}
324+
223325
typedef value::array array;
224326
typedef value::object object;
225327

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
#include <cassert>
20+
#include <sstream>
21+
22+
#include "picojson.h"
23+
24+
using picojson::object_with_ordered_keys;
25+
26+
void test_constructor() {
27+
object_with_ordered_keys obj;
28+
obj["foo"] = picojson::value(true);
29+
assert((obj.ordered_keys() == std::vector<std::string>{"foo"}));
30+
31+
object_with_ordered_keys obj1{{"foo", picojson::value(true)}, {"bar", picojson::value(false)}};
32+
assert((obj1.ordered_keys() == std::vector<std::string>{"foo", "bar"}));
33+
34+
object_with_ordered_keys obj2(obj1);
35+
assert((obj2.ordered_keys() == std::vector<std::string>{"foo", "bar"}));
36+
37+
object_with_ordered_keys obj3(std::move(obj2));
38+
assert((obj3.ordered_keys() == std::vector<std::string>{"foo", "bar"}));
39+
40+
obj = obj3;
41+
assert((obj.ordered_keys() == std::vector<std::string>{"foo", "bar"}));
42+
}
43+
44+
// Checks that insert, emplace, erase (by iterator and by key) and clear keep
// ordered_keys() in sync with the stored entries.
void test_modifier() {
  using key_list = std::vector<std::string>;

  object_with_ordered_keys obj{{"foo", picojson::value(true)}, {"bar", picojson::value(false)}};

  // A new key is appended at the end.
  obj.insert({"abc", picojson::value(false)});
  assert(obj.ordered_keys() == (key_list{"foo", "bar", "abc"}));

  obj.emplace("def", picojson::value(true));
  assert(obj.ordered_keys() == (key_list{"foo", "bar", "abc", "def"}));

  // Inserting an already-present key leaves the order untouched.
  obj.insert({"abc", picojson::value(true)});
  assert(obj.ordered_keys() == (key_list{"foo", "bar", "abc", "def"}));

  // Erase through an iterator.
  obj.erase(obj.find("abc"));
  assert(obj.ordered_keys() == (key_list{"foo", "bar", "def"}));

  // Erase through a key.
  obj.erase("foo");
  assert(obj.ordered_keys() == (key_list{"bar", "def"}));

  obj.clear();
  assert(obj.ordered_keys() == key_list{});
}
60+
61+
// Test driver: runs the constructor checks, then the modifier checks. Reaching the end of
// main reports success (exit status 0).
int main() {
  test_constructor();
  test_modifier();
}

0 commit comments

Comments
 (0)