From e9270593dbd978976e5b2b2bfbfed16fe1c8bd23 Mon Sep 17 00:00:00 2001 From: Satyam Singh Date: Mon, 17 Jul 2023 13:53:50 +0530 Subject: [PATCH] Test inferred schema for json array Arrow's schema inference internally uses an IndexMap to keep track of the inferred schema while updating it from the given JSON objects. Thus the order in which entries are inserted into this map determines the output order of the schema fields as well. Since the JSON object in serde_json uses a BTreeMap, iteration is always ascending by key. This leads to infer_schema producing schema fields in ascending order when we infer from only one JSON object. In the case of a JSON array, any new fields encountered are appended at the end. Thus the output order of schema inference did not provide a good enough guarantee. This is solved in #450. This commit just adds a test for it. --- server/src/handlers/http/ingest.rs | 51 ++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/server/src/handlers/http/ingest.rs b/server/src/handlers/http/ingest.rs index 101dc8fb7..9d957bd2c 100644 --- a/server/src/handlers/http/ingest.rs +++ b/server/src/handlers/http/ingest.rs @@ -354,6 +354,57 @@ mod tests { .is_err()) } + #[test] + fn array_into_recordbatch_inffered_schema() { + let json = json!([ + { + "b": "hello", + }, + { + "b": "hello", + "a": 1, + "c": 1 + }, + { + "a": 1, + "b": "hello", + "c": null + }, + ]); + + let req = TestRequest::default().to_http_request(); + + let (_, rb, _) = into_event_batch( + req, + Bytes::from(serde_json::to_vec(&json).unwrap()), + HashMap::default(), + ) + .unwrap(); + + assert_eq!(rb.num_rows(), 3); + assert_eq!(rb.num_columns(), 6); + + let schema = rb.schema(); + let fields = &schema.fields; + + assert_eq!(&*fields[1], &Field::new("a", DataType::Int64, true)); + assert_eq!(&*fields[2], &Field::new("b", DataType::Utf8, true)); + assert_eq!(&*fields[3], &Field::new("c", DataType::Int64, true)); + + assert_eq!( + rb.column_by_name("a").unwrap().as_int64_arr(), 
&Int64Array::from(vec![None, Some(1), Some(1)]) + ); + assert_eq!( + rb.column_by_name("b").unwrap().as_utf8_arr(), + &StringArray::from(vec![Some("hello"), Some("hello"), Some("hello"),]) + ); + assert_eq!( + rb.column_by_name("c").unwrap().as_int64_arr(), + &Int64Array::from(vec![None, Some(1), None]) + ); + } + #[test] fn arr_with_null_into_rb() { let json = json!([