Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions datafusion/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ The benchmark should be completed in under an hour. On-demand pricing is $0.6 pe
1. `cd ClickBench/datafusion`
1. `vi benchmark.sh` and modify following line to target Datafusion version
```
git checkout 36.0.0
git checkout 40.0.0
```
1. `bash benchmark.sh`

### Known Issues:

1. importing parquet by `datafusion-cli` doesn't support schema, need to add some casting in quries.sql (e.g. converting EventTime from Int to Timestamp via `to_timestamp_seconds`)
2. importing parquet by `datafusion-cli` make column name column name case-sensitive, i change all column name in quries.sql to double quoted literal (e.g. `EventTime` -> `"EventTime"`)
3. `comparing binary with utf-8` and `group by binary` don't work in mac, if you run these quries in mac, you'll get some errors for quries contain binary format apache/arrow-datafusion#3050
1. importing parquet with `datafusion-cli` doesn't support specifying a schema, so some casts are needed in queries.sql (e.g. converting EventTime from Int to Timestamp via `to_timestamp_seconds`)
2. importing parquet with `datafusion-cli` makes column names case-sensitive, so all column names in queries.sql were changed to double-quoted identifiers (e.g. `EventTime` -> `"EventTime"`)
3. `comparing binary with utf-8` and `group by binary` don't work on macOS; if you run these queries on a Mac, you'll get errors for queries that involve the binary format (see apache/arrow-datafusion#3050)


## Generate full human readable results (for debugging)
Expand Down
2 changes: 1 addition & 1 deletion datafusion/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ sudo yum install gcc -y
# Install DataFusion main branch
git clone https://github.com/apache/arrow-datafusion.git
cd arrow-datafusion/datafusion-cli
git checkout 36.0.0
git checkout 40.0.0
CARGO_PROFILE_RELEASE_LTO=true RUSTFLAGS="-C codegen-units=1" cargo build --release
export PATH="`pwd`/target/release:$PATH"
cd ../..
Expand Down
90 changes: 45 additions & 45 deletions datafusion/results/partitioned.json
Original file line number Diff line number Diff line change
@@ -1,58 +1,58 @@
{
"system": "DataFusion (Parquet, partitioned)",
"date": "2024-03-07",
"date": "2024-07-27",
"machine": "c6a.4xlarge, 500gb gp2",
"cluster_size": 1,
"comment": "v36.0.0 (bf6f83b)",
"comment": "v40.0.0 (4cae813)",

"tags": ["Rust", "column-oriented", "embedded", "stateless"],

"load_time": 0,
"data_size": 14779976446,

"result": [
[0.039, 0.011, 0.009],
[0.081, 0.028, 0.025],
[0.164, 0.070, 0.069],
[0.369, 0.076, 0.073],
[1.266, 0.782, 0.782],
[1.270, 1.172, 1.182],
[0.058, 0.031, 0.033],
[0.056, 0.027, 0.027],
[1.481, 1.412, 1.389],
[1.256, 0.964, 0.968],
[0.469, 0.274, 0.279],
[0.790, 0.309, 0.308],
[1.364, 1.237, 1.255],
[3.424, 2.509, 2.520],
[1.468, 1.387, 1.402],
[0.966, 0.899, 0.900],
[3.151, 2.654, 2.619],
[3.090, 2.555, 2.580],
[6.834, 5.596, 5.624],
[0.396, 0.066, 0.068],
[10.209, 1.558, 1.580],
[11.343, 1.892, 1.855],
[22.693, 4.159, 4.199],
[55.450, 11.146, 11.161],
[2.693, 0.488, 0.491],
[0.769, 0.429, 0.422],
[2.640, 0.572, 0.561],
[9.681, 2.242, 2.267],
[8.752, 5.296, 5.205],
[0.496, 0.403, 0.402],
[2.388, 1.067, 1.064],
[6.067, 1.554, 1.546],
[8.350, 7.786, 7.676],
[11.737, 6.850, 6.943],
[12.001, 7.473, 7.597],
[1.878, 1.764, 1.757],
[0.453, 0.284, 0.283],
[0.167, 0.107, 0.106],
[0.202, 0.136, 0.122],
[0.814, 0.667, 0.642],
[0.133, 0.040, 0.044],
[0.122, 0.034, 0.033],
[0.129, 0.045, 0.044]
[0.043, 0.018, 0.016],
[0.087, 0.031, 0.028],
[0.173, 0.072, 0.073],
[0.356, 0.075, 0.081],
[1.201, 0.784, 0.796],
[0.960, 0.831, 0.837],
[0.057, 0.026, 0.026],
[0.062, 0.029, 0.031],
[1.408, 1.314, 1.315],
[1.302, 1.025, 1.038],
[0.483, 0.280, 0.269],
[0.705, 0.306, 0.296],
[1.137, 0.931, 0.939],
[3.183, 2.245, 2.252],
[1.499, 1.415, 1.429],
[1.011, 0.901, 0.897],
[3.230, 2.670, 2.655],
[3.136, 2.560, 2.539],
[6.849, 5.608, 5.827],
[0.299, 0.075, 0.068],
[10.086, 1.544, 1.617],
[11.238, 1.821, 1.835],
[21.957, 4.104, 4.132],
[55.510, 10.615, 10.548],
[2.678, 0.503, 0.500],
[0.765, 0.412, 0.413],
[2.649, 0.574, 0.559],
[9.652, 2.177, 2.203],
[8.528, 5.051, 5.019],
[0.499, 0.421, 0.439],
[2.389, 1.018, 1.028],
[6.060, 1.520, 1.513],
[8.820, 8.081, 7.826],
[10.604, 4.851, 5.088],
[10.567, 4.971, 4.880],
[1.737, 1.659, 1.649],
[0.363, 0.247, 0.231],
[0.156, 0.093, 0.092],
[0.198, 0.125, 0.124],
[0.902, 0.701, 0.683],
[0.144, 0.042, 0.041],
[0.130, 0.037, 0.040],
[0.131, 0.055, 0.050]
]
}
90 changes: 45 additions & 45 deletions datafusion/results/single.json
Original file line number Diff line number Diff line change
@@ -1,58 +1,58 @@
{
"system": "DataFusion (Parquet, single)",
"date": "2024-03-07",
"date": "2024-07-27",
"machine": "c6a.4xlarge, 500gb gp2",
"cluster_size": 1,
"comment": "v36.0.0 (bf6f83b)",
"comment": "v40.0.0 (4cae813)",

"tags": ["Rust", "column-oriented", "embedded", "stateless"],

"load_time": 0,
"data_size": 14779976446,

"result": [
[0.075, 0.045, 0.048],
[0.105, 0.059, 0.060],
[0.170, 0.100, 0.103],
[0.349, 0.105, 0.107],
[1.145, 0.841, 0.834],
[1.374, 1.251, 1.271],
[0.088, 0.064, 0.063],
[0.091, 0.065, 0.061],
[1.523, 1.442, 1.429],
[1.213, 1.025, 1.014],
[0.413, 0.315, 0.309],
[0.680, 0.334, 0.357],
[1.380, 1.260, 1.255],
[3.382, 2.497, 2.493],
[1.470, 1.392, 1.401],
[1.054, 0.946, 0.947],
[3.158, 2.703, 2.701],
[3.085, 2.615, 2.629],
[6.878, 5.644, 5.705],
[0.336, 0.098, 0.098],
[9.957, 1.526, 1.521],
[11.223, 1.853, 1.881],
[22.175, 4.074, 4.050],
[56.012, 11.500, 11.475],
[2.540, 0.578, 0.588],
[0.768, 0.524, 0.513],
[2.521, 0.670, 0.674],
[9.559, 2.258, 2.247],
[8.948, 5.095, 5.133],
[0.512, 0.460, 0.456],
[2.314, 1.127, 1.124],
[5.812, 1.591, 1.579],
[8.333, 7.788, 7.797],
[11.544, 6.860, 6.871],
[12.007, 7.641, 7.549],
[1.940, 1.815, 1.821],
[0.457, 0.318, 0.313],
[0.222, 0.179, 0.176],
[0.229, 0.178, 0.173],
[0.860, 0.693, 0.704],
[0.157, 0.073, 0.072],
[0.142, 0.069, 0.074],
[0.150, 0.086, 0.077]
[0.076, 0.051, 0.055],
[0.113, 0.066, 0.066],
[0.196, 0.115, 0.105],
[0.340, 0.114, 0.115],
[1.074, 0.862, 0.858],
[0.995, 0.874, 0.909],
[0.088, 0.076, 0.065],
[0.102, 0.078, 0.068],
[1.442, 1.349, 1.368],
[1.260, 1.083, 1.064],
[0.451, 0.306, 0.304],
[0.597, 0.337, 0.335],
[1.088, 0.986, 0.974],
[3.085, 2.261, 2.268],
[1.522, 1.428, 1.429],
[1.068, 0.957, 0.960],
[3.217, 2.702, 2.754],
[3.149, 2.621, 2.564],
[6.978, 5.679, 5.865],
[0.338, 0.107, 0.113],
[9.885, 1.466, 1.474],
[11.225, 1.794, 1.791],
[22.035, 3.906, 3.912],
[55.923, 10.899, 10.975],
[2.560, 0.579, 0.575],
[0.754, 0.509, 0.506],
[2.517, 0.674, 0.651],
[9.574, 2.220, 2.216],
[9.070, 4.926, 4.940],
[0.536, 0.473, 0.481],
[2.288, 1.090, 1.101],
[5.823, 1.543, 1.528],
[8.637, 8.328, 7.848],
[10.477, 4.972, 5.022],
[10.435, 4.910, 5.020],
[1.827, 1.685, 1.724],
[0.389, 0.275, 0.270],
[0.201, 0.175, 0.160],
[0.230, 0.173, 0.172],
[0.887, 0.749, 0.755],
[0.172, 0.085, 0.076],
[0.165, 0.075, 0.073],
[0.160, 0.090, 0.100]
]
}
2 changes: 1 addition & 1 deletion datafusion/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ cat queries.sql | while read query; do
# 2. each query contains a "Query took xxx seconds", we just grep these 2 lines
# 3. use sed to take the second line
# 4. use awk to take the number we want
RES=`datafusion-cli -f $CREATE_SQL_FILE /tmp/query.sql 2>&1 | grep "Query took" | sed -n 2p | awk '{print $7}'`
# Run the create-table SQL file followed by the query file and keep only the
# timing of the second "Elapsed" line (field 2 of that line).
# NOTE(review): presumably the first "Elapsed" line belongs to the statement in
# $CREATE_SQL_FILE and the second to the benchmark query — confirm against the
# datafusion-cli output format of the checked-out version.
RES=$(datafusion-cli -f "$CREATE_SQL_FILE" /tmp/query.sql 2>&1 \
  | grep "Elapsed" \
  | sed -n 2p \
  | awk '{ print $2 }')

# Emit the timing, or a literal "null" when no timing could be extracted
# (explicit if/else so the fallback never fires after a successful print).
if [[ -n "$RES" ]]; then
  echo -n "$RES"
else
  echo -n "null"
fi
Expand Down
Loading