1515// specific language governing permissions and limitations
1616// under the License.
1717
18+ use std:: path:: Path ;
1819use std:: { path:: PathBuf , time:: Instant } ;
1920
2021use datafusion:: {
21- common:: exec_err,
2222 error:: { DataFusionError , Result } ,
2323 prelude:: SessionContext ,
2424} ;
25+ use datafusion_common:: exec_datafusion_err;
2526use structopt:: StructOpt ;
2627
2728use crate :: { BenchmarkRun , CommonOpt } ;
@@ -69,15 +70,49 @@ pub struct RunOpt {
6970 output_path : Option < PathBuf > ,
7071}
7172
72- const CLICKBENCH_QUERY_START_ID : usize = 0 ;
73- const CLICKBENCH_QUERY_END_ID : usize = 42 ;
73+ struct AllQueries {
74+ queries : Vec < String > ,
75+ }
76+
77+ impl AllQueries {
78+ fn try_new ( path : & Path ) -> Result < Self > {
79+ // ClickBench has all queries in a single file identified by line number
80+ let all_queries = std:: fs:: read_to_string ( path)
81+ . map_err ( |e| exec_datafusion_err ! ( "Could not open {path:?}: {e}" ) ) ?;
82+ Ok ( Self {
83+ queries : all_queries. lines ( ) . map ( |s| s. to_string ( ) ) . collect ( ) ,
84+ } )
85+ }
86+
87+ /// Returns the text of query `query_id`
88+ fn get_query ( & self , query_id : usize ) -> Result < & str > {
89+ self . queries
90+ . get ( query_id)
91+ . ok_or_else ( || {
92+ let min_id = self . min_query_id ( ) ;
93+ let max_id = self . max_query_id ( ) ;
94+ exec_datafusion_err ! (
95+ "Invalid query id {query_id}. Must be between {min_id} and {max_id}"
96+ )
97+ } )
98+ . map ( |s| s. as_str ( ) )
99+ }
100+
101+ fn min_query_id ( & self ) -> usize {
102+ 0
103+ }
74104
105+ fn max_query_id ( & self ) -> usize {
106+ self . queries . len ( ) - 1
107+ }
108+ }
75109impl RunOpt {
76110 pub async fn run ( self ) -> Result < ( ) > {
77111 println ! ( "Running benchmarks with the following options: {self:?}" ) ;
112+ let queries = AllQueries :: try_new ( self . queries_path . as_path ( ) ) ?;
78113 let query_range = match self . query {
79114 Some ( query_id) => query_id..=query_id,
80- None => CLICKBENCH_QUERY_START_ID ..= CLICKBENCH_QUERY_END_ID ,
115+ None => queries . min_query_id ( ) ..=queries . max_query_id ( ) ,
81116 } ;
82117
83118 let config = self . common . config ( ) ;
@@ -88,12 +123,12 @@ impl RunOpt {
88123 let mut benchmark_run = BenchmarkRun :: new ( ) ;
89124 for query_id in query_range {
90125 benchmark_run. start_new_case ( & format ! ( "Query {query_id}" ) ) ;
91- let sql = self . get_query ( query_id) ?;
126+ let sql = queries . get_query ( query_id) ?;
92127 println ! ( "Q{query_id}: {sql}" ) ;
93128
94129 for i in 0 ..iterations {
95130 let start = Instant :: now ( ) ;
96- let results = ctx. sql ( & sql) . await ?. collect ( ) . await ?;
131+ let results = ctx. sql ( sql) . await ?. collect ( ) . await ?;
97132 let elapsed = start. elapsed ( ) ;
98133 let ms = elapsed. as_secs_f64 ( ) * 1000.0 ;
99134 let row_count: usize = results. iter ( ) . map ( |b| b. num_rows ( ) ) . sum ( ) ;
@@ -120,23 +155,4 @@ impl RunOpt {
120155 )
121156 } )
122157 }
123-
124- /// Returns the text of query `query_id`
125- fn get_query ( & self , query_id : usize ) -> Result < String > {
126- if query_id > CLICKBENCH_QUERY_END_ID {
127- return exec_err ! (
128- "Invalid query id {query_id}. Must be between {CLICKBENCH_QUERY_START_ID} and {CLICKBENCH_QUERY_END_ID}"
129- ) ;
130- }
131-
132- let path = self . queries_path . as_path ( ) ;
133-
134- // ClickBench has all queries in a single file identified by line number
135- let all_queries = std:: fs:: read_to_string ( path) . map_err ( |e| {
136- DataFusionError :: Execution ( format ! ( "Could not open {path:?}: {e}" ) )
137- } ) ?;
138- let all_queries: Vec < _ > = all_queries. lines ( ) . collect ( ) ;
139-
140- Ok ( all_queries. get ( query_id) . map ( |s| s. to_string ( ) ) . unwrap ( ) )
141- }
142158}
0 commit comments