1111#![ allow( dead_code) ] // runtime init functions not used during testing
1212
1313use os:: windows:: prelude:: * ;
14+ use sys:: windows:: os:: current_exe;
1415use sys:: c;
15- use slice;
16- use ops:: Range ;
1716use ffi:: OsString ;
18- use libc:: { c_int, c_void} ;
1917use fmt;
18+ use vec;
19+ use core:: iter;
20+ use slice;
21+ use path:: PathBuf ;
2022
/// Runtime initialisation hook for argument handling.
///
/// This is a no-op here: both parameters are ignored and nothing is stored,
/// because `args()` obtains the command line itself each time it is called.
pub unsafe fn init(_argc: isize, _argv: *const *const u8) {}
2224
/// Runtime teardown hook for argument handling.
///
/// This is a no-op: `init` stores nothing, so there is nothing to release.
pub unsafe fn cleanup() {}
2426
2527pub fn args ( ) -> Args {
2628 unsafe {
27- let mut nArgs: c_int = 0 ;
28- let lpCmdLine = c:: GetCommandLineW ( ) ;
29- let szArgList = c:: CommandLineToArgvW ( lpCmdLine, & mut nArgs) ;
30-
31- // szArcList can be NULL if CommandLinToArgvW failed,
32- // but in that case nArgs is 0 so we won't actually
33- // try to read a null pointer
34- Args { cur : szArgList, range : 0 ..( nArgs as isize ) }
29+ let lp_cmd_line = c:: GetCommandLineW ( ) ;
30+ let parsed_args_list = parse_lp_cmd_line (
31+ lp_cmd_line as * const u16 ,
32+ || current_exe ( ) . map ( PathBuf :: into_os_string) . unwrap_or_else ( |_| OsString :: new ( ) ) ) ;
33+
34+ Args { parsed_args_list : parsed_args_list. into_iter ( ) }
3535 }
3636}
3737
38+ /// Implements the Windows command-line argument parsing algorithm.
39+ ///
40+ /// Microsoft's documentation for the Windows CLI argument format can be found at
41+ /// <https://docs.microsoft.com/en-us/previous-versions//17w5ykft(v=vs.85)>.
42+ ///
43+ /// Windows includes a function to do this in shell32.dll,
44+ /// but linking with that DLL causes the process to be registered as a GUI application.
45+ /// GUI applications add a bunch of overhead, even if no windows are drawn. See
46+ /// <https://randomascii.wordpress.com/2018/12/03/a-not-called-function-can-cause-a-5x-slowdown/>.
47+ ///
48+ /// This function was tested for equivalence to the shell32.dll implementation in
49+ /// Windows 10 Pro v1803, using an exhaustive test suite available at
50+ /// <https://gist.github.com/notriddle/dde431930c392e428055b2dc22e638f5> or
51+ /// <https://paste.gg/p/anonymous/47d6ed5f5bd549168b1c69c799825223>.
52+ unsafe fn parse_lp_cmd_line < F : Fn ( ) -> OsString > ( lp_cmd_line : * const u16 , exe_name : F )
53+ -> Vec < OsString > {
54+ const BACKSLASH : u16 = '\\' as u16 ;
55+ const QUOTE : u16 = '"' as u16 ;
56+ const TAB : u16 = '\t' as u16 ;
57+ const SPACE : u16 = ' ' as u16 ;
58+ let mut ret_val = Vec :: new ( ) ;
59+ if lp_cmd_line. is_null ( ) || * lp_cmd_line == 0 {
60+ ret_val. push ( exe_name ( ) ) ;
61+ return ret_val;
62+ }
63+ let mut cmd_line = {
64+ let mut end = 0 ;
65+ while * lp_cmd_line. offset ( end) != 0 {
66+ end += 1 ;
67+ }
68+ slice:: from_raw_parts ( lp_cmd_line, end as usize )
69+ } ;
70+ // The executable name at the beginning is special.
71+ cmd_line = match cmd_line[ 0 ] {
72+ // The executable name ends at the next quote mark,
73+ // no matter what.
74+ QUOTE => {
75+ let args = {
76+ let mut cut = cmd_line[ 1 ..] . splitn ( 2 , |& c| c == QUOTE ) ;
77+ if let Some ( exe) = cut. next ( ) {
78+ ret_val. push ( OsString :: from_wide ( exe) ) ;
79+ }
80+ cut. next ( )
81+ } ;
82+ if let Some ( args) = args {
83+ args
84+ } else {
85+ return ret_val;
86+ }
87+ }
88+ // Implement quirk: when they say whitespace here,
89+ // they include the entire ASCII control plane:
90+ // "However, if lpCmdLine starts with any amount of whitespace, CommandLineToArgvW
91+ // will consider the first argument to be an empty string. Excess whitespace at the
92+ // end of lpCmdLine is ignored."
93+ 0 ...SPACE => {
94+ ret_val. push ( OsString :: new ( ) ) ;
95+ & cmd_line[ 1 ..]
96+ } ,
97+ // The executable name ends at the next whitespace,
98+ // no matter what.
99+ _ => {
100+ let args = {
101+ let mut cut = cmd_line. splitn ( 2 , |& c| c > 0 && c <= SPACE ) ;
102+ if let Some ( exe) = cut. next ( ) {
103+ ret_val. push ( OsString :: from_wide ( exe) ) ;
104+ }
105+ cut. next ( )
106+ } ;
107+ if let Some ( args) = args {
108+ args
109+ } else {
110+ return ret_val;
111+ }
112+ }
113+ } ;
114+ let mut cur = Vec :: new ( ) ;
115+ let mut in_quotes = false ;
116+ let mut was_in_quotes = false ;
117+ let mut backslash_count: usize = 0 ;
118+ for & c in cmd_line {
119+ match c {
120+ // backslash
121+ BACKSLASH => {
122+ backslash_count += 1 ;
123+ was_in_quotes = false ;
124+ } ,
125+ QUOTE if backslash_count % 2 == 0 => {
126+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count / 2 ) ) ;
127+ backslash_count = 0 ;
128+ if was_in_quotes {
129+ cur. push ( '"' as u16 ) ;
130+ was_in_quotes = false ;
131+ } else {
132+ was_in_quotes = in_quotes;
133+ in_quotes = !in_quotes;
134+ }
135+ }
136+ QUOTE if backslash_count % 2 != 0 => {
137+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count / 2 ) ) ;
138+ backslash_count = 0 ;
139+ was_in_quotes = false ;
140+ cur. push ( b'"' as u16 ) ;
141+ }
142+ SPACE | TAB if !in_quotes => {
143+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count) ) ;
144+ if !cur. is_empty ( ) || was_in_quotes {
145+ ret_val. push ( OsString :: from_wide ( & cur[ ..] ) ) ;
146+ cur. truncate ( 0 ) ;
147+ }
148+ backslash_count = 0 ;
149+ was_in_quotes = false ;
150+ }
151+ _ => {
152+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count) ) ;
153+ backslash_count = 0 ;
154+ was_in_quotes = false ;
155+ cur. push ( c) ;
156+ }
157+ }
158+ }
159+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count) ) ;
160+ // include empty quoted strings at the end of the arguments list
161+ if !cur. is_empty ( ) || was_in_quotes || in_quotes {
162+ ret_val. push ( OsString :: from_wide ( & cur[ ..] ) ) ;
163+ }
164+ ret_val
165+ }
166+
/// The command-line arguments of the process, already split into `OsString`s.
pub struct Args {
    /// Owning iterator over the arguments that have not been yielded yet.
    parsed_args_list: vec::IntoIter<OsString>,
}
42170
43171pub struct ArgsInnerDebug < ' a > {
@@ -46,19 +174,7 @@ pub struct ArgsInnerDebug<'a> {
46174
47175impl < ' a > fmt:: Debug for ArgsInnerDebug < ' a > {
48176 fn fmt ( & self , f : & mut fmt:: Formatter ) -> fmt:: Result {
49- f. write_str ( "[" ) ?;
50- let mut first = true ;
51- for i in self . args . range . clone ( ) {
52- if !first {
53- f. write_str ( ", " ) ?;
54- }
55- first = false ;
56-
57- // Here we do allocation which could be avoided.
58- fmt:: Debug :: fmt ( & unsafe { os_string_from_ptr ( * self . args . cur . offset ( i) ) } , f) ?;
59- }
60- f. write_str ( "]" ) ?;
61- Ok ( ( ) )
177+ self . args . parsed_args_list . as_slice ( ) . fmt ( f)
62178 }
63179}
64180
@@ -70,38 +186,82 @@ impl Args {
70186 }
71187}
72188
73- unsafe fn os_string_from_ptr ( ptr : * mut u16 ) -> OsString {
74- let mut len = 0 ;
75- while * ptr. offset ( len) != 0 { len += 1 ; }
76-
77- // Push it onto the list.
78- let ptr = ptr as * const u16 ;
79- let buf = slice:: from_raw_parts ( ptr, len as usize ) ;
80- OsStringExt :: from_wide ( buf)
81- }
82-
83189impl Iterator for Args {
84190 type Item = OsString ;
85- fn next ( & mut self ) -> Option < OsString > {
86- self . range . next ( ) . map ( |i| unsafe { os_string_from_ptr ( * self . cur . offset ( i) ) } )
87- }
88- fn size_hint ( & self ) -> ( usize , Option < usize > ) { self . range . size_hint ( ) }
191+ fn next ( & mut self ) -> Option < OsString > { self . parsed_args_list . next ( ) }
192+ fn size_hint ( & self ) -> ( usize , Option < usize > ) { self . parsed_args_list . size_hint ( ) }
89193}
90194
91195impl DoubleEndedIterator for Args {
92- fn next_back ( & mut self ) -> Option < OsString > {
93- self . range . next_back ( ) . map ( |i| unsafe { os_string_from_ptr ( * self . cur . offset ( i) ) } )
94- }
196+ fn next_back ( & mut self ) -> Option < OsString > { self . parsed_args_list . next_back ( ) }
95197}
96198
97199impl ExactSizeIterator for Args {
98- fn len ( & self ) -> usize { self . range . len ( ) }
200+ fn len ( & self ) -> usize { self . parsed_args_list . len ( ) }
99201}
100202
101- impl Drop for Args {
102- fn drop ( & mut self ) {
103- // self.cur can be null if CommandLineToArgvW previously failed,
104- // but LocalFree ignores NULL pointers
105- unsafe { c:: LocalFree ( self . cur as * mut c_void ) ; }
203+ #[ cfg( test) ]
204+ mod tests {
205+ use sys:: windows:: args:: * ;
206+ use ffi:: OsString ;
207+
208+ fn chk ( string : & str , parts : & [ & str ] ) {
209+ let mut wide: Vec < u16 > = OsString :: from ( string) . encode_wide ( ) . collect ( ) ;
210+ wide. push ( 0 ) ;
211+ let parsed = unsafe {
212+ parse_lp_cmd_line ( wide. as_ptr ( ) as * const u16 , || OsString :: from ( "TEST.EXE" ) )
213+ } ;
214+ let expected: Vec < OsString > = parts. iter ( ) . map ( |k| OsString :: from ( k) ) . collect ( ) ;
215+ assert_eq ! ( parsed. as_slice( ) , expected. as_slice( ) ) ;
216+ }
217+
218+ #[ test]
219+ fn empty ( ) {
220+ chk ( "" , & [ "TEST.EXE" ] ) ;
221+ chk ( "\0 " , & [ "TEST.EXE" ] ) ;
222+ }
223+
224+ #[ test]
225+ fn single_words ( ) {
226+ chk ( "EXE one_word" , & [ "EXE" , "one_word" ] ) ;
227+ chk ( "EXE a" , & [ "EXE" , "a" ] ) ;
228+ chk ( "EXE 😅" , & [ "EXE" , "😅" ] ) ;
229+ chk ( "EXE 😅🤦" , & [ "EXE" , "😅🤦" ] ) ;
230+ }
231+
232+ #[ test]
233+ fn official_examples ( ) {
234+ chk ( r#"EXE "abc" d e"# , & [ "EXE" , "abc" , "d" , "e" ] ) ;
235+ chk ( r#"EXE a\\\b d"e f"g h"# , & [ "EXE" , r#"a\\\b"# , "de fg" , "h" ] ) ;
236+ chk ( r#"EXE a\\\"b c d"# , & [ "EXE" , r#"a\"b"# , "c" , "d" ] ) ;
237+ chk ( r#"EXE a\\\\"b c" d e"# , & [ "EXE" , r#"a\\b c"# , "d" , "e" ] ) ;
238+ }
239+
240+ #[ test]
241+ fn whitespace_behavior ( ) {
242+ chk ( r#" test"# , & [ "" , "test" ] ) ;
243+ chk ( r#" test"# , & [ "" , "test" ] ) ;
244+ chk ( r#" test test2"# , & [ "" , "test" , "test2" ] ) ;
245+ chk ( r#" test test2"# , & [ "" , "test" , "test2" ] ) ;
246+ chk ( r#"test test2 "# , & [ "test" , "test2" ] ) ;
247+ chk ( r#"test test2 "# , & [ "test" , "test2" ] ) ;
248+ chk ( r#"test "# , & [ "test" ] ) ;
249+ }
250+
251+ #[ test]
252+ fn genius_quotes ( ) {
253+ chk ( r#"EXE "" """# , & [ "EXE" , "" , "" ] ) ;
254+ chk ( r#"EXE "" """"# , & [ "EXE" , "" , "\" " ] ) ;
255+ chk (
256+ r#"EXE "this is """all""" in the same argument""# ,
257+ & [ "EXE" , "this is \" all\" in the same argument" ]
258+ ) ;
259+ chk ( r#"EXE "a"""# , & [ "EXE" , "a\" " ] ) ;
260+ chk ( r#"EXE "a"" a"# , & [ "EXE" , "a\" " , "a" ] ) ;
261+ // quotes cannot be escaped in command names
262+ chk ( r#""EXE" check"# , & [ "EXE" , "check" ] ) ;
263+ chk ( r#""EXE check""# , & [ "EXE check" ] ) ;
264+ chk ( r#""EXE """for""" check"# , & [ "EXE " , r#"for""# , "check" ] ) ;
265+ chk ( r#""EXE \"for\" check"# , & [ r#"EXE \"# , r#"for""# , "check" ] ) ;
106266 }
107267}
0 commit comments