@@ -50,7 +50,7 @@ extern void *memset(void *s, int c, size_t n);
5050#include  "rust-demangle.h" 
5151
5252
53- /* Mangled Rust symbols look like this: 
53+ /* Mangled (legacy)  Rust symbols look like this: 
5454     _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a 
5555
5656   The original symbol is: 
@@ -74,16 +74,7 @@ extern void *memset(void *s, int c, size_t n);
7474   ">"  =>  $GT$ 
7575   "("  =>  $LP$ 
7676   ")"  =>  $RP$ 
77-    " "  =>  $u20$ 
78-    "\"" =>  $u22$ 
79-    "'"  =>  $u27$ 
80-    "+"  =>  $u2b$ 
81-    ";"  =>  $u3b$ 
82-    "["  =>  $u5b$ 
83-    "]"  =>  $u5d$ 
84-    "{"  =>  $u7b$ 
85-    "}"  =>  $u7d$ 
86-    "~"  =>  $u7e$ 
77+    "\u{XY}"  =>  $uXY$ 
8778
8879   A double ".." means "::" and a single "." means "-". 
8980
@@ -95,7 +86,8 @@ static const size_t hash_len = 16;
9586
9687static  int  is_prefixed_hash  (const  char  * start );
9788static  int  looks_like_rust  (const  char  * sym , size_t  len );
98- static  int  unescape  (const  char  * * in , char  * * out , const  char  * seq , char  value );
89+ static  int  parse_lower_hex_nibble  (char  nibble );
90+ static  char  parse_legacy_escape  (const  char  * * in );
9991
10092/* INPUT: sym: symbol that has been through C++ (gnu v3) demangling 
10193
@@ -149,20 +141,20 @@ is_prefixed_hash (const char *str)
149141  const  char  * end ;
150142  char  seen [16 ];
151143  size_t  i ;
152-   int  count ;
144+   int  count ,  nibble ;
153145
154146  if  (strncmp  (str , hash_prefix , hash_prefix_len ))
155147    return  0 ;
156148  str  +=  hash_prefix_len ;
157149
158150  memset  (seen , 0 , sizeof (seen ));
159151  for  (end  =  str  +  hash_len ; str  <  end ; str ++ )
160-     if  ( * str  >=  '0'   &&   * str  <=  '9' ) 
161-       seen [ * str   -   '0' ]  =   1 ;
162-     else   if  (* str  >=  'a'   &&   * str  <=  'f' )
163-       seen [ * str   -   'a'   +   10 ]  =   1 ;
164-     else 
165-        return   0 ; 
152+     { 
153+       nibble   =   parse_lower_hex_nibble  ( * str ) ;
154+        if  (nibble   <   0 )
155+          return   0 ;
156+        seen [ nibble ]  =   1 ; 
157+     } 
166158
167159  /* Count how many distinct digits seen */ 
168160  count  =  0 ;
@@ -179,57 +171,17 @@ looks_like_rust (const char *str, size_t len)
179171  const  char  * end  =  str  +  len ;
180172
181173  while  (str  <  end )
182-     switch  (* str )
183-       {
184-       case  '$' :
185- 	if  (!strncmp  (str , "$C$" , 3 ))
186- 	  str  +=  3 ;
187- 	else  if  (!strncmp  (str , "$SP$" , 4 )
188- 		 ||  !strncmp  (str , "$BP$" , 4 )
189- 		 ||  !strncmp  (str , "$RF$" , 4 )
190- 		 ||  !strncmp  (str , "$LT$" , 4 )
191- 		 ||  !strncmp  (str , "$GT$" , 4 )
192- 		 ||  !strncmp  (str , "$LP$" , 4 )
193- 		 ||  !strncmp  (str , "$RP$" , 4 ))
194- 	  str  +=  4 ;
195- 	else  if  (!strncmp  (str , "$u20$" , 5 )
196- 		 ||  !strncmp  (str , "$u22$" , 5 )
197- 		 ||  !strncmp  (str , "$u27$" , 5 )
198- 		 ||  !strncmp  (str , "$u2b$" , 5 )
199- 		 ||  !strncmp  (str , "$u3b$" , 5 )
200- 		 ||  !strncmp  (str , "$u5b$" , 5 )
201- 		 ||  !strncmp  (str , "$u5d$" , 5 )
202- 		 ||  !strncmp  (str , "$u7b$" , 5 )
203- 		 ||  !strncmp  (str , "$u7d$" , 5 )
204- 		 ||  !strncmp  (str , "$u7e$" , 5 ))
205- 	  str  +=  5 ;
206- 	else 
207- 	  return  0 ;
208- 	break ;
209-       case  '.' :
210- 	/* Do not allow three or more consecutive dots */ 
211- 	if  (!strncmp  (str , "..." , 3 ))
212- 	  return  0 ;
213- 	/* Fall through */ 
214-       case  'a' : case  'b' : case  'c' : case  'd' : case  'e' : case  'f' :
215-       case  'g' : case  'h' : case  'i' : case  'j' : case  'k' : case  'l' :
216-       case  'm' : case  'n' : case  'o' : case  'p' : case  'q' : case  'r' :
217-       case  's' : case  't' : case  'u' : case  'v' : case  'w' : case  'x' :
218-       case  'y' : case  'z' :
219-       case  'A' : case  'B' : case  'C' : case  'D' : case  'E' : case  'F' :
220-       case  'G' : case  'H' : case  'I' : case  'J' : case  'K' : case  'L' :
221-       case  'M' : case  'N' : case  'O' : case  'P' : case  'Q' : case  'R' :
222-       case  'S' : case  'T' : case  'U' : case  'V' : case  'W' : case  'X' :
223-       case  'Y' : case  'Z' :
224-       case  '0' : case  '1' : case  '2' : case  '3' : case  '4' : case  '5' :
225-       case  '6' : case  '7' : case  '8' : case  '9' :
226-       case  '_' :
227-       case  ':' :
228- 	str ++ ;
229- 	break ;
230-       default :
231- 	return  0 ;
232-       }
174+     {
175+       if  (* str  ==  '$' )
176+         {
177+           if  (!parse_legacy_escape  (& str ))
178+             return  0 ;
179+         }
180+       else  if  (* str  ==  '.'  ||  * str  ==  '_'  ||  * str  ==  ':'  ||  ISALNUM  (* str ))
181+         str ++ ;
182+       else 
183+         return  0 ;
184+     }
233185
234186  return  1 ;
235187}
@@ -246,6 +198,7 @@ rust_demangle_sym (char *sym)
246198  const  char  * in ;
247199  char  * out ;
248200  const  char  * end ;
201+   char  unescaped ;
249202
250203  if  (!sym )
251204    return ;
@@ -255,75 +208,49 @@ rust_demangle_sym (char *sym)
255208  end  =  sym  +  strlen  (sym ) -  (hash_prefix_len  +  hash_len );
256209
257210  while  (in  <  end )
258-     switch  (* in )
259-       {
260-       case  '$' :
261- 	if  (!(unescape  (& in , & out , "$C$" , ',' )
262- 	      ||  unescape  (& in , & out , "$SP$" , '@' )
263- 	      ||  unescape  (& in , & out , "$BP$" , '*' )
264- 	      ||  unescape  (& in , & out , "$RF$" , '&' )
265- 	      ||  unescape  (& in , & out , "$LT$" , '<' )
266- 	      ||  unescape  (& in , & out , "$GT$" , '>' )
267- 	      ||  unescape  (& in , & out , "$LP$" , '(' )
268- 	      ||  unescape  (& in , & out , "$RP$" , ')' )
269- 	      ||  unescape  (& in , & out , "$u20$" , ' ' )
270- 	      ||  unescape  (& in , & out , "$u22$" , '\"' )
271- 	      ||  unescape  (& in , & out , "$u27$" , '\'' )
272- 	      ||  unescape  (& in , & out , "$u2b$" , '+' )
273- 	      ||  unescape  (& in , & out , "$u3b$" , ';' )
274- 	      ||  unescape  (& in , & out , "$u5b$" , '[' )
275- 	      ||  unescape  (& in , & out , "$u5d$" , ']' )
276- 	      ||  unescape  (& in , & out , "$u7b$" , '{' )
277- 	      ||  unescape  (& in , & out , "$u7d$" , '}' )
278- 	      ||  unescape  (& in , & out , "$u7e$" , '~' ))) {
279- 	  /* unexpected escape sequence, not looks_like_rust. */ 
280- 	  goto fail ;
281- 	}
282- 	break ;
283-       case  '_' :
284- 	/* If this is the start of a path component and the next 
285- 	   character is an escape sequence, ignore the underscore. The 
286- 	   mangler inserts an underscore to make sure the path 
287- 	   component begins with a XID_Start character. */ 
288- 	if  ((in  ==  sym  ||  in [-1 ] ==  ':' ) &&  in [1 ] ==  '$' )
289- 	  in ++ ;
290- 	else 
291- 	  * out ++  =  * in ++ ;
292- 	break ;
293-       case  '.' :
294- 	if  (in [1 ] ==  '.' )
295- 	  {
296- 	    /* ".." becomes "::" */ 
297- 	    * out ++  =  ':' ;
298- 	    * out ++  =  ':' ;
299- 	    in  +=  2 ;
300- 	  }
301- 	else 
302- 	  {
303- 	    /* "." becomes "-" */ 
304- 	    * out ++  =  '-' ;
305- 	    in ++ ;
306- 	  }
307- 	break ;
308-       case  'a' : case  'b' : case  'c' : case  'd' : case  'e' : case  'f' :
309-       case  'g' : case  'h' : case  'i' : case  'j' : case  'k' : case  'l' :
310-       case  'm' : case  'n' : case  'o' : case  'p' : case  'q' : case  'r' :
311-       case  's' : case  't' : case  'u' : case  'v' : case  'w' : case  'x' :
312-       case  'y' : case  'z' :
313-       case  'A' : case  'B' : case  'C' : case  'D' : case  'E' : case  'F' :
314-       case  'G' : case  'H' : case  'I' : case  'J' : case  'K' : case  'L' :
315-       case  'M' : case  'N' : case  'O' : case  'P' : case  'Q' : case  'R' :
316-       case  'S' : case  'T' : case  'U' : case  'V' : case  'W' : case  'X' :
317-       case  'Y' : case  'Z' :
318-       case  '0' : case  '1' : case  '2' : case  '3' : case  '4' : case  '5' :
319-       case  '6' : case  '7' : case  '8' : case  '9' :
320-       case  ':' :
321- 	* out ++  =  * in ++ ;
322- 	break ;
323-       default :
324- 	/* unexpected character in symbol, not looks_like_rust.  */ 
325- 	goto fail ;
326-       }
211+     {
212+       if  (* in  ==  '$' )
213+         {
214+           unescaped  =  parse_legacy_escape  (& in );
215+           if  (unescaped )
216+             * out ++  =  unescaped ;
217+           else 
218+             /* unexpected escape sequence, not looks_like_rust. */ 
219+             goto fail ;
220+         }
221+       else  if  (* in  ==  '_' )
222+         {
223+           /* If this is the start of a path component and the next 
224+              character is an escape sequence, ignore the underscore. The 
225+              mangler inserts an underscore to make sure the path 
226+              component begins with a XID_Start character. */ 
227+           if  ((in  ==  sym  ||  in [-1 ] ==  ':' ) &&  in [1 ] ==  '$' )
228+             in ++ ;
229+           else 
230+             * out ++  =  * in ++ ;
231+         }
232+       else  if  (* in  ==  '.' )
233+         {
234+           if  (in [1 ] ==  '.' )
235+             {
236+               /* ".." becomes "::" */ 
237+               * out ++  =  ':' ;
238+               * out ++  =  ':' ;
239+               in  +=  2 ;
240+             }
241+           else 
242+             {
243+               /* "." becomes "-" */ 
244+               * out ++  =  '-' ;
245+               in ++ ;
246+             }
247+         }
248+       else  if  (* in  ==  ':'  ||  ISALNUM  (* in ))
249+         * out ++  =  * in ++ ;
250+       else 
251+         /* unexpected character in symbol, not looks_like_rust.  */ 
252+         goto fail ;
253+     }
327254  goto done ;
328255
329256fail :
@@ -332,18 +259,78 @@ rust_demangle_sym (char *sym)
332259  * out  =  '\0' ;
333260}
334261
/* Return the value (0x0-0xf) of the hexadecimal digit NIBBLE, or -1 if
   NIBBLE is not one of 0-9 or a-f.  Only lowercase letters are
   accepted; 'A'-'F' yield -1.  */
static int
parse_lower_hex_nibble (char nibble)
{
  if ('0' <= nibble && nibble <= '9')
    return nibble - '0';
  if ('a' <= nibble && nibble <= 'f')
    return 0xa + (nibble - 'a');
  return -1;
}

/* Decode the single "$...$" escape sequence that starts at *IN.

   Recognized forms:
     "$C$"  => ','      "$SP$" => '@'      "$BP$" => '*'
     "$RF$" => '&'      "$LT$" => '<'      "$GT$" => '>'
     "$LP$" => '('      "$RP$" => ')'
     "$uXY$", where X and Y are lowercase hex digits and 0xXY is a
     printable ASCII character (0x20 through 0x7e).

   On success, advance *IN past the closing '$' and return the decoded
   character.  On any failure (no leading '$', unknown name, bad hex
   digit, control or non-ASCII code, missing closing '$') return 0 and
   leave *IN unchanged.  */
static char
parse_legacy_escape (const char **in)
{
  char c = 0;
  const char *e;
  size_t escape_len = 0;
  int lo_nibble = -1, hi_nibble = -1;

  if ((*in)[0] != '$')
    return 0;

  /* E points at the escape name, just after the opening '$'.  */
  e = *in + 1;

  if (e[0] == 'C')
    {
      escape_len = 1;

      c = ',';
    }
  else
    {
      escape_len = 2;

      /* Every test below looks at e[0] first, so e[1] is only read
         when e[0] is a non-NUL character.  */
      if (e[0] == 'S' && e[1] == 'P')
        c = '@';
      else if (e[0] == 'B' && e[1] == 'P')
        c = '*';
      else if (e[0] == 'R' && e[1] == 'F')
        c = '&';
      else if (e[0] == 'L' && e[1] == 'T')
        c = '<';
      else if (e[0] == 'G' && e[1] == 'T')
        c = '>';
      else if (e[0] == 'L' && e[1] == 'P')
        c = '(';
      else if (e[0] == 'R' && e[1] == 'P')
        c = ')';
      else if (e[0] == 'u')
        {
          escape_len = 3;

          /* Bail out on the first bad digit so that e[2] is never
             read when e[1] is the terminating NUL.  */
          hi_nibble = parse_lower_hex_nibble (e[1]);
          if (hi_nibble < 0)
            return 0;
          lo_nibble = parse_lower_hex_nibble (e[2]);
          if (lo_nibble < 0)
            return 0;

          /* Only allow non-control ASCII characters: reject anything
             above 0x7f, the C0 controls (below 0x20) and DEL (0x7f).  */
          if (hi_nibble > 7)
            return 0;
          c = (hi_nibble << 4) | lo_nibble;
          if (c < 0x20 || c == 0x7f)
            return 0;
        }
    }

  /* C is still 0 when no escape name matched; otherwise the name must
     be followed immediately by the closing '$'.  */
  if (!c || e[escape_len] != '$')
    return 0;

  /* Skip the opening '$', the name, and the closing '$'.  */
  *in += 2 + escape_len;
  return c;
}
0 commit comments