@@ -239,7 +239,7 @@ def is_codepoint_identifier_allowed(c, identifier_allowed):
239239 return True
240240 return False
241241
242- def load_rustc_mixedscript_confusables (f , identifier_allowed , scripts ):
242+ def load_potential_mixedscript_confusables (f , identifier_allowed , scripts ):
243243 # First, load all confusables data from confusables.txt
244244 confusables = load_confusables (f )
245245
@@ -248,15 +248,6 @@ def load_rustc_mixedscript_confusables(f, identifier_allowed, scripts):
248248 # seen as substitutes to itself. So if the confusables.txt says A -> C, B -> C,
249249 # and implicitly C -> C, it means A <-> B, A <-> C, B <-> C are confusable.
250250
251- # here we first make a dict that contains all As and Bs whose corresponding C is single code point.
252- seekup_map = {}
253- for item in confusables :
254- d_proto_list = item [1 ]
255- d_source = item [0 ]
256- assert (len (d_proto_list ) > 0 )
257- if len (d_proto_list ) == 1 :
258- seekup_map [escape_char (d_source )] = d_proto_list
259-
260251 # Here we're dividing all confusable lhs and rhs(prototype) operands of the substitution into equivalence classes.
261252 # Principally we'll be using the rhs operands as the representative element of its equivalence classes.
262253 # However some rhs operands are single code point, while some others are not.
@@ -275,9 +266,8 @@ def load_rustc_mixedscript_confusables(f, identifier_allowed, scripts):
275266 if d_proto not in codepoint_map :
276267 codepoint_map [d_proto ] = []
277268 # when we create new equivalence class, we'll check whether the representative element should be collected.
278- # i.e. if it is not subject to substituion, and not restricted from identifier usage,
279- # we collect it into the equivalence class.
280- if d_proto not in seekup_map and is_codepoint_identifier_allowed (d_proto_list [0 ], identifier_allowed ):
269+ # i.e. if it is not restricted from identifier usage, we collect it into the equivalence class.
270+ if is_codepoint_identifier_allowed (d_proto_list [0 ], identifier_allowed ):
281271 codepoint_map [d_proto ].append (d_proto_list [0 ])
282272 # we collect the original code point to be substituted into this list.
283273 codepoint_map [d_proto ].append (d_source )
@@ -562,23 +552,20 @@ def emit_confusable_detection_module(f):
562552def escape_script_constant (name , longforms ):
563553 return "Script::" + longforms [name ].strip ()
564554
565- def emit_rustc_mixed_script_confusable_detection (f ):
566- f .write ("pub mod rustc_mixed_script_confusable_detection {" )
555+ def emit_potiential_mixed_script_confusable (f ):
556+ f .write ("pub mod potential_mixed_script_confusable {" )
567557 f .write ("""
568- use unicode_script::Script;
569-
570558 #[inline]
571- pub fn is_rustc_mixed_script_confusable (c: char) -> Option<Script> {
559+ pub fn potential_mixed_script_confusable (c: char) -> bool {
572560 match c as usize {
573- _ => super::util::bsearch_value_table (c, CONFUSABLES)
561+ _ => super::util::bsearch_table (c, CONFUSABLES)
574562 }
575563 }
576-
577564""" )
578565 identifier_status_table = load_properties ("IdentifierStatus.txt" )
579- longforms , scripts = load_scripts ("Scripts.txt" )
566+ _ , scripts = load_scripts ("Scripts.txt" )
580567 identifier_allowed = identifier_status_table ['Allowed' ]
581- (mixedscript_confusable , mixedscript_confusable_unresolved ) = load_rustc_mixedscript_confusables ("confusables.txt" , identifier_allowed , scripts )
568+ (mixedscript_confusable , mixedscript_confusable_unresolved ) = load_potential_mixedscript_confusables ("confusables.txt" , identifier_allowed , scripts )
582569 debug = False
583570 if debug == True :
584571 debug_emit_mixedscript_confusable (f , mixedscript_confusable , "mixedscript_confusable" , scripts )
@@ -589,16 +576,21 @@ def emit_rustc_mixed_script_confusable_detection(f):
589576 source = pair [0 ]
590577 confusable_table .append ((source , script ))
591578 confusable_table .sort (key = lambda w : w [0 ])
592- emit_table (f , "CONFUSABLES" , confusable_table , "&'static [( char, Script) ]" , is_pub = False ,
593- pfun = lambda x : "(%s,%s) " % ( escape_char (x [0 ]), escape_script_constant ( x [ 1 ], longforms ) ))
579+ emit_table (f , "CONFUSABLES" , confusable_table , "&'static [char]" , is_pub = False ,
580+ pfun = lambda x : "%s " % escape_char (x [0 ]))
594581 f .write ("}\n \n " )
595582
596583
597584def emit_util_mod (f ):
598585 f .write ("""
599586pub mod util {
600587 use core::result::Result::{Ok, Err};
601-
588+
589+ #[inline]
590+ pub fn bsearch_table(c: char, r: &'static [char]) -> bool {
591+ r.binary_search(&c).is_ok()
592+ }
593+
602594 #[inline]
603595 pub fn bsearch_value_table<T: Copy>(c: char, r: &'static [(char, T)]) -> Option<T> {
604596 match r.binary_search_by_key(&c, |&(k, _)| k) {
@@ -609,7 +601,7 @@ def emit_util_mod(f):
609601 Err(_) => None
610602 }
611603 }
612-
604+
613605 #[inline]
614606 pub fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
615607 use core::cmp::Ordering::{Equal, Less, Greater};
@@ -619,7 +611,7 @@ def emit_util_mod(f):
619611 else { Greater }
620612 }).is_ok()
621613 }
622-
614+
623615 pub fn bsearch_range_value_table<T: Copy>(c: char, r: &'static [(char, char, T)]) -> Option<T> {
624616 use core::cmp::Ordering::{Equal, Less, Greater};
625617 match r.binary_search_by(|&(lo, hi, _)| {
@@ -660,4 +652,4 @@ def emit_util_mod(f):
660652 ### confusable_detection module
661653 emit_confusable_detection_module (rf )
662654 ### potential_mixed_script_confusable module
663- emit_rustc_mixed_script_confusable_detection (rf )
655+ emit_potiential_mixed_script_confusable (rf )
0 commit comments