@@ -656,30 +656,91 @@ impl Session {
656656            return  n as  usize 
657657        } 
658658
659+         // Why is 16 codegen units the default all the time? 
660+         // 
661+         // The main reason for enabling multiple codegen units by default is to 
662+         // leverage the ability for the trans backend to do translation and 
663+         // codegen in parallel. This allows us, especially for large crates, to 
664+         // make good use of all available resources on the machine once we've 
665+         // hit that stage of compilation. Large crates especially then often 
666+         // take a long time in trans/codegen and this helps us amortize that 
667+         // cost. 
668+         // 
669+         // Note that a high number here doesn't mean that we'll be spawning a 
670+         // large number of threads in parallel. The backend of rustc contains 
671+         // global rate limiting through the `jobserver` crate so we'll never 
672+         // overload the system with too much work, but rather we'll only be 
673+         // optimizing when we're otherwise cooperating with other instances of 
674+         // rustc. 
675+         // 
676+         // Rather a high number here means that we should be able to keep a lot 
677+         // of idle cpus busy. By ensuring that no codegen unit takes *too* long 
678+         // to build we'll be guaranteed that all cpus will finish pretty closely 
679+         // to one another and we should make relatively optimal use of system 
680+         // resources. 
681+         // 
682+         // Note that the main cost of codegen units is that it prevents LLVM 
683+         // from inlining across codegen units. Users in general don't have a lot 
684+         // of control over how codegen units are split up so it's our job in the 
685+         // compiler to ensure that undue performance isn't lost when using 
686+         // codegen units (aka we can't require everyone to slap `#[inline]` on 
687+         // everything). 
688+         // 
689+         // If we're compiling at `-O0` then the number doesn't really matter too 
690+         // much because performance doesn't matter and inlining is ok to lose. 
691+         // In debug mode we just want to try to guarantee that no cpu is stuck 
692+         // doing work that could otherwise be farmed to others. 
693+         // 
694+         // In release mode, however (O1 and above) performance does indeed 
695+         // matter! To recover the performance lost to missed inlining we'll be 
696+         // enabling ThinLTO by default (the function for which is just below). 
697+         // This will ensure that we recover any inlining wins we otherwise lost 
698+         // through codegen unit partitioning. 
699+         // 
700+         // --- 
701+         // 
702+         // Ok that's a lot of words but the basic tl;dr; is that we want a high 
703+         // number here -- but not too high. Additionally we're "safe" to have it 
704+         // always at the same number at all optimization levels. 
705+         // 
706+         // As a result 16 was chosen here! Mostly because it was a power of 2 
707+         // and most benchmarks agreed it was roughly a local optimum. Not very 
708+         // scientific. 
659709        match  self . opts . optimize  { 
660-             // If we're compiling at `-O0` then default to 16 codegen units. 
661-             // The number here shouldn't matter too too much as debug mode 
662-             // builds don't rely on performance at all, meaning that lost 
663-             // opportunities for inlining through multiple codegen units is 
664-             // a non-issue. 
665-             // 
666-             // Note that the high number here doesn't mean that we'll be 
667-             // spawning a large number of threads in parallel. The backend 
668-             // of rustc contains global rate limiting through the 
669-             // `jobserver` crate so we'll never overload the system with too 
670-             // much work, but rather we'll only be optimizing when we're 
671-             // otherwise cooperating with other instances of rustc. 
672-             // 
673-             // Rather the high number here means that we should be able to 
674-             // keep a lot of idle cpus busy. By ensuring that no codegen 
675-             // unit takes *too* long to build we'll be guaranteed that all 
676-             // cpus will finish pretty closely to one another and we should 
677-             // make relatively optimal use of system resources 
678710            config:: OptLevel :: No  => 16 , 
711+             _ => 1 ,  // FIXME(#46346) this should be 16 
712+         } 
713+     } 
679714
680-             // All other optimization levels default use one codegen unit, 
681-             // the historical default in Rust for a Long Time. 
682-             _ => 1 , 
715+     /// Returns whether ThinLTO is enabled for this compilation 
716+ pub  fn  thinlto ( & self )  -> bool  { 
717+         // If processing command line options determined that we're incompatible 
718+         // with ThinLTO (e.g. `-C lto --emit llvm-ir`) then return that option. 
719+         if  let  Some ( enabled)  = self . opts . cli_forced_thinlto  { 
720+             return  enabled
721+         } 
722+ 
723+         // If explicitly specified, use that with the next highest priority 
724+         if  let  Some ( enabled)  = self . opts . debugging_opts . thinlto  { 
725+             return  enabled
726+         } 
727+ 
728+         // If there's only one codegen unit and LTO isn't enabled then there's 
729+         // no need for ThinLTO so just return false. 
730+         if  self . codegen_units ( )  == 1  && !self . lto ( )  { 
731+             return  false 
732+         } 
733+ 
734+         // Right now ThinLTO isn't compatible with incremental compilation. 
735+         if  self . opts . incremental . is_some ( )  { 
736+             return  false 
737+         } 
738+ 
739+         // Now we're in "defaults" territory. By default we enable ThinLTO for 
740+         // optimized compiles (anything greater than O0). 
741+         match  self . opts . optimize  { 
742+             config:: OptLevel :: No  => false , 
743+             _ => true , 
683744        } 
684745    } 
685746} 
0 commit comments