| 
 | 1 | +use crate::parser::{unescape_llvm_string_contents, Parser};  | 
 | 2 | +use anyhow::{anyhow, Context};  | 
 | 3 | +use regex::Regex;  | 
 | 4 | +use std::collections::HashMap;  | 
 | 5 | +use std::fmt::{self, Debug, Write as _};  | 
 | 6 | +use std::sync::OnceLock;  | 
 | 7 | + | 
 | 8 | +pub(crate) fn dump_covfun_mappings(  | 
 | 9 | +    llvm_ir: &str,  | 
 | 10 | +    function_names: &HashMap<u64, String>,  | 
 | 11 | +) -> anyhow::Result<()> {  | 
 | 12 | +    // Extract function coverage entries from the LLVM IR assembly, and associate  | 
 | 13 | +    // each entry with its (demangled) name.  | 
 | 14 | +    let mut covfun_entries = llvm_ir  | 
 | 15 | +        .lines()  | 
 | 16 | +        .filter_map(covfun_line_data)  | 
 | 17 | +        .map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data))  | 
 | 18 | +        .collect::<Vec<_>>();  | 
 | 19 | +    covfun_entries.sort_by(|a, b| {  | 
 | 20 | +        // Sort entries primarily by name, to help make the order consistent  | 
 | 21 | +        // across platforms and relatively insensitive to changes.  | 
 | 22 | +        // (Sadly we can't use `sort_by_key` because we would need to return references.)  | 
 | 23 | +        Ord::cmp(&a.0, &b.0)  | 
 | 24 | +            .then_with(|| Ord::cmp(&a.1.is_used, &b.1.is_used))  | 
 | 25 | +            .then_with(|| Ord::cmp(a.1.payload.as_slice(), b.1.payload.as_slice()))  | 
 | 26 | +    });  | 
 | 27 | + | 
 | 28 | +    for (name, line_data) in &covfun_entries {  | 
 | 29 | +        let name = name.unwrap_or("(unknown)");  | 
 | 30 | +        let unused = if line_data.is_used { "" } else { " (unused)" };  | 
 | 31 | +        println!("Function name: {name}{unused}");  | 
 | 32 | + | 
 | 33 | +        let payload: &[u8] = &line_data.payload;  | 
 | 34 | +        println!("Raw bytes ({len}): 0x{payload:02x?}", len = payload.len());  | 
 | 35 | + | 
 | 36 | +        let mut parser = Parser::new(payload);  | 
 | 37 | + | 
 | 38 | +        let num_files = parser.read_uleb128_u32()?;  | 
 | 39 | +        println!("Number of files: {num_files}");  | 
 | 40 | + | 
 | 41 | +        for i in 0..num_files {  | 
 | 42 | +            let global_file_id = parser.read_uleb128_u32()?;  | 
 | 43 | +            println!("- file {i} => global file {global_file_id}");  | 
 | 44 | +        }  | 
 | 45 | + | 
 | 46 | +        let num_expressions = parser.read_uleb128_u32()?;  | 
 | 47 | +        println!("Number of expressions: {num_expressions}");  | 
 | 48 | + | 
 | 49 | +        let mut expression_resolver = ExpressionResolver::new();  | 
 | 50 | +        for i in 0..num_expressions {  | 
 | 51 | +            let lhs = parser.read_simple_term()?;  | 
 | 52 | +            let rhs = parser.read_simple_term()?;  | 
 | 53 | +            println!("- expression {i} operands: lhs = {lhs:?}, rhs = {rhs:?}");  | 
 | 54 | +            expression_resolver.push_operands(lhs, rhs);  | 
 | 55 | +        }  | 
 | 56 | + | 
 | 57 | +        for i in 0..num_files {  | 
 | 58 | +            let num_mappings = parser.read_uleb128_u32()?;  | 
 | 59 | +            println!("Number of file {i} mappings: {num_mappings}");  | 
 | 60 | + | 
 | 61 | +            for _ in 0..num_mappings {  | 
 | 62 | +                let (kind, region) = parser.read_mapping_kind_and_region()?;  | 
 | 63 | +                println!("- {kind:?} at {region:?}");  | 
 | 64 | + | 
 | 65 | +                match kind {  | 
 | 66 | +                    // Also print expression mappings in resolved form.  | 
 | 67 | +                    MappingKind::Code(term @ CovTerm::Expression { .. })  | 
 | 68 | +                    | MappingKind::Gap(term @ CovTerm::Expression { .. }) => {  | 
 | 69 | +                        println!("    = {}", expression_resolver.format_term(term));  | 
 | 70 | +                    }  | 
 | 71 | +                    // If the mapping is a branch region, print both of its arms  | 
 | 72 | +                    // in resolved form (even if they aren't expressions).  | 
 | 73 | +                    MappingKind::Branch { r#true, r#false } => {  | 
 | 74 | +                        println!("    true  = {}", expression_resolver.format_term(r#true));  | 
 | 75 | +                        println!("    false = {}", expression_resolver.format_term(r#false));  | 
 | 76 | +                    }  | 
 | 77 | +                    _ => (),  | 
 | 78 | +                }  | 
 | 79 | +            }  | 
 | 80 | +        }  | 
 | 81 | + | 
 | 82 | +        parser.ensure_empty()?;  | 
 | 83 | +        println!();  | 
 | 84 | +    }  | 
 | 85 | +    Ok(())  | 
 | 86 | +}  | 
 | 87 | + | 
 | 88 | +struct CovfunLineData {  | 
 | 89 | +    name_hash: u64,  | 
 | 90 | +    is_used: bool,  | 
 | 91 | +    payload: Vec<u8>,  | 
 | 92 | +}  | 
 | 93 | + | 
 | 94 | +/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`  | 
 | 95 | +/// entry, and if so extracts relevant data in a `CovfunLineData`.  | 
 | 96 | +fn covfun_line_data(line: &str) -> Option<CovfunLineData> {  | 
 | 97 | +    let re = {  | 
 | 98 | +        // We cheat a little bit and match variable names `@__covrec_[HASH]u`  | 
 | 99 | +        // rather than the section name, because the section name is harder to  | 
 | 100 | +        // extract and differs across Linux/Windows/macOS. We also extract the  | 
 | 101 | +        // symbol name hash from the variable name rather than the data, since  | 
 | 102 | +        // it's easier and both should match.  | 
 | 103 | +        static RE: OnceLock<Regex> = OnceLock::new();  | 
 | 104 | +        RE.get_or_init(|| {  | 
 | 105 | +            Regex::new(  | 
 | 106 | +                r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#,  | 
 | 107 | +            )  | 
 | 108 | +            .unwrap()  | 
 | 109 | +        })  | 
 | 110 | +    };  | 
 | 111 | + | 
 | 112 | +    let captures = re.captures(line)?;  | 
 | 113 | +    let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap();  | 
 | 114 | +    let is_used = captures.name("is_used").is_some();  | 
 | 115 | +    let payload = unescape_llvm_string_contents(&captures["payload"]);  | 
 | 116 | + | 
 | 117 | +    Some(CovfunLineData { name_hash, is_used, payload })  | 
 | 118 | +}  | 
 | 119 | + | 
 | 120 | +// Extra parser methods only needed when parsing `covfun` payloads.  | 
 | 121 | +impl<'a> Parser<'a> {  | 
 | 122 | +    fn read_simple_term(&mut self) -> anyhow::Result<CovTerm> {  | 
 | 123 | +        let raw_term = self.read_uleb128_u32()?;  | 
 | 124 | +        CovTerm::decode(raw_term).context("decoding term")  | 
 | 125 | +    }  | 
 | 126 | + | 
 | 127 | +    fn read_mapping_kind_and_region(&mut self) -> anyhow::Result<(MappingKind, MappingRegion)> {  | 
 | 128 | +        let mut kind = self.read_raw_mapping_kind()?;  | 
 | 129 | +        let mut region = self.read_raw_mapping_region()?;  | 
 | 130 | + | 
 | 131 | +        const HIGH_BIT: u32 = 1u32 << 31;  | 
 | 132 | +        if region.end_column & HIGH_BIT != 0 {  | 
 | 133 | +            region.end_column &= !HIGH_BIT;  | 
 | 134 | +            kind = match kind {  | 
 | 135 | +                MappingKind::Code(term) => MappingKind::Gap(term),  | 
 | 136 | +                // LLVM's coverage mapping reader will actually handle this  | 
 | 137 | +                // case without complaint, but the result is almost certainly  | 
 | 138 | +                // a meaningless implementation artifact.  | 
 | 139 | +                _ => return Err(anyhow!("unexpected base kind for gap region: {kind:?}")),  | 
 | 140 | +            }  | 
 | 141 | +        }  | 
 | 142 | + | 
 | 143 | +        Ok((kind, region))  | 
 | 144 | +    }  | 
 | 145 | + | 
 | 146 | +    fn read_raw_mapping_kind(&mut self) -> anyhow::Result<MappingKind> {  | 
 | 147 | +        let raw_mapping_kind = self.read_uleb128_u32()?;  | 
 | 148 | +        if let Some(term) = CovTerm::decode(raw_mapping_kind) {  | 
 | 149 | +            return Ok(MappingKind::Code(term));  | 
 | 150 | +        }  | 
 | 151 | + | 
 | 152 | +        assert_eq!(raw_mapping_kind & 0b11, 0);  | 
 | 153 | +        assert_ne!(raw_mapping_kind, 0);  | 
 | 154 | + | 
 | 155 | +        let (high, is_expansion) = (raw_mapping_kind >> 3, raw_mapping_kind & 0b100 != 0);  | 
 | 156 | +        if is_expansion {  | 
 | 157 | +            Ok(MappingKind::Expansion(high))  | 
 | 158 | +        } else {  | 
 | 159 | +            match high {  | 
 | 160 | +                0 => unreachable!("zero kind should have already been handled as a code mapping"),  | 
 | 161 | +                2 => Ok(MappingKind::Skip),  | 
 | 162 | +                4 => {  | 
 | 163 | +                    let r#true = self.read_simple_term()?;  | 
 | 164 | +                    let r#false = self.read_simple_term()?;  | 
 | 165 | +                    Ok(MappingKind::Branch { r#true, r#false })  | 
 | 166 | +                }  | 
 | 167 | +                _ => Err(anyhow!("unknown mapping kind: {raw_mapping_kind:#x}")),  | 
 | 168 | +            }  | 
 | 169 | +        }  | 
 | 170 | +    }  | 
 | 171 | + | 
 | 172 | +    fn read_raw_mapping_region(&mut self) -> anyhow::Result<MappingRegion> {  | 
 | 173 | +        let start_line_offset = self.read_uleb128_u32()?;  | 
 | 174 | +        let start_column = self.read_uleb128_u32()?;  | 
 | 175 | +        let end_line_offset = self.read_uleb128_u32()?;  | 
 | 176 | +        let end_column = self.read_uleb128_u32()?;  | 
 | 177 | +        Ok(MappingRegion { start_line_offset, start_column, end_line_offset, end_column })  | 
 | 178 | +    }  | 
 | 179 | +}  | 
 | 180 | + | 
 | 181 | +/// Enum that can hold a constant zero value, the ID of an physical coverage  | 
 | 182 | +/// counter, or the ID (and operation) of a coverage-counter expression.  | 
 | 183 | +///  | 
 | 184 | +/// Terms are used as the operands of coverage-counter expressions, as the arms  | 
 | 185 | +/// of branch mappings, and as the value of code/gap mappings.  | 
 | 186 | +#[derive(Clone, Copy, Debug)]  | 
 | 187 | +pub(crate) enum CovTerm {  | 
 | 188 | +    Zero,  | 
 | 189 | +    Counter(u32),  | 
 | 190 | +    Expression(u32, Op),  | 
 | 191 | +}  | 
 | 192 | + | 
 | 193 | +/// Operator (addition or subtraction) used by an expression.  | 
 | 194 | +#[derive(Clone, Copy, Debug)]  | 
 | 195 | +pub(crate) enum Op {  | 
 | 196 | +    Sub,  | 
 | 197 | +    Add,  | 
 | 198 | +}  | 
 | 199 | + | 
 | 200 | +impl CovTerm {  | 
 | 201 | +    pub(crate) fn decode(input: u32) -> Option<Self> {  | 
 | 202 | +        let (high, tag) = (input >> 2, input & 0b11);  | 
 | 203 | +        match tag {  | 
 | 204 | +            0b00 if high == 0 => Some(Self::Zero),  | 
 | 205 | +            0b01 => Some(Self::Counter(high)),  | 
 | 206 | +            0b10 => Some(Self::Expression(high, Op::Sub)),  | 
 | 207 | +            0b11 => Some(Self::Expression(high, Op::Add)),  | 
 | 208 | +            // When reading expression operands or branch arms, the LLVM coverage  | 
 | 209 | +            // mapping reader will always interpret a `0b00` tag as a zero  | 
 | 210 | +            // term, even when the high bits are non-zero.  | 
 | 211 | +            // We treat that case as failure instead, so that this code can be  | 
 | 212 | +            // shared by the full mapping-kind reader as well.  | 
 | 213 | +            _ => None,  | 
 | 214 | +        }  | 
 | 215 | +    }  | 
 | 216 | +}  | 
 | 217 | + | 
 | 218 | +#[derive(Debug)]  | 
 | 219 | +enum MappingKind {  | 
 | 220 | +    Code(CovTerm),  | 
 | 221 | +    Gap(CovTerm),  | 
 | 222 | +    Expansion(u32),  | 
 | 223 | +    Skip,  | 
 | 224 | +    // Using raw identifiers here makes the dump output a little bit nicer  | 
 | 225 | +    // (via the derived Debug), at the expense of making this tool's source  | 
 | 226 | +    // code a little bit uglier.  | 
 | 227 | +    Branch { r#true: CovTerm, r#false: CovTerm },  | 
 | 228 | +}  | 
 | 229 | + | 
 | 230 | +struct MappingRegion {  | 
 | 231 | +    /// Offset of this region's start line, relative to the *start line* of  | 
 | 232 | +    /// the *previous mapping* (or 0). Line numbers are 1-based.  | 
 | 233 | +    start_line_offset: u32,  | 
 | 234 | +    /// This region's start column, absolute and 1-based.  | 
 | 235 | +    start_column: u32,  | 
 | 236 | +    /// Offset of this region's end line, relative to the *this mapping's*  | 
 | 237 | +    /// start line. Line numbers are 1-based.  | 
 | 238 | +    end_line_offset: u32,  | 
 | 239 | +    /// This region's end column, absolute, 1-based, and exclusive.  | 
 | 240 | +    ///  | 
 | 241 | +    /// If the highest bit is set, that bit is cleared and the associated  | 
 | 242 | +    /// mapping becomes a gap region mapping.  | 
 | 243 | +    end_column: u32,  | 
 | 244 | +}  | 
 | 245 | + | 
 | 246 | +impl Debug for MappingRegion {  | 
 | 247 | +    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {  | 
 | 248 | +        write!(  | 
 | 249 | +            f,  | 
 | 250 | +            "(prev + {}, {}) to (start + {}, {})",  | 
 | 251 | +            self.start_line_offset, self.start_column, self.end_line_offset, self.end_column  | 
 | 252 | +        )  | 
 | 253 | +    }  | 
 | 254 | +}  | 
 | 255 | + | 
 | 256 | +/// Helper type that prints expressions in a "resolved" form, so that  | 
 | 257 | +/// developers reading the dump don't need to resolve expressions by hand.  | 
 | 258 | +struct ExpressionResolver {  | 
 | 259 | +    operands: Vec<(CovTerm, CovTerm)>,  | 
 | 260 | +}  | 
 | 261 | + | 
 | 262 | +impl ExpressionResolver {  | 
 | 263 | +    fn new() -> Self {  | 
 | 264 | +        Self { operands: Vec::new() }  | 
 | 265 | +    }  | 
 | 266 | + | 
 | 267 | +    fn push_operands(&mut self, lhs: CovTerm, rhs: CovTerm) {  | 
 | 268 | +        self.operands.push((lhs, rhs));  | 
 | 269 | +    }  | 
 | 270 | + | 
 | 271 | +    fn format_term(&self, term: CovTerm) -> String {  | 
 | 272 | +        let mut output = String::new();  | 
 | 273 | +        self.write_term(&mut output, term);  | 
 | 274 | +        output  | 
 | 275 | +    }  | 
 | 276 | + | 
 | 277 | +    fn write_term(&self, output: &mut String, term: CovTerm) {  | 
 | 278 | +        match term {  | 
 | 279 | +            CovTerm::Zero => output.push_str("Zero"),  | 
 | 280 | +            CovTerm::Counter(id) => write!(output, "c{id}").unwrap(),  | 
 | 281 | +            CovTerm::Expression(id, op) => {  | 
 | 282 | +                let (lhs, rhs) = self.operands[id as usize];  | 
 | 283 | +                let op = match op {  | 
 | 284 | +                    Op::Sub => "-",  | 
 | 285 | +                    Op::Add => "+",  | 
 | 286 | +                };  | 
 | 287 | + | 
 | 288 | +                output.push('(');  | 
 | 289 | +                self.write_term(output, lhs);  | 
 | 290 | +                write!(output, " {op} ").unwrap();  | 
 | 291 | +                self.write_term(output, rhs);  | 
 | 292 | +                output.push(')');  | 
 | 293 | +            }  | 
 | 294 | +        }  | 
 | 295 | +    }  | 
 | 296 | +}  | 
0 commit comments