Skip to content

Commit

Permalink
feat: Add SymCache Transformers
Browse files Browse the repository at this point in the history
These allow transforming the function and source location records that
will be written into SymCaches.
  • Loading branch information
Swatinem committed Feb 8, 2022
1 parent f51a1b4 commit 3e2cd11
Show file tree
Hide file tree
Showing 6 changed files with 281 additions and 74 deletions.
1 change: 1 addition & 0 deletions symbolic-symcache/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ mod old;
pub(crate) mod preamble;

pub use compat::*;
pub use new::transform;
pub use new::SymCacheWriter;
#[allow(deprecated)]
pub use old::format;
Expand Down
30 changes: 27 additions & 3 deletions symbolic-symcache/src/new/compat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,7 @@ where
{
/// Converts an entire object into a SymCache.
///
/// Any object which implements [`ObjectLike`] can be written into a
/// [`SymCache`](crate::SymCache) by this function. This already implicitly
/// calls [`SymCacheWriter::finish`], thus consuming the writer.
/// This is a shortcut for [`SymCacheWriter::process_object`] followed by [`SymCacheWriter::finish`].
pub fn write_object<'d, 'o, O>(object: &'o O, target: W) -> Result<W, SymCacheError>
where
O: ObjectLike<'d, 'o>,
Expand Down Expand Up @@ -93,6 +91,32 @@ where
})
}

/// Registers a [`transform::Transformer`] on this [`SymCacheWriter`].
///
/// The transformer is handed on to the underlying converter. Every [`transform::Function`] and
/// [`transform::SourceLocation`] is passed through it before being written to the SymCache.
pub fn add_transformer<T: transform::Transformer + 'static>(&mut self, t: T) {
    self.converter.add_transformer(t);
}

/// Processes the [`ObjectLike`], writing its functions, line information and symbols into the
/// SymCache.
///
/// The CPU architecture and debug id of the SymCache are taken from `object` before the object
/// itself is processed.
///
/// # Errors
///
/// Returns an error if the underlying converter fails to process `object`.
pub fn process_object<'d, 'o, O>(&mut self, object: &'o O) -> Result<(), SymCacheError>
where
O: ObjectLike<'d, 'o>,
O::Error: std::error::Error + Send + Sync + 'static,
{
// Copy the object's metadata into the converter first.
self.converter.set_arch(object.arch());
self.converter.set_debug_id(object.debug_id());

// Delegate the actual conversion; `?` propagates any failure to the caller.
self.converter.process_object(object)?;

Ok(())
}

/// Sets the CPU architecture of this SymCache.
pub fn set_arch(&mut self, arch: Arch) {
self.converter.set_arch(arch)
Expand Down
1 change: 1 addition & 0 deletions symbolic-symcache/src/new/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ mod compat;
mod error;
mod lookup;
pub(crate) mod raw;
pub mod transform;
mod writer;

pub use compat::*;
Expand Down
101 changes: 101 additions & 0 deletions symbolic-symcache/src/new/transform.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
//! Utilities that transform the data to be written to a SymCache.

use std::borrow::Cow;

use symbolic_debuginfo::macho::BcSymbolMap;

/// A Function record to be written to the SymCache.
///
/// The record can be inspected with `{:?}` and duplicated with `clone()`, which allows a
/// transformer to log or keep a copy of the record it is given.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct Function<'s> {
    /// The function's name.
    pub name: Cow<'s, str>,
    /// The compilation directory of the function.
    pub comp_dir: Option<Cow<'s, str>>,
}

/// A File to be written to the SymCache.
///
/// The record can be inspected with `{:?}` and duplicated with `clone()`.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct File<'s> {
    /// The file name.
    pub name: Cow<'s, str>,
    /// The optional directory prefix.
    pub directory: Option<Cow<'s, str>>,
    /// The optional compilation directory prefix.
    pub comp_dir: Option<Cow<'s, str>>,
}

/// A Source Location (File + Line) to be written to the SymCache.
///
/// The record can be inspected with `{:?}` and duplicated with `clone()`.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct SourceLocation<'s> {
    /// The [`File`] part of this [`SourceLocation`].
    pub file: File<'s>,
    /// The line number.
    pub line: u32,
}

/// A transformer that is applied to each [`Function`] and [`SourceLocation`] record in the SymCache.
///
/// Both methods have default implementations that return the record unchanged, so an
/// implementor only needs to override the method for the kind of record it wants to rewrite.
pub trait Transformer {
/// Transforms a [`Function`] record.
///
/// This can be used for example to de-obfuscate a function's name.
///
/// The default implementation returns `f` unchanged.
fn transform_function<'f>(&'f self, f: Function<'f>) -> Function<'f> {
f
}

/// Transforms a [`SourceLocation`].
///
/// This can be used for example to apply a Source Mapping in case an intermediate compilation
/// step might have introduced an indirection, or to de-obfuscate the [`File`] information.
///
/// The default implementation returns `sl` unchanged.
fn transform_source_location<'f>(&'f self, sl: SourceLocation<'f>) -> SourceLocation<'f> {
sl
}
}

// A thin newtype around the list of boxed transformers. It exists so that `Debug` can be
// implemented by hand, since the boxed trait objects themselves are not printed.
#[derive(Default)]
pub(crate) struct Transformers(pub Vec<Box<dyn Transformer>>);

impl std::fmt::Debug for Transformers {
    /// Formats the list as `Transformers(<count>)`, showing only how many transformers
    /// are registered.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut tuple = formatter.debug_tuple("Transformers");
        tuple.field(&self.0.len());
        tuple.finish()
    }
}

// Maps the string inside a `Cow` through `$f`, keeping borrowed input borrowed.
//
// This ended up as a macro which "inlines" mapping the `Cow` into the calling function, as using
// a real function here would lead to the following borrow checker error:
// error[E0495]: cannot infer an appropriate lifetime for lifetime parameter `'d` due to conflicting requirements
macro_rules! map_cow {
($cow:expr, $f: expr) => {
match $cow {
// Borrowed input: the result of `$f` is wrapped as a borrowed `Cow` again.
Cow::Borrowed(inner) => Cow::Borrowed($f(inner)),
// Owned input: `$f` is applied to a reference to the owned value, and its result is
// copied into a new owned value.
Cow::Owned(inner) => Cow::Owned($f(&inner).to_owned()),
}
};
}

/// Resolves every string of a record through the [`BcSymbolMap`].
impl Transformer for BcSymbolMap<'_> {
    /// Passes the function name and, if present, its compilation directory through
    /// [`BcSymbolMap::resolve`].
    fn transform_function<'f>(&'f self, f: Function<'f>) -> Function<'f> {
        let Function { name, comp_dir } = f;

        let name = map_cow!(name, |raw| self.resolve(raw));
        let comp_dir = comp_dir.map(|dir| map_cow!(dir, |raw| self.resolve(raw)));

        Function { name, comp_dir }
    }

    /// Passes the file name, directory and compilation directory of the location through
    /// [`BcSymbolMap::resolve`]; the line number is carried over unchanged.
    fn transform_source_location<'f>(&'f self, sl: SourceLocation<'f>) -> SourceLocation<'f> {
        let SourceLocation { file, line } = sl;
        let File {
            name,
            directory,
            comp_dir,
        } = file;

        let name = map_cow!(name, |raw| self.resolve(raw));
        let directory = directory.map(|dir| map_cow!(dir, |raw| self.resolve(raw)));
        let comp_dir = comp_dir.map(|dir| map_cow!(dir, |raw| self.resolve(raw)));

        SourceLocation {
            file: File {
                name,
                directory,
                comp_dir,
            },
            line,
        }
    }
}
167 changes: 97 additions & 70 deletions symbolic-symcache/src/new/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ use std::collections::{BTreeMap, HashMap};
use std::io::Write;

use indexmap::IndexSet;
use symbolic_common::{Arch, DebugId, Language};
use symbolic_common::{Arch, DebugId};
use symbolic_debuginfo::{DebugSession, Function, ObjectLike, Symbol};

use super::raw;
use super::{raw, transform};
use crate::{SymCacheError, SymCacheErrorKind};

/// The SymCache Converter.
Expand All @@ -22,6 +22,9 @@ pub struct SymCacheConverter {
/// CPU architecture of the object file.
arch: Arch,

/// A list of transformers that are used to transform each function / source location.
transformers: transform::Transformers,

/// The concatenation of all strings that have been added to this `Converter`.
string_bytes: Vec<u8>,
/// A map from [`String`]s that have been added to this `Converter` to their offsets in the `string_bytes` field.
Expand Down Expand Up @@ -52,6 +55,17 @@ impl SymCacheConverter {
Self::default()
}

/// Registers a [`transform::Transformer`] on this [`SymCacheConverter`].
///
/// The transformer is boxed and appended to the converter's list of transformers. Every
/// [`transform::Function`] and [`transform::SourceLocation`] is passed through it before
/// being written to the SymCache.
pub fn add_transformer<T: transform::Transformer + 'static>(&mut self, t: T) {
    let boxed: Box<dyn transform::Transformer> = Box::new(t);
    self.transformers.0.push(boxed);
}

/// Sets the CPU architecture of this SymCache.
pub fn set_arch(&mut self, arch: Arch) {
self.arch = arch;
Expand All @@ -67,72 +81,30 @@ impl SymCacheConverter {
/// If the string was already present, it is not added again. A newly added string
/// is prefixed by its length as a `u32`. The returned `u32`
/// is the offset into the `string_bytes` field where the string is saved.
fn insert_string(&mut self, s: &str) -> u32 {
fn insert_string(
string_bytes: &mut Vec<u8>,
strings: &mut HashMap<String, u32>,
s: &str,
) -> u32 {
if s.is_empty() {
return u32::MAX;
}
if let Some(&offset) = self.strings.get(s) {
if let Some(&offset) = strings.get(s) {
return offset;
}
let string_offset = self.string_bytes.len() as u32;
let string_offset = string_bytes.len() as u32;
let string_len = s.len() as u32;
self.string_bytes.extend(string_len.to_ne_bytes());
self.string_bytes.extend(s.bytes());
string_bytes.extend(string_len.to_ne_bytes());
string_bytes.extend(s.bytes());
// we should have written exactly `string_len + 4` bytes
debug_assert_eq!(
self.string_bytes.len(),
string_bytes.len(),
string_offset as usize + string_len as usize + std::mem::size_of::<u32>(),
);
self.strings.insert(s.to_owned(), string_offset);
strings.insert(s.to_owned(), string_offset);
string_offset
}

/// Insert a file into this converter.
///
/// If the file was already present, it is not added again. The returned `u32`
/// is the file's index in insertion order.
fn insert_file(
&mut self,
path_name: &str,
directory: Option<&str>,
comp_dir: Option<&str>,
) -> u32 {
let path_name_offset = self.insert_string(path_name);
let directory_offset = directory.map_or(u32::MAX, |d| self.insert_string(d));
let comp_dir_offset = comp_dir.map_or(u32::MAX, |cd| self.insert_string(cd));

let (file_idx, _) = self.files.insert_full(raw::File {
path_name_offset,
directory_offset,
comp_dir_offset,
});

file_idx as u32
}

/// Insert a function into this converter.
///
/// If the function was already present, it is not added again. The returned `u32`
/// is the function's index in insertion order.
fn insert_function(
&mut self,
name: &str,
comp_dir: Option<&str>,
entry_pc: u32,
lang: Language,
) -> u32 {
let name_offset = self.insert_string(name);
let comp_dir_offset = comp_dir.map_or(u32::MAX, |comp_dir| self.insert_string(comp_dir));
let lang = lang as u32;
let (fun_idx, _) = self.functions.insert_full(raw::Function {
name_offset,
comp_dir_offset,
entry_pc,
lang,
});
fun_idx as u32
}

// Methods processing symbolic-debuginfo [`ObjectLike`] below:
// Feel free to move these to a separate file.

Expand Down Expand Up @@ -174,20 +146,67 @@ impl SymCacheConverter {
} else {
function.address as u32
};
let function_idx = self.insert_function(
function.name.as_str(),
comp_dir,
entry_pc,
function.name.language(),
);

let function_idx = {
let language = function.name.language();
let mut function = transform::Function {
name: function.name.as_str().into(),
comp_dir: comp_dir.map(Into::into),
};
for transformer in &self.transformers.0 {
function = transformer.transform_function(function);
}

let string_bytes = &mut self.string_bytes;
let strings = &mut self.strings;
let name_offset = Self::insert_string(string_bytes, strings, &function.name);

let comp_dir_offset = function.comp_dir.map_or(u32::MAX, |comp_dir| {
Self::insert_string(string_bytes, strings, &comp_dir)
});
let lang = language as u32;
let (fun_idx, _) = self.functions.insert_full(raw::Function {
name_offset,
comp_dir_offset,
entry_pc,
lang,
});
fun_idx as u32
};

for line in &function.lines {
let path_name = line.file.name_str();
let file_idx = self.insert_file(&path_name, Some(&line.file.dir_str()), comp_dir);
let mut location = transform::SourceLocation {
file: transform::File {
name: line.file.name_str(),
directory: Some(line.file.dir_str()),
comp_dir: comp_dir.map(Into::into),
},
line: line.line as u32,
};
for transformer in &self.transformers.0 {
location = transformer.transform_source_location(location);
}

let string_bytes = &mut self.string_bytes;
let strings = &mut self.strings;
let path_name_offset = Self::insert_string(string_bytes, strings, &location.file.name);
let directory_offset = location
.file
.directory
.map_or(u32::MAX, |d| Self::insert_string(string_bytes, strings, &d));
let comp_dir_offset = location.file.comp_dir.map_or(u32::MAX, |cd| {
Self::insert_string(string_bytes, strings, &cd)
});

let (file_idx, _) = self.files.insert_full(raw::File {
path_name_offset,
directory_offset,
comp_dir_offset,
});

let source_location = raw::SourceLocation {
file_idx,
line: line.line as u32,
file_idx: file_idx as u32,
line: location.line,
function_idx,
inlined_into_idx: u32::MAX,
};
Expand Down Expand Up @@ -241,12 +260,20 @@ impl SymCacheConverter {
}

pub fn process_symbolic_symbol(&mut self, symbol: &Symbol<'_>) {
let name = match symbol.name {
Some(ref name) => name.as_ref(),
None => return,
};
let name_idx = {
let mut function = transform::Function {
name: match symbol.name {
Some(ref name) => name.clone(),
None => return,
},
comp_dir: None,
};
for transformer in &self.transformers.0 {
function = transformer.transform_function(function);
}

let name_idx = self.insert_string(name);
Self::insert_string(&mut self.string_bytes, &mut self.strings, &function.name)
};

match self.ranges.entry(symbol.address as u32) {
btree_map::Entry::Vacant(entry) => {
Expand Down
Loading

0 comments on commit 3e2cd11

Please sign in to comment.