diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..49c8c3b
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "lldb",
+            "request": "launch",
+            "name": "Debug",
+            "program": "${workspaceFolder}/runrunrun/target/debug/runrunrun",
+            "args": [],
+            "cwd": "${workspaceFolder}"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/adding-machine/Cargo.toml b/adding-machine/Cargo.toml
index 292a0cb..5fcff41 100644
--- a/adding-machine/Cargo.toml
+++ b/adding-machine/Cargo.toml
@@ -8,4 +8,155 @@ edition = "2021"
 name = "addingmachine"
 crate-type = ["cdylib"]
 
+
 [dependencies]
+libc = "0.2"
+regex = "1.5"
+lazy_static = "1.4.0"
+
+serde_json = "1.0.87"
+
+value = { git = "https://github.com/vectordotdev/vector", default-features = false }
+vrl = { git = "https://github.com/vectordotdev/vector", default-features = false }
+vrl-diagnostic = { git = "https://github.com/vectordotdev/vector", package = "vrl-diagnostic" }
+
+# serde stuff here
+serde = { version = "1.0", features = ["derive"] }
+gloo-utils = { version = "0.1", features = ["serde"] }
+serde-wasm-bindgen = "0.4"
+
+
+[dependencies.stdlib]
+package = "vrl-stdlib"
+git = "https://github.com/vectordotdev/vector"
+default-features = false
+features = [
+    "append",
+    "array",
+    "assert",
+    "assert_eq",
+    "ceil",
+    "chunks",
+    "compact",
+    "contains",
+    "decode_base64",
+    "decode_percent",
+    "del",
+    "downcase",
+    "encode_base64",
+    "encode_json",
+    "encode_key_value",
+    "encode_logfmt",
+    "encode_percent",
+    "ends_with",
+    "exists",
+    "filter",
+    "find",
+    "flatten",
+    "float",
+    "floor",
+    "for_each",
+    "format_int",
+    "format_number",
+    "format_timestamp",
+    "get",
+    "get_env_var",
+    "includes",
+    "ip_aton",
+    "ip_cidr_contains",
+    "ip_ntoa",
+    "ip_ntop",
+    "ip_pton",
+    "ip_subnet",
+    "ip_to_ipv6",
+    "ipv6_to_ipv4",
+    "is_array",
+    "is_boolean",
+    "is_empty",
+    "is_float",
+    "is_integer",
+    "is_ipv4",
+    "is_ipv6",
+    "is_json",
+    "is_null",
+    "is_nullish",
+    "is_object",
+    "is_regex",
+    "is_string",
+    "is_timestamp",
+    "join",
+    "keys",
+    "length",
+    "map_keys",
+    "map_values",
+    "match",
+    "match_any",
+    "match_array",
+    "match_datadog_query",
+    "md5",
+    "merge",
+    "mod",
+    "now",
+    "object",
+    "parse_apache_log",
+    "parse_aws_alb_log",
+    "parse_aws_cloudwatch_log_subscription_message",
+    "parse_aws_vpc_flow_log",
+    "parse_common_log",
+    "parse_csv",
+    "parse_duration",
+    "parse_glog",
+    "parse_int",
+    "parse_json",
+    "parse_key_value",
+    "parse_klog",
+    "parse_linux_authorization",
+    "parse_logfmt",
+    "parse_nginx_log",
+    "parse_query_string",
+    "parse_regex",
+    "parse_regex_all",
+    "parse_ruby_hash",
+    "parse_syslog",
+    "parse_timestamp",
+    "parse_tokens",
+    "parse_url",
+    "parse_user_agent",
+    "parse_xml",
+    "push",
+    "redact",
+    "remove",
+    "replace",
+    "round",
+    "set",
+    "sha1",
+    "sha2",
+    "sha3",
+    "slice",
+    "split",
+    "starts_with",
+    "string",
+    "strip_ansi_escape_codes",
+    "strip_whitespace",
+    "strlen",
+    "tally",
+    "tally_value",
+    "tag_types_externally",
+    "timestamp",
+    "to_bool",
+    "to_float",
+    "to_int",
+    "to_regex",
+    "to_string",
+    "to_syslog_facility",
+    "to_syslog_level",
+    "to_syslog_severity",
+    "to_timestamp",
+    "to_unix_timestamp",
+    "truncate",
+    "type_def",
+    "unique",
+    "unnest",
+    "upcase",
+    "values"
+]
\ No newline at end of file
diff --git a/adding-machine/src/lib.rs b/adding-machine/src/lib.rs
index 9d6ec41..37f278c 100644
--- a/adding-machine/src/lib.rs
+++ b/adding-machine/src/lib.rs
@@ -1,9 +1,245 @@
+// This is the guest Rust program: it is compiled to wasm32 and run inside
+// wasmtime by the host program in runrunrun/src/main.rs.
+
+extern crate alloc;
+use alloc::vec::Vec;
+use std::mem::MaybeUninit;
+use std::slice;
+
+use ::value::Value;
+use value::Secrets;
+use vrl::diagnostic::DiagnosticList;
+use vrl::state::TypeState;
+use vrl::{diagnostic::Formatter, prelude::BTreeMap, CompileConfig, Runtime};
+use vrl::{TargetValue, Terminate, TimeZone};
+
+// serde derives the JSON (de)serialization used to talk to the host
+use serde::{Deserialize, Serialize};
+
+/// A VRL program and the event it is applied to, as sent by the host.
+#[derive(Serialize, Deserialize)]
+pub struct Input {
+    pub program: String,
+    pub event: Value,
+}
+
+impl Input {
+    pub fn new(program: &str, event: Value) -> Self {
+        Self {
+            program: program.to_owned(),
+            event,
+        }
+    }
+}
+
+// The module returns the result of the last expression and the event that results from the
+// applied program
+#[derive(Deserialize, Serialize)]
+pub struct VrlCompileResult {
+    pub output: Value,
+    pub result: Value,
+}
+
+impl VrlCompileResult {
+    fn new(output: Value, result: Value) -> Self {
+        Self { output, result }
+    }
+}
+
+/// Diagnostics returned to the host when a program fails to compile or run.
+#[derive(Deserialize, Serialize, Default)]
+pub struct VrlDiagnosticResult {
+    pub list: Vec<String>,
+    pub msg: String,
+    pub msg_colorized: String,
+}
+
+impl VrlDiagnosticResult {
+    fn new(program: &str, diagnostic_list: DiagnosticList) -> Self {
+        Self {
+            list: diagnostic_list
+                .clone()
+                .into_iter()
+                .map(|diag| String::from(diag.message()))
+                .collect(),
+            msg: Formatter::new(program, diagnostic_list.clone()).to_string(),
+            msg_colorized: Formatter::new(program, diagnostic_list)
+                .colored()
+                .to_string(),
+        }
+    }
+
+    fn new_runtime_error(program: &str, terminate: Terminate) -> Self {
+        Self {
+            list: Vec::with_capacity(1),
+            msg: Formatter::new(program, terminate.clone().get_expression_error()).to_string(),
+            msg_colorized: Formatter::new(program, terminate.get_expression_error())
+                .colored()
+                .to_string(),
+        }
+    }
+
+    /// Describes a request from the host that could not be parsed as [`Input`].
+    fn invalid_input(err: &serde_json::Error) -> Self {
+        let msg = format!("invalid input: {err}");
+        Self {
+            list: vec![msg.clone()],
+            msg_colorized: msg.clone(),
+            msg,
+        }
+    }
+}
+
+/// Compiles `input.program` and runs it against `input.event`.
+///
+/// Returns the program's result together with the resulting event, or the
+/// diagnostics explaining why compilation or execution failed.
+fn compile(input: Input) -> Result<VrlCompileResult, VrlDiagnosticResult> {
+    let Input { program: source, event } = input;
+    let functions = stdlib::all();
+    let state = TypeState::default();
+    let mut runtime = Runtime::default();
+    let config = CompileConfig::default();
+    let timezone = TimeZone::default();
+
+    let mut target_value = TargetValue {
+        value: event,
+        metadata: Value::Object(BTreeMap::new()),
+        secrets: Secrets::new(),
+    };
+
+    let program = match vrl::compile_with_state(&source, &functions, &state, config) {
+        Ok(program) => program,
+        Err(diagnostics) => return Err(VrlDiagnosticResult::new(&source, diagnostics)),
+    };
+
+    match runtime.resolve(&mut target_value, &program.program, &timezone) {
+        Ok(result) => Ok(VrlCompileResult::new(result, target_value.value)),
+        Err(err) => Err(VrlDiagnosticResult::new_runtime_error(&source, err)),
+    }
+}
+
+/// Runs the JSON-encoded [`Input`] stored at `ptr`, writes the JSON response
+/// back to `ptr` and returns the length of that response in bytes.
+///
+/// # Safety
+///
+/// `ptr` must point to `len` readable bytes of a buffer that is also large
+/// enough to hold the response; the buffer's capacity is not passed in, so it
+/// cannot be checked here.
+#[cfg_attr(all(target_arch = "wasm32"), export_name = "run_vrl_wasm")]
+#[no_mangle]
+pub unsafe extern "C" fn run_vrl(ptr: u32, len: u32) -> u32 {
+    let incoming = ptr_to_string(ptr, len);
+
+    let response = match serde_json::from_str::<Input>(&incoming) {
+        Ok(input) => match compile(input) {
+            Ok(res) => serde_json::to_string(&res),
+            Err(err) => serde_json::to_string(&err),
+        },
+        // Report malformed input to the host instead of panicking (a trap).
+        Err(err) => serde_json::to_string(&VrlDiagnosticResult::invalid_input(&err)),
+    }
+    .expect("the response should serialize to JSON");
+
+    store_string_at_ptr(&response, ptr);
+    response.len() as u32
+}
+
 #[cfg_attr(all(target_arch = "wasm32"), export_name = "add_wasm")]
 #[no_mangle]
 pub fn add(left: usize, right: usize) -> usize {
     left + right
 }
 
+/// Test export that returns `ptr + len` without touching memory.
+#[cfg_attr(all(target_arch = "wasm32"), export_name = "echo_string")]
+#[no_mangle]
+pub unsafe extern "C" fn echo_string(ptr: u32, len: u32) -> u32 {
+    ptr + len
+}
+
+// this will read a string that was written
+// by the runrunrun/src/main.rs file
+// returns the length of the string read
+#[cfg_attr(all(target_arch = "wasm32"), export_name = "read_string")]
+#[no_mangle]
+pub unsafe extern "C" fn read_string(ptr: u32, len: u32) -> u32 {
+    ptr_to_string(ptr, len).len() as u32
+}
+
+/// Overwrites the string at `ptr` with a longer message quoting it and returns
+/// the new length; the buffer at `ptr` must be large enough for that message.
+#[cfg_attr(all(target_arch = "wasm32"), export_name = "return_string")]
+#[no_mangle]
+pub unsafe extern "C" fn return_string(ptr: u32, len: u32) -> u32 {
+    let incoming_string = ptr_to_string(ptr, len);
+    let new_string = format!(
+        "Incoming: {incoming_string}\nThis string was written from adding-machine/src/lib.rs"
+    );
+    store_string_at_ptr(&new_string, ptr);
+
+    new_string.len() as u32
+}
+
+// WASM Memory-related helper functions
+//
+// TODO explore using lol_alloc instead of default rust allocator
+/// WebAssembly export that allocates a pointer (linear memory offset) that can
+/// be used for a string.
+///
+/// This is an ownership transfer, which means the caller must call
+/// [`deallocate`] when finished.
+#[cfg_attr(all(target_arch = "wasm32"), export_name = "allocate")]
+#[no_mangle]
+pub extern "C" fn _allocate(size: u32) -> *mut u8 {
+    allocate(size as usize)
+}
+
+/// Allocates size bytes and leaks the pointer where they start.
+fn allocate(size: usize) -> *mut u8 {
+    // The vector needs a *length* of `size`, not just that capacity:
+    // `into_boxed_slice` discards spare capacity, so a vector created with
+    // `Vec::with_capacity` would hand out a zero-length allocation.
+    let vec: Vec<MaybeUninit<u8>> = vec![MaybeUninit::uninit(); size];
+
+    // into_raw leaks the memory to the caller.
+    Box::into_raw(vec.into_boxed_slice()) as *mut u8
+}
+
+/// WebAssembly export that deallocates a pointer of the given size (linear
+/// memory offset, byteCount) allocated by [`allocate`].
+#[cfg_attr(all(target_arch = "wasm32"), export_name = "deallocate")]
+#[no_mangle]
+pub unsafe extern "C" fn _deallocate(ptr: u32, size: u32) {
+    deallocate(ptr as *mut u8, size as usize);
+}
+
+/// Retakes the pointer which allows its memory to be freed.
+unsafe fn deallocate(ptr: *mut u8, size: usize) {
+    // An empty Vec with capacity `size` has the same layout as the boxed slice
+    // leaked by `allocate`, so dropping it hands the memory back.
+    drop(Vec::from_raw_parts(ptr, 0, size));
+}
+
+// WASM String-related helper functions
+/// Returns a string from WebAssembly compatible numeric types representing
+/// its pointer and length.
+///
+/// The bytes are written by the host, so invalid UTF-8 is replaced with
+/// U+FFFD instead of being trusted.
+unsafe fn ptr_to_string(ptr: u32, len: u32) -> String {
+    let bytes = slice::from_raw_parts(ptr as *const u8, len as usize);
+    String::from_utf8_lossy(bytes).into_owned()
+}
+
+/// Stores the given string 's' at the memory location pointed to by 'ptr'
+/// This assumes no buffer overflows - here be dragons.
+unsafe fn store_string_at_ptr(s: &str, ptr: u32) {
+    // Copy the bytes of 's' into the buffer that starts at 'ptr'
+    std::ptr::copy_nonoverlapping(s.as_ptr(), ptr as *mut u8, s.len());
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/build.sh b/build.sh
new file mode 100644
index 0000000..36a52fd
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Builds the wasm guest, then builds and runs the wasmtime host.
+set -euo pipefail
+
+# Work from the repository root no matter where the script is invoked from.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+(cd adding-machine && cargo build --target wasm32-unknown-unknown)
+
+cd runrunrun
+cargo run
diff --git a/runrunrun/.vscode/launch.json b/runrunrun/.vscode/launch.json
new file mode 100644
index 0000000..243af45
--- /dev/null
+++ b/runrunrun/.vscode/launch.json
@@ -0,0 +1,45 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "lldb",
+            "request": "launch",
+            "name": "Debug executable 'runrunrun'",
+            "cargo": {
+                "args": [
+                    "build",
+                    "--bin=runrunrun",
+                    "--package=runrunrun"
+                ],
+                "filter": {
+                    "name": "runrunrun",
+                    "kind": "bin"
+                }
+            },
+            "args": [],
+            "cwd": "${workspaceFolder}"
+        },
+        {
+            "type": "lldb",
+            "request": "launch",
+            "name": "Debug unit tests in executable 'runrunrun'",
+            "cargo": {
+                "args": [
+                    "test",
+                    "--no-run",
+                    "--bin=runrunrun",
+                    "--package=runrunrun"
+                ],
+                "filter": {
+                    "name": "runrunrun",
+                    "kind": "bin"
+                }
+            },
+            "args": [],
+            "cwd": "${workspaceFolder}"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/runrunrun/Cargo.toml b/runrunrun/Cargo.toml
index eb68066..23ddc65 100644
--- a/runrunrun/Cargo.toml
+++ b/runrunrun/Cargo.toml
@@ -7,3 +7,5 @@ edition = "2021"
 
 [dependencies]
 wasmtime = { git= "https://github.com/bytecodealliance/wasmtime" }
+serde_json = "1.0.87"
+serde = { version = "1.0", features = ["derive"] }
diff --git a/runrunrun/src/main.rs b/runrunrun/src/main.rs
index 2578f70..19d8422 100644
--- a/runrunrun/src/main.rs
+++ b/runrunrun/src/main.rs
@@ -1,22 +1,113 @@
+// this is the host rust program
 extern crate wasmtime;
 use std::error::Error;
 use wasmtime::*;
 
-const WASM_BYTES_PATH: &str = "/Users/scott.opell/dev/rust-wasm-example-proj/adding-machine/target/wasm32-unknown-unknown/debug/addingmachine.wasm";
+// maybe not needed but will be the formatted output glue
+use serde::{Deserialize, Serialize};
+
+/// Path to the guest module built by `build.sh`, resolved relative to this
+/// crate's manifest directory instead of one developer's home directory.
+const WASM_BYTES_PATH: &str = concat!(
+    env!("CARGO_MANIFEST_DIR"),
+    "/../adding-machine/target/wasm32-unknown-unknown/debug/addingmachine.wasm"
+);
+
+/// Size in bytes of the buffer allocated inside the guest for exchanging strings.
+const BUF_SIZE: u32 = 2048;
+
+/// Reads `len` bytes at offset `ptr` of the guest's linear memory as UTF-8.
+///
+/// Fails when `len` exceeds [`BUF_SIZE`] (the guest wrote past the shared
+/// buffer), when the range is outside the memory, or on invalid UTF-8.
+fn read_guest_string(
+    memory: &Memory,
+    store: &Store<()>,
+    ptr: u32,
+    len: u32,
+) -> Result<String, Box<dyn Error>> {
+    if len > BUF_SIZE {
+        return Err(format!("guest returned {len} bytes, buffer holds {BUF_SIZE}").into());
+    }
+    let mut bytes = vec![0u8; len as usize];
+    memory.read(store, ptr as usize, &mut bytes)?;
+    Ok(String::from_utf8(bytes)?)
+}
 
 fn main() -> Result<(), Box<dyn Error>> {
     let engine = Engine::default();
     let module = Module::from_file(&engine, WASM_BYTES_PATH)?;
+
+    // print out the imports the module expects from the host
+    println!("printing out all the required imports now:");
+    module
+        .imports()
+        .for_each(|import| println!("{0}", import.name()));
+
+    // We cannot instantiate with Instance::new() because the module is
+    // missing the imports below:
+    //   __wbindgen_describe
+    //   __wbindgen_externref_table_grow
+    //   __wbindgen_externref_table_set_null
+    //   __wbindgen_throw
+    // The binary can still run if a Linker defines every unknown import as
+    // a trap, so we instantiate through the linker instead.
+    let mut linker = Linker::new(&engine);
+    linker.define_unknown_imports_as_traps(&module)?;
+
     let mut store = Store::new(&engine, ());
-    let instance = Instance::new(&mut store, &module, &[])?;
-    let add = instance
-        .get_typed_func::<(i32, i32), i32>(&mut store, "add_wasm")
-        .expect("`add_wasm` was not an exported function");
+    let instance = linker.instantiate(&mut store, &module)?;
+
+    // Load up our exports from the wasmtime instance
+    let memory = instance
+        .get_memory(&mut store, "memory")
+        .ok_or("`memory` was not an exported memory")?;
+    let allocate = instance
+        .get_typed_func::<u32, u32>(&mut store, "allocate")
+        .expect("`allocate` was not an exported function");
+    let deallocate = instance
+        .get_typed_func::<(u32, u32), ()>(&mut store, "deallocate")
+        .expect("`deallocate` was not an exported function");
+    let read_string = instance
+        .get_typed_func::<(u32, u32), u32>(&mut store, "read_string")
+        .expect("`read_string` was not an exported function");
+    let run_vrl_wasm = instance
+        .get_typed_func::<(u32, u32), u32>(&mut store, "run_vrl_wasm")
+        .expect("`run_vrl_wasm` was not an exported function");
+
+    // The guest owns this buffer until it is handed back with `deallocate`.
+    let buf_ptr = allocate.call(&mut store, BUF_SIZE)?;
+
+    // Write a string into guest memory and let `read_string` report its length.
+    let outgoing_str = String::from("Value created by runrunrun/src/main.rs");
+    memory.write(&mut store, buf_ptr as usize, outgoing_str.as_bytes())?;
+    let ans = read_string.call(&mut store, (buf_ptr, outgoing_str.len() as u32))?;
+    println!("wasm output: {ans}");
+    println!(
+        "wasm output string: {0}",
+        read_guest_string(&memory, &store, buf_ptr, ans)?
+    );
 
-    // And finally we can call our function! Note that the error propagation
-    // with `?` is done to handle the case where the wasm function traps.
-    let result = add.call(&mut store, (3, 8))?;
-    println!("Answer: {:?}", result);
+    // Now run a VRL program in the guest. The input is a JSON string of the form
+    //   { "program": "<VRL source>", "event": { ...object the program modifies... } }
+    let vrl_input = serde_json::json!({
+        "program": r#".jon = "padilla""#,
+        "event": {}
+    });
+    let vrl_input_str = vrl_input.to_string();
+    if vrl_input_str.len() > BUF_SIZE as usize {
+        return Err("VRL input does not fit in the shared buffer".into());
+    }
+    memory.write(&mut store, buf_ptr as usize, vrl_input_str.as_bytes())?;
+
+    // `?` surfaces a guest trap instead of printing a stale buffer.
+    let output_len = run_vrl_wasm.call(&mut store, (buf_ptr, vrl_input_str.len() as u32))?;
+    println!(
+        "wasm output string: {0}",
+        read_guest_string(&memory, &store, buf_ptr, output_len)?
+    );
+
+    deallocate.call(&mut store, (buf_ptr, BUF_SIZE))?;
 
     Ok(())
 }