diff --git a/Cargo.lock b/Cargo.lock index 6314b73..2822106 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -228,6 +228,21 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "oxilangtag" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23f3f87617a86af77fa3691e6350483e7154c2ead9f1261b75130e21ca0f8acb" +dependencies = [ + "serde", +] + +[[package]] +name = "oxiri" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05417ee46e2eb40dd9d590b4d67fc2408208b3a48a6b7f71d2bc1d7ce12a3e0" + [[package]] name = "powerfmt" version = "0.2.0" @@ -258,6 +273,8 @@ version = "0.1.0" dependencies = [ "bitflags", "clap", + "rio_api", + "rio_turtle", "serde", "slog", "slog-async", @@ -275,6 +292,23 @@ dependencies = [ "thiserror", ] +[[package]] +name = "rio_api" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1924fa1f0e6d851f9b73b3c569e607c368a0d92995d99d563ad7bf1414696603" + +[[package]] +name = "rio_turtle" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cec59971eafd99b9c7e3544bfcabafea81a7072ac51c9f46985ca0bd7ba6016" +dependencies = [ + "oxilangtag", + "oxiri", + "rio_api", +] + [[package]] name = "rustversion" version = "1.0.17" diff --git a/Cargo.toml b/Cargo.toml index 87282c6..5834d8e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,4 +14,6 @@ slog-async = "2.8.0" # Popular serialization library. 
serde = { version = "1.0", features = ['derive']}
 clap = { version = "4.5.7", features = ["derive"] }
+rio_turtle = "0.8.4"
+rio_api = "0.8.4"
 bitflags = "2.5.0"
diff --git a/src/io.rs b/src/io.rs
index 4b9cc0a..9be449f 100644
--- a/src/io.rs
+++ b/src/io.rs
@@ -1,9 +1,7 @@
-use crate::model::Triple;
-
+use rio_turtle::NTriplesParser;
 use std::{
     fs::File,
     io::{BufRead, BufReader},
-    iter::Iterator,
     path::Path,
 };
 
@@ -15,8 +13,8 @@ pub fn get_buffer(path: &Path) -> BufReader<File> {
 }
 
-// Parse RDF triples.
-pub fn parse_ntriples(reader: Box<dyn BufRead>) -> impl Iterator<Item = Triple> {
-    return reader.lines().map(|l| Triple::parse_ntriples(&l.unwrap()));
+/// Wraps `reader` in an `NTriplesParser`; nothing is parsed until the caller drives it.
+pub fn parse_ntriples(reader: Box<dyn BufRead>) -> NTriplesParser<Box<dyn BufRead>> {
+    NTriplesParser::new(reader)
 }
 
 #[cfg(test)]
diff --git a/src/main.rs b/src/main.rs
index 8af2d9d..aa3ba98 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -14,7 +14,7 @@ use crate::{
 };
 
 use clap::{Args, Parser, Subcommand};
-use std::{fs::File, io::BufReader, path::PathBuf};
+use std::path::PathBuf;
 
 #[derive(Parser)]
 #[command(name = "rdf-protect")]
diff --git a/src/model.rs b/src/model.rs
index 49680ae..9bcf11e 100644
--- a/src/model.rs
+++ b/src/model.rs
@@ -1,62 +1,76 @@
-use crate::crypto::hash;
-use bitflags::bitflags;
-
-#[derive(Debug)]
-pub struct Triple {
-    subject: String,
-    predicate: String,
-    object: String,
+use bitflags;
+use rio_api::model::{Subject, Term, Triple};
+
+/// Implemented by RDF terms that can be replaced by a pseudonym.
+pub trait Pseudonymize {
+    /// Returns the pseudonymized counterpart of `self`.
+    fn pseudo(&self) -> Self;
 }
 
-// should use bitflags, e.g. S = 0b100, P = 0b010 -> SP = S + P
-bitflags! {
-    pub struct TriplePart: u8 {
-        const SUBJECT = 1 << 0;
+// Bit mask selecting any combination of triple fields (S = 0b100, P = 0b010, O = 0b001)
+bitflags::bitflags! {
+    #[derive(Debug, Copy, Clone)]
+    pub struct TripleMask: u8 {
+        const SUBJECT = 1 << 2;
         const PREDICATE = 1 << 1;
-        const OBJECT = 1 << 2;
+        const OBJECT = 1 << 0;
     }
 }
 
-impl TriplePart {
-    // Checks if a all bits in `mask` are set.
-    fn is_set(&self, mask: TriplePart) -> bool {
-        return self.bits() & mask.bits() == mask.bits();
+
+impl TripleMask {
+    /// Reports whether every bit set in `other` is also set in this mask.
+    /// An empty `other` is always reported as set.
+    pub fn is_set(&self, other: &TripleMask) -> bool {
+        // `contains` holds only when no bit of `other` is missing from `self`.
+        self.contains(*other)
     }
+
 }
 
-impl Triple {
-    pub fn new(subject: String, predicate: String, object: String) -> Triple {
-        Triple {
-            subject,
-            predicate,
-            object,
-        }
-    }
+/// Returns a copy of `triple` with the parts selected by `mask` pseudonymized.
+pub fn pseudonymize_triple<'a>(triple: &Triple<'a>, mask: TripleMask) -> Triple<'a> {
+    let pseudo_subject = if mask.is_set(&TripleMask::SUBJECT) {
+        triple.subject.pseudo()
+    } else {
+        triple.subject
+    };
 
-    pub fn hash_parts(&self, mask: TriplePart) -> Triple {
-        let hash_subject = if mask.is_set(TriplePart::SUBJECT) {
-            hash(&self.subject)
-        } else {
-            self.subject.clone()
-        };
-
-        let hash_predicate = if mask.is_set(TriplePart::PREDICATE) {
-            hash(&self.predicate)
-        } else {
-            self.predicate.clone()
-        };
-
-        let hash_object = if mask.is_set(TriplePart::OBJECT) {
-            hash(&self.object)
-        } else {
-            self.object.clone()
-        };
-
-        return Triple::new(hash_subject, hash_predicate, hash_object);
+    let pseudo_object = if mask.is_set(&TripleMask::OBJECT) {
+        triple.object.pseudo()
+    } else {
+        triple.object.clone()
+    };
+    // The PREDICATE bit is not honoured here: the predicate is always copied as-is.
+    Triple {
+        subject: pseudo_subject,
+        predicate: triple.predicate,
+        object: pseudo_object,
+    }
+}
+
+// Pseudonymization of objects (nodes or literals): returns an unchanged copy for now
+impl Pseudonymize for Term<'_> {
+    fn pseudo(&self) -> Self {
+        match self {
+            Term::Literal(val) => Term::Literal(*val),
+            Term::NamedNode(val) => Term::NamedNode(*val),
+            Term::BlankNode(val) => Term::BlankNode(*val),
+            Term::Triple(_) => panic!("RDF-star not supported (triple as object)"),
+        }
     }
+}
 
-    // instantiate a triple from a ntriple string
-    pub fn parse_ntriples(triple: &str) -> Triple {
-        Triple::new(String::from("A"), String::from("B"), String::from("C"))
+// Pseudonymization of subjects (always a URI / blank node): returns an unchanged copy for now
+impl Pseudonymize for Subject<'_> {
+    fn pseudo(&self) -> Self {
+        match self {
+            Subject::NamedNode(val) => Subject::NamedNode(*val),
+            Subject::BlankNode(val) => Subject::BlankNode(*val),
+            Subject::Triple(_) => panic!("RDF-star not supported (triple as subject)"),
+        }
     }
 }
+
+// TODO: implement for blanknodes
+// NOTE: Support for RDF-star?
diff --git a/src/pass_first.rs b/src/pass_first.rs
index 139597f..8b13789 100644
--- a/src/pass_first.rs
+++ b/src/pass_first.rs
@@ -1,2 +1 @@
-
 
diff --git a/src/pass_second.rs b/src/pass_second.rs
index eeaddc2..923cc77 100644
--- a/src/pass_second.rs
+++ b/src/pass_second.rs
@@ -1,9 +1,23 @@
+use rio_api::{model::Triple, parser::TriplesParser};
+use rio_turtle::TurtleError;
 use std::{
     io::{BufRead, BufReader},
     path::Path,
 };
 
-use crate::{info, io, log::Logger, model::TriplePart};
+use crate::{
+    io,
+    log::Logger,
+    model::{pseudonymize_triple, TripleMask},
+};
+
+// Applies the (currently hard-coded) SUBJECT mask to `triple` and prints the result to stdout.
+// NOTE: This will need the type-map to perform masking
+fn process_triple(triple: &Triple) -> Result<(), TurtleError> {
+    let mask = TripleMask::SUBJECT;
+    println!("{}", pseudonymize_triple(triple, mask));
+    Ok(())
+}
 
 pub fn encrypt(log: &Logger, input: &Path, output: &Path, type_map_file: &Path) {
     // Construct the buffer either from `stdio` or from an input file.
@@ -13,9 +27,8 @@ pub fn encrypt(log: &Logger, input: &Path, output: &Path, type_map_file: &Path)
         _ => Box::new(io::get_buffer(input)),
     };
 
-    let triples = io::parse_ntriples(buffer);
-
-    for triple in triples {
-        info!(log, "{:?}", triple.hash_parts(TriplePart::SUBJECT));
+    let mut triples = io::parse_ntriples(buffer);
+    while !triples.is_end() {
+        triples.parse_step(&mut |t| process_triple(&t)).unwrap();
     }
 }