Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: rio parsing #13

Merged
merged 13 commits into from
Jun 19, 2024
35 changes: 34 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ slog-async = "2.8.0"
# Popular serialization library.
serde = { version = "1.0", features = ['derive']}
clap = { version = "4.5.7", features = ["derive"] }
bitflags = "2.5.0"
rio_turtle = "0.8.4"
rio_api = "0.8.4"
8 changes: 3 additions & 5 deletions src/io.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
use crate::model::Triple;

use rio_turtle::NTriplesParser;
use std::{
fs::File,
io::{BufRead, BufReader},
iter::Iterator,
path::Path,
};

Expand All @@ -15,8 +13,8 @@ pub fn get_buffer(path: &Path) -> BufReader<File> {
}

// Parse RDF triples.
pub fn parse_ntriples(reader: Box<dyn BufRead>) -> impl Iterator<Item = Triple> {
return reader.lines().map(|l| Triple::parse_ntriples(&l.unwrap()));
pub fn parse_ntriples(reader: Box<dyn BufRead>) -> NTriplesParser<Box<dyn BufRead>> {
return NTriplesParser::new(reader);
}

#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::{
};

use clap::{Args, Parser, Subcommand};
use std::{fs::File, io::BufReader, path::PathBuf};
use std::path::PathBuf;

#[derive(Parser)]
#[command(name = "rdf-protect")]
Expand Down
123 changes: 73 additions & 50 deletions src/model.rs
Original file line number Diff line number Diff line change
@@ -1,62 +1,85 @@
use crate::crypto::hash;
use bitflags::bitflags;

#[derive(Debug)]
pub struct Triple {
subject: String,
predicate: String,
object: String,
use rio_api::model::{Subject, Term, Triple};

// Capability of producing a pseudonymized counterpart of a value.
//
// Implementors return a new value of the same type; the receiver is only
// borrowed and is left untouched.
pub trait Pseudonymize {
    // Build the pseudonymized version of `self`.
    fn pseudo(&self) -> Self;
}

// should use bitflags, e.g. S = 0b100, P = 0b010 -> SP = S + P
bitflags! {
pub struct TriplePart: u8 {
const SUBJECT = 1 << 0;
const PREDICATE = 1 << 1;
const OBJECT = 1 << 2;
}
// Represent an individual component of a triple.
#[repr(u8)]
pub enum TriplePart {
SUBJECT = 0b100,
cmdoret marked this conversation as resolved.
Show resolved Hide resolved
PREDICATE = 0b010,
OBJECT = 0b001,
}

impl TriplePart {
// Checks if all bits in `mask` are set.
fn is_set(&self, mask: TriplePart) -> bool {
return self.bits() & mask.bits() == mask.bits();
// Used to select any combination of fields in a triple
pub struct TripleMask(u8);

impl TripleMask {
cmdoret marked this conversation as resolved.
Show resolved Hide resolved
pub fn new() -> Self {
return TripleMask(0);
}
}

impl Triple {
pub fn new(subject: String, predicate: String, object: String) -> Triple {
Triple {
subject,
predicate,
object,
}
pub fn union(&mut self, other: TripleMask) -> TripleMask {
return TripleMask(self.0 | other.0);
}

pub fn is_set(&self, part: TriplePart) -> bool {
return (self.0 & part as u8) != 0;
cmdoret marked this conversation as resolved.
Show resolved Hide resolved
}

pub fn bits(&self) -> u8 {
return self.0;
}

pub fn set(&mut self, part: TriplePart) {
self.0 |= part as u8;
}
}
cmdoret marked this conversation as resolved.
Show resolved Hide resolved

// Pseudonymize the parts of a triple selected by `mask`.
//
// The subject and the object are replaced by their `pseudo()` form when the
// matching `TriplePart` bit is set in `mask`, and copied through unchanged
// otherwise. The predicate is always copied through unchanged: the PREDICATE
// bit of the mask is not consulted here.
//
// Returns a new triple; the input triple is only borrowed.
pub fn pseudonymize_triple<'a>(triple: &Triple<'a>, mask: TripleMask) -> Triple<'a> {
    // Both branches yield an owned value. Borrowing a temporary here and
    // dereferencing it later would depend on temporary lifetime extension
    // inside `if`/`else`, which older compilers reject.
    let subject = if mask.is_set(TriplePart::SUBJECT) {
        triple.subject.pseudo()
    } else {
        triple.subject
    };

    let object = if mask.is_set(TriplePart::OBJECT) {
        triple.object.pseudo()
    } else {
        triple.object
    };

    Triple {
        subject,
        predicate: triple.predicate,
        object,
    }
}

pub fn hash_parts(&self, mask: TriplePart) -> Triple {
let hash_subject = if mask.is_set(TriplePart::SUBJECT) {
hash(&self.subject)
} else {
self.subject.clone()
};

let hash_predicate = if mask.is_set(TriplePart::PREDICATE) {
hash(&self.predicate)
} else {
self.predicate.clone()
};

let hash_object = if mask.is_set(TriplePart::OBJECT) {
hash(&self.object)
} else {
self.object.clone()
};

return Triple::new(hash_subject, hash_predicate, hash_object);
// Pseudonymization of objects (nodes or literals).
impl Pseudonymize for Term<'_> {
    // Rebuild the term variant by variant. Each supported variant is
    // reconstructed around a copy of its current inner value.
    //
    // Panics when the term is a quoted triple (RDF-star).
    fn pseudo(&self) -> Self {
        match *self {
            Term::Triple(_) => panic!("RDF-star not supported (triple as object)"),
            Term::BlankNode(node) => Term::BlankNode(node),
            Term::NamedNode(node) => Term::NamedNode(node),
            Term::Literal(literal) => Term::Literal(literal),
        }
    }
}

// Instantiate a triple from an N-Triples string.
pub fn parse_ntriples(triple: &str) -> Triple {
Triple::new(String::from("A"), String::from("B"), String::from("C"))
// Pseudonymization of subjects (always a URI / blank node).
impl Pseudonymize for Subject<'_> {
    // Rebuild the subject variant by variant. Each supported variant is
    // reconstructed around a copy of its current inner value.
    //
    // Panics when the subject is a quoted triple (RDF-star).
    fn pseudo(&self) -> Self {
        match *self {
            Subject::Triple(_) => panic!("RDF-star not supported (triple as subject)"),
            Subject::BlankNode(node) => Subject::BlankNode(node),
            Subject::NamedNode(node) => Subject::NamedNode(node),
        }
    }
}

// TODO: implement for blanknodes
// NOTE: Support for RDF-star?
1 change: 0 additions & 1 deletion src/pass_first.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@


23 changes: 18 additions & 5 deletions src/pass_second.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,23 @@
use rio_api::{model::Triple, parser::TriplesParser};
use rio_turtle::TurtleError;
use std::{
io::{BufRead, BufReader},
path::Path,
};

use crate::{info, io, log::Logger, model::TriplePart};
use crate::{
io,
log::Logger,
model::{pseudonymize_triple, TripleMask},
};

// Mask one parsed triple and write the result to stdout, one triple per line.
// The mask is created empty via `TripleMask::new()`.
// NOTE: This will need the type-map to perform masking
//
// Always returns `Ok(())`; the `Result` return type lets the function be
// called from the parser's per-triple callback.
fn process_triple(triple: &Triple) -> Result<(), TurtleError> {
    let pseudonymized = pseudonymize_triple(triple, TripleMask::new());
    println!("{pseudonymized}");
    Ok(())
}

pub fn encrypt(log: &Logger, input: &Path, output: &Path, type_map_file: &Path) {
// Construct the buffer either from `stdio` or from an input file.
Expand All @@ -13,9 +27,8 @@ pub fn encrypt(log: &Logger, input: &Path, output: &Path, type_map_file: &Path)
_ => Box::new(io::get_buffer(input)),
};

let triples = io::parse_ntriples(buffer);

for triple in triples {
info!(log, "{:?}", triple.hash_parts(TriplePart::SUBJECT));
let mut triples = io::parse_ntriples(buffer);
while !triples.is_end() {
cmdoret marked this conversation as resolved.
Show resolved Hide resolved
triples.parse_step(&mut |t| process_triple(&t)).unwrap();
}
}