Skip to content

Commit

Permalink
feat(parse)!: add proto::extensions::SimpleExtensionUri parser (#169)
Browse files Browse the repository at this point in the history
Adds a parser for `proto::extensions::SimpleExtensionUri` that parses
the uri and adds the extension to the parse context.

Breaking change because the parse `Context` trait gets two new functions
to support this:
- `add_simple_extension_uri`: this is used to add a simple extension URI.
The parse context must directly resolve the URI and return the parsed
simple extensions. A struct stub for the parsed simple extensions is added
in this PR (the parser is TODO).
- `simple_extensions`: given a reference (anchor) to a simple extension
URI, check whether it was added to the parse context and, if so, return its
simple extensions
  • Loading branch information
mbrobbel authored Apr 3, 2024
1 parent cddc53d commit 332d607
Show file tree
Hide file tree
Showing 11 changed files with 405 additions and 10 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ include = [

[features]
default = []
parse = ["dep:hex", "dep:thiserror", "semver"]
parse = ["dep:hex", "dep:thiserror", "dep:url", "semver"]
protoc = ["dep:protobuf-src"]
semver = ["dep:semver"]
serde = ["dep:pbjson", "dep:pbjson-build", "dep:pbjson-types"]
Expand All @@ -37,6 +37,7 @@ pbjson = { version = "0.6.0", optional = true }
pbjson-types = { version = "0.6.0", optional = true }
prost = "0.12.3"
prost-types = "0.12.3"
url = { version = "2.5.0", optional = true }
semver = { version = "1.0.22", optional = true }
serde = { version = "1.0.197", features = ["derive"] }
serde_json = "1.0.114"
Expand Down
101 changes: 97 additions & 4 deletions src/parse/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

//! A parse context.

use crate::parse::Parse;
use thiserror::Error;

use crate::parse::{
proto::extensions::SimpleExtensionUri, text::simple_extensions::SimpleExtensions, Anchor, Parse,
};

/// A parse context.
///
Expand All @@ -18,12 +22,101 @@ pub trait Context {
{
item.parse(self)
}

/// Adds a [SimpleExtensionUri] to this context.
///
/// Implementations must eagerly resolve the URI and parse the simple extensions it
/// refers to. On success a reference to the resulting [SimpleExtensions] is returned.
///
/// # Errors
///
/// Implementations must return an error when the anchor of `simple_extension_uri`
/// is a duplicate, when its URI is not supported, or when resolving or parsing
/// the simple extension fails.
fn add_simple_extension_uri(
&mut self,
simple_extension_uri: &SimpleExtensionUri,
) -> Result<&SimpleExtensions, ContextError>;

/// Returns the simple extensions for the given simple extension anchor.
///
/// # Errors
///
/// Returns a [ContextError] when no simple extensions can be returned for
/// `anchor`. [ContextError::UndefinedSimpleExtension] is the variant that
/// describes an undefined reference to a simple extension.
fn simple_extensions(
&self,
anchor: &Anchor<SimpleExtensionUri>,
) -> Result<&SimpleExtensions, ContextError>;
}

/// Parse context errors.
///
/// This is the error type returned by [Context::add_simple_extension_uri] and
/// [Context::simple_extensions].
#[derive(Debug, Error, PartialEq)]
pub enum ContextError {
/// Undefined reference to simple extension.
///
/// Carries the anchor that was looked up; it is included in the error message.
#[error("undefined reference to simple extension with anchor `{0}`")]
UndefinedSimpleExtension(Anchor<SimpleExtensionUri>),

/// Duplicate anchor for simple extension.
///
/// Carries the anchor that was used more than once; it is included in the
/// error message.
#[error("duplicate anchor `{0}` for simple extension")]
DuplicateSimpleExtension(Anchor<SimpleExtensionUri>),

/// Unsupported simple extension URI.
///
/// Carries a human-readable description that is appended to the error message.
#[error("unsupported simple extension URI: {0}")]
UnsupportedURI(String),
}

#[cfg(test)]
pub(crate) mod tests {
#[derive(Default)]
pub struct Context {}
use std::collections::{hash_map::Entry, HashMap};

impl super::Context for Context {}
use crate::parse::{
context::ContextError, proto::extensions::SimpleExtensionUri,
text::simple_extensions::SimpleExtensions, Anchor,
};

/// A test context.
///
/// This currently mocks support for simple extensions (does not resolve or
/// parse).
pub struct Context {
/// Placeholder that is handed out for every simple extension known to this
/// context, because nothing is actually resolved or parsed.
empty_simple_extensions: SimpleExtensions,
/// The simple extension URIs added to this context, keyed by their anchor.
simple_extensions: HashMap<Anchor<SimpleExtensionUri>, SimpleExtensionUri>,
}

impl Default for Context {
fn default() -> Self {
Self {
empty_simple_extensions: SimpleExtensions {},
simple_extensions: Default::default(),
}
}
}

impl super::Context for Context {
    /// Registers the simple extension URI under its anchor.
    ///
    /// Nothing is resolved or parsed here: on success the shared empty
    /// [SimpleExtensions] placeholder is returned.
    ///
    /// Fails with [ContextError::DuplicateSimpleExtension] when the anchor is
    /// already registered, and with [ContextError::UnsupportedURI] when the
    /// scheme of the URI is not `http`, `https` or `file`.
    fn add_simple_extension_uri(
        &mut self,
        simple_extension_uri: &SimpleExtensionUri,
    ) -> Result<&SimpleExtensions, ContextError> {
        let anchor = simple_extension_uri.anchor();

        // Anchors must be unique within a context.
        let slot = match self.simple_extensions.entry(anchor) {
            Entry::Vacant(slot) => slot,
            Entry::Occupied(_) => return Err(ContextError::DuplicateSimpleExtension(anchor)),
        };

        // A real context would resolve and then parse at this point. The
        // scheme check demonstrates the use of the unsupported URI error.
        let scheme = simple_extension_uri.uri().scheme();
        if !matches!(scheme, "http" | "https" | "file") {
            return Err(ContextError::UnsupportedURI(format!(
                "`{}` scheme not supported",
                scheme
            )));
        }

        slot.insert(simple_extension_uri.clone());
        // The mock always hands out the same empty simple extensions.
        Ok(&self.empty_simple_extensions)
    }

    /// Returns the empty [SimpleExtensions] placeholder when `anchor` was
    /// registered, and [ContextError::UndefinedSimpleExtension] otherwise.
    fn simple_extensions(
        &self,
        anchor: &Anchor<SimpleExtensionUri>,
    ) -> Result<&SimpleExtensions, ContextError> {
        if self.simple_extensions.contains_key(anchor) {
            Ok(&self.empty_simple_extensions)
        } else {
            Err(ContextError::UndefinedSimpleExtension(*anchor))
        }
    }
}
}
4 changes: 4 additions & 0 deletions src/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ mod context;
pub use context::Context;

pub mod proto;
pub mod text;

mod typed;
pub use typed::Anchor;

/// A parse trait.
pub trait Parse<C: Context>: Debug + Sized {
Expand Down
6 changes: 6 additions & 0 deletions src/parse/proto/extensions/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// SPDX-License-Identifier: Apache-2.0

//! Parsing of [proto::extensions] types.

mod simple_extension_uri;
pub use simple_extension_uri::SimpleExtensionUri;
155 changes: 155 additions & 0 deletions src/parse/proto/extensions/simple_extension_uri.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
// SPDX-License-Identifier: Apache-2.0

//! Parsing of [proto::extensions::SimpleExtensionUri].

use thiserror::Error;
use url::Url;

use crate::{
parse::{context::ContextError, Anchor, Context, Parse},
proto,
};

/// A parsed [proto::extensions::SimpleExtensionUri].
///
/// Values of this type are produced by the [Parse] implementation for
/// [proto::extensions::SimpleExtensionUri], which parses the URI string into a
/// [Url] and adds the result to the parse context.
#[derive(Clone, Debug, PartialEq)]
pub struct SimpleExtensionUri {
/// The URI of this simple extension.
uri: Url,

/// The anchor value of this simple extension.
anchor: Anchor<Self>,
}

impl SimpleExtensionUri {
/// Returns the uri of this simple extension.
///
/// The returned [Url] is borrowed from this value.
///
/// See [proto::extensions::SimpleExtensionUri::uri].
pub fn uri(&self) -> &Url {
&self.uri
}

/// Returns the anchor value of this simple extension.
///
/// The anchor is returned by value.
///
/// See [proto::extensions::SimpleExtensionUri::extension_uri_anchor].
pub fn anchor(&self) -> Anchor<Self> {
self.anchor
}
}

/// Parse errors for [proto::extensions::SimpleExtensionUri].
#[derive(Debug, Error, PartialEq)]
pub enum SimpleExtensionUriError {
/// Invalid URI
///
/// The URI string could not be parsed as a [Url]; wraps the underlying
/// [url::ParseError].
#[error("invalid URI: {0}")]
InvalidURI(#[from] url::ParseError),

/// Context error
///
/// The parse context returned an error when the simple extension URI was
/// added to it. The message of the wrapped [ContextError] is used unchanged.
#[error(transparent)]
Context(#[from] ContextError),
}

impl<C: Context> Parse<C> for proto::extensions::SimpleExtensionUri {
    type Parsed = SimpleExtensionUri;
    type Error = SimpleExtensionUriError;

    /// Parses the URI string and adds the simple extension URI to the context.
    ///
    /// Returns [SimpleExtensionUriError::InvalidURI] when the URI string is not
    /// a valid [Url], and [SimpleExtensionUriError::Context] when the parse
    /// context returns an error while adding the simple extension URI.
    fn parse(self, ctx: &mut C) -> Result<Self::Parsed, Self::Error> {
        // Destructure exhaustively so that every field of the message is handled.
        let proto::extensions::SimpleExtensionUri {
            extension_uri_anchor,
            uri: raw_uri,
        } = self;

        // The URI is required and must be valid.
        let uri = Url::parse(&raw_uri)?;
        let anchor = Anchor::new(extension_uri_anchor);
        let parsed = SimpleExtensionUri { uri, anchor };

        // The context checks that the URI is supported, that it resolves and
        // parses, and that the anchor is unique.
        ctx.add_simple_extension_uri(&parsed)?;

        Ok(parsed)
    }
}

impl From<SimpleExtensionUri> for proto::extensions::SimpleExtensionUri {
    /// Converts a parsed simple extension URI back into its protobuf message.
    ///
    /// The URI is written out as a string and the anchor as its inner value.
    fn from(simple_extension_uri: SimpleExtensionUri) -> Self {
        Self {
            extension_uri_anchor: simple_extension_uri.anchor.into_inner(),
            uri: simple_extension_uri.uri.to_string(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::parse::{context::tests::Context, Context as _};

    /// A supported, valid URI parses and keeps its anchor.
    #[test]
    fn parse() -> Result<(), SimpleExtensionUriError> {
        let mut ctx = Context::default();
        let message = proto::extensions::SimpleExtensionUri {
            extension_uri_anchor: 1,
            uri: "https://substrait.io".to_string(),
        };
        let parsed = message.parse(&mut ctx)?;
        assert_eq!(parsed.anchor(), Anchor::new(1));
        assert_eq!(parsed.uri().as_str(), "https://substrait.io/");
        Ok(())
    }

    /// Strings that are not valid URLs are rejected before the context is used.
    #[test]
    fn invalid_uri() {
        // The default message has an empty URI string.
        let missing_uri = proto::extensions::SimpleExtensionUri::default();
        let outcome = missing_uri.parse(&mut Context::default());
        assert_eq!(
            outcome,
            Err(SimpleExtensionUriError::InvalidURI(
                url::ParseError::RelativeUrlWithoutBase
            ))
        );

        // A scheme without a host.
        let missing_host = proto::extensions::SimpleExtensionUri {
            extension_uri_anchor: 1,
            uri: "http://".to_string(),
        };
        let outcome = missing_host.parse(&mut Context::default());
        assert_eq!(
            outcome,
            Err(SimpleExtensionUriError::InvalidURI(
                url::ParseError::EmptyHost
            ))
        );
    }

    /// Adding the same anchor twice to one context fails the second time.
    #[test]
    fn duplicate_simple_extension() {
        let mut ctx = Context::default();
        let first = proto::extensions::SimpleExtensionUri {
            extension_uri_anchor: 1,
            uri: "https://substrait.io".to_string(),
        };
        let second = first.clone();

        assert!(ctx.parse(first).is_ok());

        let outcome = ctx.parse(second);
        assert_eq!(
            outcome,
            Err(SimpleExtensionUriError::Context(
                ContextError::DuplicateSimpleExtension(Anchor::new(1))
            ))
        );
    }

    /// The test context rejects schemes other than `http`, `https` and `file`.
    #[test]
    fn unsupported_uri() {
        let ftp = proto::extensions::SimpleExtensionUri {
            extension_uri_anchor: 1,
            uri: "ftp://substrait.io".to_string(),
        };
        let outcome = ftp.parse(&mut Context::default());
        assert_eq!(
            outcome,
            Err(SimpleExtensionUriError::Context(
                ContextError::UnsupportedURI("`ftp` scheme not supported".to_string())
            ))
        );
    }
}
2 changes: 2 additions & 0 deletions src/parse/proto/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ pub use version::{Version, VersionError};

mod plan_version;
pub use plan_version::{PlanVersion, PlanVersionError};

pub mod extensions;
5 changes: 3 additions & 2 deletions src/parse/proto/plan_version.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,12 @@ mod tests {
};

#[test]
fn parse() {
fn parse() -> Result<(), PlanVersionError> {
let plan_version = proto::PlanVersion {
version: Some(version::version()),
};
assert!(plan_version.parse(&mut Context::default()).is_ok());
plan_version.parse(&mut Context::default())?;
Ok(())
}

#[test]
Expand Down
8 changes: 5 additions & 3 deletions src/parse/proto/version.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,16 @@ mod tests {
use crate::parse::context::tests::Context;

#[test]
fn version() {
fn version() -> Result<(), VersionError> {
let version = proto::Version::default();
assert_eq!(
version.parse(&mut Context::default()),
Err(VersionError::Missing)
);

let version = version::version();
assert!(version.parse(&mut Context::default()).is_ok());
version.parse(&mut Context::default())?;
Ok(())
}

#[test]
Expand Down Expand Up @@ -217,7 +218,8 @@ mod tests {
producer: String::from(""),
..version::version()
};
assert!(version.parse(&mut Context::default())?.producer.is_none());
let version = version.parse(&mut Context::default())?;
assert!(version.producer.is_none());
Ok(())
}

Expand Down
5 changes: 5 additions & 0 deletions src/parse/text/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// SPDX-License-Identifier: Apache-2.0

//! Parsing of [text](crate::text) types.

pub mod simple_extensions;
Loading

0 comments on commit 332d607

Please sign in to comment.