Skip to content

Commit

Permalink
apache#10 Improve documentation around URI based data-sources/-sinks
Browse files Browse the repository at this point in the history
  • Loading branch information
Novotnik, Petr authored and David Moravek committed May 15, 2018
1 parent cf430c9 commit b4633dd
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,17 @@ public interface Dataset<T> extends Serializable {
*/
boolean isBounded();

/**
* Declares this data set to be persisted to a data sink specified by its URI.
* <p>
* This is a convenience shortcut: the sink is obtained by calling
* {@code getFlow().createOutput(uri)} and is then handed over to
* {@link #persist(DataSink)}.
*
* @param uri the URI representing the data sink to persist this data set to
*
* @throws Exception if setting up the actual data sink implementation fails
* for some reason
*
* @see #persist(DataSink)
* @see cz.seznam.euphoria.core.client.io.IORegistry
*/
default void persist(URI uri) throws Exception {
persist(getFlow().createOutput(uri));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,8 @@ public Settings getSettings() {
* @return a dataset representing the specified source
*
* @throws Exception if setting up the source fails for some reason
*
* @see IORegistry
*/
public <T> Dataset<T> createInput(URI uri) throws Exception {
return createInput(getSourceFromURI(uri));
Expand All @@ -332,7 +334,22 @@ public <T> Dataset<T> createInput(DataSource<T> source) {
return ret;
}


/**
* Creates a new data sink based on the specified URI.
* <p>
* The URI-to-sink resolution is delegated to {@code getSinkFromURI(uri)}.
*
* @param <T> the type of elements being written to the sink; this is
* not type-safe. If the caller mixes up the sink and the expected
* type of such a sink, the result may be {@link ClassCastException}s
* at later points in time
*
* @param uri the URI describing the sink of the data set
*
* @return a data sink based on the specified URI
*
* @throws Exception if setting up the sink fails for some reason
*
* @see IORegistry
*/
public <T> DataSink<T> createOutput(URI uri) throws Exception {
return getSinkFromURI(uri);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,36 @@
import java.net.URI;

/**
* Factory of {@code DataSource} from URI and settings.
* Factory for {@link DataSource}s and {@link DataSink}s based on URIs.
* <p>
* The registry instantiates {@link SchemeBasedIORegistry} by default to resolve
* a URI to a data source or sink. If {@link #REGISTRY_IMPL_CONF} is defined
* in the supplied configuration bundle, it is expected to define a subclass of
* {@link IORegistry} and have a public, default constructor. A new instance of
* this sub-class will be created for every URI resolve request and is then responsible
* for creating the corresponding data source or sink.
*/
public abstract class IORegistry {

/**
* The configuration key specifying a sub-class of {@link IORegistry} to
* instantiate to handle URI to data source/sink translation requests.
*/
public static final String REGISTRY_IMPL_CONF = "euphoria.io.registry.impl";

private static final String REGISTRY_IMPL_CONF = "euphoria.io.registry.impl";


/**
* Retrieves an {@link IORegistry} from the specified configuration.
* Falls back to {@link SchemeBasedIORegistry} if none is explicitly defined.
*
* @param settings the configuration settings
*
* @return an {@link IORegistry}
*
* @throws Exception if instantiating the configured {@link IORegistry} fails
* for some reason or if the configured registry is not a sub-class
* of {@link IORegistry}
*/
public static IORegistry get(Settings settings) throws Exception {
return getInstance(settings, REGISTRY_IMPL_CONF,
IORegistry.class, new SchemeBasedIORegistry());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,32 @@
import java.net.URI;

/**
* {@code IORegistry} that creates {@code DataSource} based on scheme.
* An {@code IORegistry} creating {@code DataSource} based on a URI's scheme.
*
* Given a configuration bundle and a URI, this registry creates an associated
* data source as follows:
* <ol>
* <li>Extract the scheme from the given URI.</li>
* <li>Lookup the value under the key "{@link #SCHEME_SOURCE_PREFIX} + scheme" where
* scheme represents the scheme value extracted from the given URI.</li>
* <li>If no such value is defined, fail.</li>
* <li>Otherwise validate that the value names an existing class which implements
* {@link DataSourceFactory}.</li>
* <li>Instantiate the class using its default public constructor.</li>
* <li>Invoke {@link DataSourceFactory#get(URI, Settings)} on the new factory instance
* passing on the original URI and configuration values.</li>
* <li>Return the result of the factory's {@code get} method invocation.</li>
* </ol>
*
* Similarly, the same applies to constructing sinks. The corresponding key prefix is
* {@link #SCHEME_SINK_PREFIX} and the factory interface {@link DataSinkFactory}.
*/
public class SchemeBasedIORegistry extends IORegistry {

/** Key prefix specifying associations of schemes to particular data source factories. */
public static final String SCHEME_SOURCE_PREFIX = "euphoria.io.datasource.factory.";

/** Key prefix specifying associations of schemes to particular data sink factories. */
public static final String SCHEME_SINK_PREFIX = "euphoria.io.datasink.factory.";

@Override
Expand Down

0 comments on commit b4633dd

Please sign in to comment.