Skip to content

Commit

Permalink
remove enum and put dataset types in database instead #10517
Browse files Browse the repository at this point in the history
Also populate a few dataset types in database, with "dataset"
being the default. Add default type to existing datasets.

Also APIs for managing dataset types.
  • Loading branch information
pdurbin committed Jul 26, 2024
1 parent 3aab5c0 commit c8adf25
Show file tree
Hide file tree
Showing 24 changed files with 436 additions and 90 deletions.
23 changes: 23 additions & 0 deletions doc/release-notes/10517-datasetType.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,26 @@
### Initial Support for Dataset Types (Dataset, Software, Workflow)

Datasets now have types. By default the dataset type will be "dataset" but if you turn on support for additional types, datasets can have a type of "software" or "workflow" as well. For more details see <https://dataverse-guide--10694.org.readthedocs.build/en/10694/user/dataset-types.html> and #10517. Please note that this feature is highly experimental.

A handy query:

```
% DOCKER_CLI_HINTS=false docker exec -it postgres-1 bash -c "PGPASSWORD=secret psql -h localhost -U dataverse dataverse -c 'select dst.name, count(*) from dataset ds, datasettype dst where ds.datasettype_id = dst.id group by dst.name;'"
name | count
----------+-------
dataset | 136
software | 14
(2 rows)
```

Most API tests are passing but we do see a few failures:

```
[ERROR] Failures:
[ERROR] HarvestingClientsIT.testHarvestingClientRun_AllowHarvestingMissingCVV_False:187->harvestingClientRun:301 expected: <7> but was: <0>
[ERROR] HarvestingClientsIT.testHarvestingClientRun_AllowHarvestingMissingCVV_True:191->harvestingClientRun:301 expected: <8> but was: <0>
[ERROR] MakeDataCountApiIT.testMakeDataCountGetMetric:68 1 expectation failed.
Expected status code <200> but was <400>.
```

For reference, the bare query used above: `select dst.name, count(*) from dataset ds, datasettype dst where ds.datasettype_id = dst.id group by dst.name;`
4 changes: 2 additions & 2 deletions src/main/java/edu/harvard/iq/dataverse/Dataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ public class Dataset extends DvObjectContainer {
*/
private boolean useGenericThumbnail;

@OneToOne(cascade = {CascadeType.MERGE, CascadeType.PERSIST})
@JoinColumn(name = "datasettype_id", nullable = true)
@ManyToOne
@JoinColumn(name="datasettype_id", nullable = false)
private DatasetType datasetType;

@OneToOne(cascade = {CascadeType.MERGE, CascadeType.PERSIST})
Expand Down
11 changes: 10 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailServiceBean;
import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleServiceBean;
import edu.harvard.iq.dataverse.dataset.DatasetTypeServiceBean;
import edu.harvard.iq.dataverse.engine.command.Command;
import edu.harvard.iq.dataverse.engine.command.CommandContext;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
Expand Down Expand Up @@ -127,7 +128,10 @@ public class EjbDataverseEngine {

@EJB
MetadataBlockServiceBean metadataBlockService;


@EJB
DatasetTypeServiceBean datasetTypeService;

@EJB
DataverseLinkingServiceBean dvLinking;

Expand Down Expand Up @@ -603,6 +607,11 @@ public MetadataBlockServiceBean metadataBlocks() {
return metadataBlockService;
}

@Override
// Accessor required by the CommandContext interface: exposes the
// dataset-type service bean to commands executed through the engine,
// mirroring the sibling accessors (e.g. metadataBlocks() above).
public DatasetTypeServiceBean datasetTypes() {
    return datasetTypeService;
}

@Override
public void beginCommandSequence() {
this.commandsCalled = new Stack();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailServiceBean;
import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleServiceBean;
import edu.harvard.iq.dataverse.dataset.DatasetTypeServiceBean;
import edu.harvard.iq.dataverse.engine.command.Command;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
Expand Down Expand Up @@ -164,6 +165,9 @@ String getWrappedMessageWhenJson() {
@EJB
protected LicenseServiceBean licenseSvc;

@EJB
protected DatasetTypeServiceBean datasetTypeSvc;

@EJB
protected UserServiceBean userSvc;

Expand Down Expand Up @@ -247,7 +251,7 @@ public enum Format {
private final LazyRef<JsonParser> jsonParserRef = new LazyRef<>(new Callable<JsonParser>() {
@Override
public JsonParser call() throws Exception {
return new JsonParser(datasetFieldSvc, metadataBlockSvc,settingsSvc, licenseSvc);
return new JsonParser(datasetFieldSvc, metadataBlockSvc,settingsSvc, licenseSvc, datasetTypeSvc);
}
});

Expand Down
105 changes: 105 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,12 @@
import java.util.stream.Collectors;

import static edu.harvard.iq.dataverse.api.ApiConstants.*;
import edu.harvard.iq.dataverse.dataset.DatasetType;
import edu.harvard.iq.dataverse.dataset.DatasetTypeServiceBean;
import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*;
import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder;
import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST;
import static jakarta.ws.rs.core.Response.Status.NOT_FOUND;

@Path("datasets")
public class Datasets extends AbstractApiBean {
Expand Down Expand Up @@ -187,6 +190,9 @@ public class Datasets extends AbstractApiBean {
@Inject
DatasetVersionFilesServiceBean datasetVersionFilesServiceBean;

@Inject
DatasetTypeServiceBean datasetTypeSvc;

/**
* Used to consolidate the way we parse and handle dataset versions.
* @param <T>
Expand Down Expand Up @@ -5071,4 +5077,103 @@ public Response resetPidGenerator(@Context ContainerRequestContext crc, @PathPar
return ok("Pid Generator reset to default: " + dataset.getEffectivePidGenerator().getId());
}

@GET
@Path("datasetTypes")
public Response getDatasetTypes() {
    // List every known dataset type as an array of {id, name} objects.
    List<DatasetType> allTypes = datasetTypeSvc.listAll();
    JsonArrayBuilder typesArray = Json.createArrayBuilder();
    allTypes.forEach(type -> typesArray.add(
            Json.createObjectBuilder()
                    .add("id", type.getId())
                    .add("name", type.getName())));
    return ok(typesArray.build());
}

@GET
@Path("datasetTypes/byName/{name}")
public Response getDatasetTypes(@PathParam("name") String name) {
    // Look up a single dataset type by its name; 404 when absent.
    DatasetType match = datasetTypeSvc.getByName(name);
    if (match == null) {
        return error(NOT_FOUND, "Could not find a dataset type with name " + name);
    }
    return ok(match.toJson());
}

@POST
@AuthRequired
@Path("datasetTypes")
public Response addDatasetType(@Context ContainerRequestContext crc, String jsonIn) {
    // Superuser-only: create a new dataset type from a JSON body of the
    // form {"name": "..."} and return the persisted type as JSON.
    AuthenticatedUser user;
    try {
        user = getRequestAuthenticatedUserOrDie(crc);
    } catch (WrappedResponse ex) {
        return error(Response.Status.BAD_REQUEST, "Authentication is required.");
    }
    if (!user.isSuperuser()) {
        return error(Response.Status.FORBIDDEN, "Superusers only.");
    }

    // Invalid input is a client error: return 400 rather than throwing
    // IllegalArgumentException, which would surface as an HTTP 500.
    if (jsonIn == null || jsonIn.isEmpty()) {
        return error(BAD_REQUEST, "JSON input was null or empty!");
    }
    JsonObject jsonObject = JsonUtil.getJsonObject(jsonIn);
    String nameIn = jsonObject.getString("name", null);
    if (nameIn == null) {
        return error(BAD_REQUEST, "A name for the dataset type is required");
    }

    try {
        DatasetType datasetType = new DatasetType();
        datasetType.setName(nameIn);
        DatasetType saved = datasetTypeSvc.save(datasetType);
        Long typeId = saved.getId();
        String name = saved.getName();
        // Record the admin action in the action log for auditability.
        actionLogSvc.log(new ActionLogRecord(ActionLogRecord.ActionType.Admin, "addDatasetType").setInfo("Dataset type added with id " + typeId + " and name " + name + "."));
        return ok(saved.toJson());
    } catch (WrappedResponse ex) {
        return error(BAD_REQUEST, ex.getMessage());
    }
}

@DELETE
@AuthRequired
@Path("datasetTypes/{id}")
public Response deleteDatasetType(@Context ContainerRequestContext crc, @PathParam("id") String doomed) {
    // Superuser-only: delete the dataset type with the given numeric id.
    AuthenticatedUser user;
    try {
        user = getRequestAuthenticatedUserOrDie(crc);
    } catch (WrappedResponse ex) {
        return error(Response.Status.BAD_REQUEST, "Authentication is required.");
    }
    if (!user.isSuperuser()) {
        return error(Response.Status.FORBIDDEN, "Superusers only.");
    }

    // Bad input is a client error: return 400 rather than throwing
    // IllegalArgumentException, which would surface as an HTTP 500.
    if (doomed == null || doomed.isEmpty()) {
        return error(BAD_REQUEST, "ID is required!");
    }

    long idToDelete;
    try {
        idToDelete = Long.parseLong(doomed);
    } catch (NumberFormatException e) {
        return error(BAD_REQUEST, "ID must be a number");
    }

    try {
        int numDeleted = datasetTypeSvc.deleteById(idToDelete);
        if (numDeleted == 1) {
            return ok("deleted");
        } else {
            // 0 (not found) or >1 (unexpected) both reported to the caller.
            return error(BAD_REQUEST, "Something went wrong. Number of dataset types deleted: " + numDeleted);
        }
    } catch (WrappedResponse ex) {
        return error(BAD_REQUEST, ex.getMessage());
    }
}

}
13 changes: 4 additions & 9 deletions src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
Original file line number Diff line number Diff line change
Expand Up @@ -242,13 +242,6 @@ public Response createDataset(@Context ContainerRequestContext crc, String jsonB
//Throw BadRequestException if metadataLanguage isn't compatible with setting
DataverseUtil.checkMetadataLangauge(ds, owner, settingsService.getBaseMetadataLanguageMap(null, true));

try {
logger.info("about to call checkDatasetType...");
DataverseUtil.checkDatasetType(ds, FeatureFlags.DATASET_TYPES.enabled());
} catch (BadRequestException ex) {
return badRequest(ex.getLocalizedMessage());
}

// clean possible version metadata
DatasetVersion version = ds.getVersions().get(0);

Expand Down Expand Up @@ -311,7 +304,7 @@ public Response createDatasetFromJsonLd(@Context ContainerRequestContext crc, St
Dataset ds = new Dataset();

ds.setOwner(owner);
ds = JSONLDUtil.updateDatasetMDFromJsonLD(ds, jsonLDBody, metadataBlockSvc, datasetFieldSvc, false, false, licenseSvc);
ds = JSONLDUtil.updateDatasetMDFromJsonLD(ds, jsonLDBody, metadataBlockSvc, datasetFieldSvc, false, false, licenseSvc, datasetTypeSvc);

ds.setOwner(owner);

Expand Down Expand Up @@ -508,7 +501,7 @@ public Response recreateDataset(@Context ContainerRequestContext crc, String jso
Dataset ds = new Dataset();

ds.setOwner(owner);
ds = JSONLDUtil.updateDatasetMDFromJsonLD(ds, jsonLDBody, metadataBlockSvc, datasetFieldSvc, false, true, licenseSvc);
ds = JSONLDUtil.updateDatasetMDFromJsonLD(ds, jsonLDBody, metadataBlockSvc, datasetFieldSvc, false, true, licenseSvc, datasetTypeSvc);
//ToDo - verify PID is one Dataverse can manage (protocol/authority/shoulder match)
if (!PidUtil.getPidProvider(ds.getGlobalId().getProviderId()).canManagePID()) {
throw new BadRequestException(
Expand Down Expand Up @@ -551,6 +544,8 @@ private Dataset parseDataset(String datasetJson) throws WrappedResponse {
try {
return jsonParser().parseDataset(JsonUtil.getJsonObject(datasetJson));
} catch (JsonParsingException | JsonParseException jpe) {
String message = jpe.getLocalizedMessage();
logger.log(Level.SEVERE, "Error parsing dataset JSON. message: {0}", message);
logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}", datasetJson);
throw new WrappedResponse(error(Status.BAD_REQUEST, "Error parsing Json: " + jpe.getMessage()));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import edu.harvard.iq.dataverse.api.dto.*;
import edu.harvard.iq.dataverse.api.dto.FieldDTO;
import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO;
import edu.harvard.iq.dataverse.dataset.DatasetTypeServiceBean;
import edu.harvard.iq.dataverse.license.LicenseServiceBean;
import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider;
import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider;
Expand Down Expand Up @@ -71,9 +72,13 @@ public class ImportGenericServiceBean {

@EJB
SettingsServiceBean settingsService;

@EJB
LicenseServiceBean licenseService;

@EJB
DatasetTypeServiceBean datasetTypeService;

@PersistenceContext(unitName = "VDCNet-ejbPU")
private EntityManager em;

Expand Down Expand Up @@ -110,7 +115,7 @@ public void importXML(String xmlToParse, String foreignFormat, DatasetVersion da
logger.fine(json);
JsonReader jsonReader = Json.createReader(new StringReader(json));
JsonObject obj = jsonReader.readObject();
DatasetVersion dv = new JsonParser(datasetFieldSvc, blockService, settingsService, licenseService).parseDatasetVersion(obj, datasetVersion);
DatasetVersion dv = new JsonParser(datasetFieldSvc, blockService, settingsService, licenseService, datasetTypeService).parseDatasetVersion(obj, datasetVersion);
} catch (XMLStreamException ex) {
//Logger.getLogger("global").log(Level.SEVERE, null, ex);
throw new EJBException("ERROR occurred while parsing XML fragment ("+xmlToParse.substring(0, 64)+"...); ", ex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import edu.harvard.iq.dataverse.MetadataBlockServiceBean;
import edu.harvard.iq.dataverse.api.dto.DatasetDTO;
import edu.harvard.iq.dataverse.api.imports.ImportUtil.ImportType;
import edu.harvard.iq.dataverse.dataset.DatasetTypeServiceBean;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetVersionCommand;
Expand Down Expand Up @@ -104,8 +105,13 @@ public class ImportServiceBean {

@EJB
IndexServiceBean indexService;

@EJB
LicenseServiceBean licenseService;

@EJB
DatasetTypeServiceBean datasetTypeService;

/**
* This is just a convenience method, for testing migration. It creates
* a dummy dataverse with the directory name as dataverse name & alias.
Expand Down Expand Up @@ -264,7 +270,7 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve
JsonObject obj = JsonUtil.getJsonObject(json);
//and call parse Json to read it into a dataset
try {
JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService, licenseService, harvestingClient);
JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService, licenseService, datasetTypeService, harvestingClient);
parser.setLenient(true);
Dataset ds = parser.parseDataset(obj);

Expand Down Expand Up @@ -417,7 +423,7 @@ public JsonObjectBuilder doImport(DataverseRequest dataverseRequest, Dataverse o
JsonObject obj = JsonUtil.getJsonObject(json);
//and call parse Json to read it into a dataset
try {
JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService, licenseService);
JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService, licenseService, datasetTypeService);
parser.setLenient(!importType.equals(ImportType.NEW));
Dataset ds = parser.parseDataset(obj);

Expand Down
Loading

0 comments on commit c8adf25

Please sign in to comment.