diff --git a/.github/workflows/container_app_pr.yml b/.github/workflows/container_app_pr.yml index 2474742a082..9e514690a13 100644 --- a/.github/workflows/container_app_pr.yml +++ b/.github/workflows/container_app_pr.yml @@ -58,30 +58,33 @@ jobs: echo "IMAGE_TAG=$(echo "${{ github.event.client_payload.pull_request.head.ref }}" | tr '\\/_:&+,;#*' '-')" >> $GITHUB_ENV # Necessary to split as otherwise the submodules are not available (deploy skips install) - - name: Build app container image with local architecture and submodules (profile will skip tests) + - name: Build app and configbaker container image with local architecture and submodules (profile will skip tests) run: > mvn -B -f modules/dataverse-parent -P ct -pl edu.harvard.iq:dataverse -am install - - name: Deploy multi-arch application container image + - name: Deploy multi-arch application and configbaker container image run: > mvn -Dapp.image.tag=${{ env.IMAGE_TAG }} -Dbase.image.tag=${{ env.BASE_IMAGE_TAG }} - ${{ env.REGISTRY }} -Ddocker.platforms=${{ env.PLATFORMS }} - -P ct deploy + -Ddocker.registry=ghcr.io -Ddocker.platforms=${{ env.PLATFORMS }} + -Pct deploy - uses: marocchino/sticky-pull-request-comment@v2 with: - header: app-registry-push + header: registry-push hide_and_recreate: true hide_classify: "OUTDATED" number: ${{ github.event.client_payload.pull_request.number }} message: | - :package: Pushed preview application image as + :package: Pushed preview images as ``` ghcr.io/gdcc/dataverse:${{ env.IMAGE_TAG }} ``` - :ship: [See on GHCR](https://github.com/orgs/gdcc/packages/container/package/dataverse). Use by referencing with full name as printed above, mind the registry name. + ``` + ghcr.io/gdcc/configbaker:${{ env.IMAGE_TAG }} + ``` + :ship: [See on GHCR](https://github.com/orgs/gdcc/packages/container). Use by referencing with full name as printed above, mind the registry name. # Leave a note when things have gone sideways - uses: peter-evans/create-or-update-comment@v3 @@ -89,5 +92,5 @@ jobs: with: issue-number: ${{ github.event.client_payload.pull_request.number }} body: > - :package: Could not push preview image :disappointed:. - See [log](https://github.com/IQSS/dataverse/actions/runs/${{ github.run_id }}) for details. \ No newline at end of file + :package: Could not push preview images :disappointed:. + See [log](https://github.com/IQSS/dataverse/actions/runs/${{ github.run_id }}) for details. diff --git a/.github/workflows/container_app_push.yml b/.github/workflows/container_app_push.yml index d5e6b87d12a..c60691b1c85 100644 --- a/.github/workflows/container_app_push.yml +++ b/.github/workflows/container_app_push.yml @@ -11,6 +11,7 @@ on: - master paths: - 'src/main/docker/**' + - 'modules/container-configbaker/**' - '.github/workflows/container_app_push.yml' env: @@ -42,7 +43,7 @@ jobs: distribution: temurin cache: maven - - name: Build app container image with local architecture and submodules (profile will skip tests) + - name: Build app and configbaker container image with local architecture and submodules (profile will skip tests) run: > mvn -B -f modules/dataverse-parent -P ct -pl edu.harvard.iq:dataverse -am @@ -52,7 +53,7 @@ jobs: hub-description: needs: build - name: Push image description to Docker Hub + name: Push image descriptions to Docker Hub # Run this when triggered via push or schedule as reused workflow from base / maven unit tests. # Excluding PRs here means we will have no trouble with secrets access. Also avoid runs in forks. 
if: ${{ github.event_name != 'pull_request' && github.ref_name == 'develop' && github.repository_owner == 'IQSS' }} @@ -66,6 +67,13 @@ jobs: repository: gdcc/dataverse short-description: "Dataverse Application Container Image providing the executable" readme-filepath: ./src/main/docker/README.md + - uses: peter-evans/dockerhub-description@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + repository: gdcc/configbaker + short-description: "Dataverse Config Baker Container Image providing setup tooling and more" + readme-filepath: ./modules/container-configbaker/README.md # Note: Accessing, pushing tags etc. to DockerHub or GHCR will only succeed in upstream because secrets. # We check for them here and subsequent jobs can rely on this to decide if they shall run. @@ -130,12 +138,12 @@ jobs: echo "REGISTRY='-Ddocker.registry=ghcr.io'" >> $GITHUB_ENV # Necessary to split as otherwise the submodules are not available (deploy skips install) - - name: Build app container image with local architecture and submodules (profile will skip tests) + - name: Build app and configbaker container image with local architecture and submodules (profile will skip tests) run: > mvn -B -f modules/dataverse-parent -P ct -pl edu.harvard.iq:dataverse -am install - - name: Deploy multi-arch application container image + - name: Deploy multi-arch application and configbaker container image run: > mvn -Dapp.image.tag=${{ env.IMAGE_TAG }} -Dbase.image.tag=${{ env.BASE_IMAGE_TAG }} @@ -145,12 +153,15 @@ jobs: - uses: marocchino/sticky-pull-request-comment@v2 if: ${{ github.event_name == 'pull_request' }} with: - header: app-registry-push + header: registry-push hide_and_recreate: true hide_classify: "OUTDATED" message: | - :package: Pushed preview application image as + :package: Pushed preview images as ``` ghcr.io/gdcc/dataverse:${{ env.IMAGE_TAG }} ``` - :ship: [See on GHCR](https://github.com/orgs/gdcc/packages/container/package/dataverse). Use by referencing with full name as printed above, mind the registry name. + ``` + ghcr.io/gdcc/configbaker:${{ env.IMAGE_TAG }} + ``` + :ship: [See on GHCR](https://github.com/orgs/gdcc/packages/container). Use by referencing with full name as printed above, mind the registry name. diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml index 2d910f54127..94ba041e135 100644 --- a/.github/workflows/shellcheck.yml +++ b/.github/workflows/shellcheck.yml @@ -1,19 +1,27 @@ name: "Shellcheck" on: push: + branches: + - develop paths: - - conf/solr/** - - modules/container-base/** + - conf/solr/**/.sh + - modules/container-base/**/*.sh + - modules/container-configbaker/**/*.sh pull_request: + branches: + - develop paths: - - conf/solr/** - - modules/container-base/** + - conf/solr/**/*.sh + - modules/container-base/**/*.sh + - modules/container-configbaker/**/*.sh jobs: shellcheck: name: Shellcheck runs-on: ubuntu-latest + permissions: + pull-requests: write steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: shellcheck uses: reviewdog/action-shellcheck@v1 with: @@ -21,4 +29,19 @@ jobs: reporter: github-pr-review # Change reporter. 
fail_on_error: true # Container base image uses dumb-init shebang, so nail to using bash - shellcheck_flags: "--shell=bash --external-sources" \ No newline at end of file + shellcheck_flags: "--shell=bash --external-sources" + # Exclude old scripts + exclude: | + */.git/* + conf/docker-aio/* + doc/* + downloads/* + scripts/database/* + scripts/globalid/* + scripts/icons/* + scripts/installer/* + scripts/issues/* + scripts/r/* + scripts/tests/* + scripts/vagrant/* + tests/* diff --git a/conf/solr/8.11.1/update-fields.sh b/conf/solr/8.11.1/update-fields.sh index 49ea8151c77..386c1ee4e87 100755 --- a/conf/solr/8.11.1/update-fields.sh +++ b/conf/solr/8.11.1/update-fields.sh @@ -2,6 +2,8 @@ set -euo pipefail +# [INFO]: Update a prepared Solr schema.xml for Dataverse with a given list of metadata fields + #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### # This script will # 1. take a file (or read it from STDIN) with all and definitions diff --git a/doc/release-notes/6542-mdc-legacy-counts.md b/doc/release-notes/6542-mdc-legacy-counts.md new file mode 100644 index 00000000000..1f439747a08 --- /dev/null +++ b/doc/release-notes/6542-mdc-legacy-counts.md @@ -0,0 +1,3 @@ +### For installations using MDC (Make Data Count), it is now possible to display both the MDC metrics and the legacy access counts, generated before MDC was enabled. + +This is enabled via the new setting `:MDCStartDate` that specifies the cutoff date. If a dataset has any legacy access counts collected prior to that date, those numbers will be displayed in addition to any MDC numbers recorded since then. diff --git a/doc/release-notes/8889-filepids-in-collections.md b/doc/release-notes/8889-filepids-in-collections.md new file mode 100644 index 00000000000..bc8aeea3b56 --- /dev/null +++ b/doc/release-notes/8889-filepids-in-collections.md @@ -0,0 +1,3 @@ +It is now possible to configure registering PIDs for files in individual collections. + +For example, registration of PIDs for files can be enabled in a specific collection when it is disabled instance-wide. Or it can be disabled in specific collections where it is enabled by default. See the [:FilePIDsEnabled](https://guides.dataverse.org/en/latest/installation/config.html#filepidsenabled) section of the Configuration guide for details. \ No newline at end of file diff --git a/doc/release-notes/9431-checksum-alg-in-direct-uploads.md b/doc/release-notes/9431-checksum-alg-in-direct-uploads.md new file mode 100644 index 00000000000..e754686f3f0 --- /dev/null +++ b/doc/release-notes/9431-checksum-alg-in-direct-uploads.md @@ -0,0 +1,4 @@ +Direct upload via the Dataverse UI will now support any algorithm configured via the :FileFixityChecksumAlgorithm setting. +External apps using the direct upload API can now query Dataverse to discover which algorithm should be used. + +Sites that have been using an algorithm other than MD5 with direct upload and/or dvwebloader may want to use the /api/admin/updateHashValues call (see https://guides.dataverse.org/en/latest/installation/config.html?highlight=updatehashvalues#filefixitychecksumalgorithm) to replace any MD5 hashes on existing files. diff --git a/doc/release-notes/9480-h5web.md b/doc/release-notes/9480-h5web.md new file mode 100644 index 00000000000..97beff70e4a --- /dev/null +++ b/doc/release-notes/9480-h5web.md @@ -0,0 +1 @@ +A file previewer called H5Web is now available for exploring and visualizing NetCDF and HDF5 files.
diff --git a/doc/release-notes/9558-async-indexing.md b/doc/release-notes/9558-async-indexing.md new file mode 100644 index 00000000000..a44eac1ff75 --- /dev/null +++ b/doc/release-notes/9558-async-indexing.md @@ -0,0 +1,3 @@ +Performance improvements, especially for large datasets containing thousands of files. +Uploading files one by one to the dataset is much faster now, allowing thousands of files to be uploaded in an acceptable timeframe. Not only uploading a file, but all edit operations on datasets containing many files, are faster now. +Performance tweaks include indexing of the datasets in the background and optimizations in the number of indexing operations needed. Furthermore, updates to the dataset no longer wait for ingesting to finish. Ingesting was already running in the background, but it took a lock, preventing updates to the dataset and degrading performance for datasets containing many files. \ No newline at end of file diff --git a/doc/release-notes/9573-configbaker.md b/doc/release-notes/9573-configbaker.md new file mode 100644 index 00000000000..bb68134794c --- /dev/null +++ b/doc/release-notes/9573-configbaker.md @@ -0,0 +1 @@ +A container has been added called "configbaker" that configures Dataverse while running in containers. This allows developers to spin up Dataverse with a single command. diff --git a/doc/release-notes/9588-datasets-api-extension.md b/doc/release-notes/9588-datasets-api-extension.md new file mode 100644 index 00000000000..f4fd6354d47 --- /dev/null +++ b/doc/release-notes/9588-datasets-api-extension.md @@ -0,0 +1,6 @@ +The following APIs have been added: + +- /api/datasets/summaryFieldNames +- /api/datasets/privateUrlDatasetVersion/{privateUrlToken} +- /api/datasets/privateUrlDatasetVersion/{privateUrlToken}/citation +- /api/datasets/{datasetId}/versions/{version}/citation diff --git a/doc/release-notes/9656-api-optional-dataset-params.md b/doc/release-notes/9656-api-optional-dataset-params.md new file mode 100644 index 00000000000..5d08f26386a --- /dev/null +++ b/doc/release-notes/9656-api-optional-dataset-params.md @@ -0,0 +1,5 @@ +The following fields are now available in the native JSON output: + +- alternativePersistentId +- publicationDate +- citationDate diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 4fc9849f7a5..6e0eb810b27 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -2,5 +2,5 @@ Tool Type Scope Description Data Explorer explore file A GUI which lists the variables in a tabular data file allowing searching, charting and cross tabulation analysis. See the README.md file at https://github.com/scholarsportal/dataverse-data-explorer-v2 for the instructions on adding Data Explorer to your Dataverse. Whole Tale explore dataset A platform for the creation of reproducible research packages that allows users to launch containerized interactive analysis environments based on popular tools such as Jupyter and RStudio. Using this integration, Dataverse users can launch Jupyter and RStudio environments to analyze published datasets. For more information, see the `Whole Tale User Guide `_. Binder explore dataset Binder allows you to spin up custom computing environments in the cloud (including Jupyter notebooks) with the files from your dataset. `Installation instructions `_ are in the Data Exploration Lab girder_ythub project.
See also :ref:`binder`. -File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers +File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, GeoJSON, zip, HDF5, NetCDF, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreadsheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers Data Curation Tool configure file A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions.
diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json index 47413c8a625..22dd6477cb4 100644 --- a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json @@ -14,14 +14,14 @@ { "locale":"{localeCode}" } - ], - "allowedApiCalls": [ - { - "name":"retrieveDatasetJson", - "httpMethod":"GET", - "urlTemplate":"/api/v1/datasets/{datasetId}", - "timeOut":10 - } - ] - } + ] + }, + "allowedApiCalls": [ + { + "name":"retrieveDatasetJson", + "httpMethod":"GET", + "urlTemplate":"/api/v1/datasets/{datasetId}", + "timeOut":10 + } + ] } diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json index 1c132576099..2b6a0b8e092 100644 --- a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json @@ -21,14 +21,14 @@ { "locale":"{localeCode}" } - ], - "allowedApiCalls": [ - { - "name":"retrieveDataFile", - "httpMethod":"GET", - "urlTemplate":"/api/v1/access/datafile/{fileId}", - "timeOut":270 - } ] - } + }, + "allowedApiCalls": [ + { + "name":"retrieveDataFile", + "httpMethod":"GET", + "urlTemplate":"/api/v1/access/datafile/{fileId}", + "timeOut":270 + } + ] } diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst index 7f32e8c2514..92e01578f71 100644 --- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst +++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst @@ -118,6 +118,28 @@ Creates a link between a dataset and a Dataverse collection (see the :ref:`datas curl -H "X-Dataverse-key: $API_TOKEN" -X PUT http://$SERVER/api/datasets/$linked-dataset-id/link/$linking-dataverse-alias +List Collections that are Linked from a Dataset +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Lists the link(s) created between a dataset and a Dataverse collection (see the :ref:`dataset-linking` section of the User Guide for more information). :: + + curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/datasets/$linked-dataset-id/links + +It returns a list in the following format: + +.. code-block:: json + + { + "status": "OK", + "data": { + "dataverses that link to dataset id 56782": [ + "crc990 (id 18802)" + ] + } + } + +.. 
_unlink-a-dataset: + Unlink a Dataset ^^^^^^^^^^^^^^^^ @@ -131,15 +153,32 @@ Mint a PID for a File That Does Not Have One In the following example, the database id of the file is 42:: export FILE_ID=42 - curl http://localhost:8080/api/admin/$FILE_ID/registerDataFile + curl "http://localhost:8080/api/admin/$FILE_ID/registerDataFile" + +Mint PIDs for all unregistered published files in the specified collection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Mint PIDs for Files That Do Not Have Them -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The following API will register the PIDs for all the yet unregistered published files in the datasets **directly within the collection** specified by its alias:: -If you have a large number of files, you might want to consider miniting PIDs for files individually using the ``registerDataFile`` endpoint above in a for loop, sleeping between each registration:: + curl "http://localhost:8080/api/admin/registerDataFiles/{collection_alias}" + +It will not attempt to register the datafiles in its sub-collections, so this call will need to be repeated on any sub-collections where files need to be registered as well. File-level PID registration must be enabled on the collection. (Note that it is possible to have it enabled for a specific collection, even when it is disabled for the Dataverse installation as a whole. See :ref:`collection-attributes-api` in the Native API Guide.) + +This API will sleep for 1 second between registration calls by default. A longer sleep interval can be specified with an optional ``sleep=`` parameter:: + + curl "http://localhost:8080/api/admin/registerDataFiles/{collection_alias}?sleep=5" + +Mint PIDs for ALL unregistered files in the database +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following API will attempt to register the PIDs for all the published files in your instance that do not yet have them:: curl http://localhost:8080/api/admin/registerDataFileAll +The application will attempt to sleep for 1 second between registration attempts so as not to overload your persistent identifier service provider. Note that if you have a large number of files that need to be registered in your Dataverse, you may want to consider minting file PIDs within individual collections, or even for individual files using the ``registerDataFiles`` and/or ``registerDataFile`` endpoints above in a loop, with a longer sleep interval between calls. + + + Mint a New DOI for a Dataset with a Handle ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 9fb8626d4c4..058ba0d8725 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -95,6 +95,11 @@ Each of the three main sections own sets of properties: | displayName | Acts as a brief label for display related to this | Should be relatively brief. The limit is 256 character, | | | #metadataBlock. | but very long names might cause display problems. | +----------------+---------------------------------------------------------+---------------------------------------------------------+ +| displayFacet | Label displayed in the search area when this | Should be brief. Long names will cause display problems | +| | #metadataBlock is configured as a search facet | in the search area. | +| | for a collection. See | | +| | :ref:`the API `.
| | ++----------------+---------------------------------------------------------+---------------------------------------------------------+ | blockURI | Associates the properties in a block with an external | The citation #metadataBlock has the blockURI | | | URI. | https://dataverse.org/schema/citation/ which assigns a | | | Properties will be assigned the | default global URI to terms such as | @@ -452,12 +457,16 @@ metadatablock.name=(the value of **name** property from #metadatablock) metadatablock.displayName=(the value of **displayName** property from #metadatablock) +metadatablock.displayFacet=(the value of **displayFacet** property from #metadatablock) + example: metadatablock.name=citation metadatablock.displayName=Citation Metadata +metadatablock.displayFacet=Citation + #datasetField (field) properties ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasetfieldtype.(the value of **name** property from #datasetField).title=(the value of **title** property from #datasetField) @@ -494,6 +503,8 @@ Running a curl command like "load" example above should make the new custom meta ``curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-type:application/json" -d "[\"journal\",\"geospatial\"]" http://localhost:8080/api/dataverses/:root/metadatablocks`` +.. _update-solr-schema: + Updating the Solr Schema ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/api/getting-started.rst b/doc/sphinx-guides/source/api/getting-started.rst index c465b726421..544f0921bd7 100644 --- a/doc/sphinx-guides/source/api/getting-started.rst +++ b/doc/sphinx-guides/source/api/getting-started.rst @@ -52,6 +52,20 @@ If you ever want to check an environment variable, you can "echo" it like this: echo $SERVER_URL +With curl version 7.56.0 and higher, it is recommended to use --form-string with outer quote rather than -F flag without outer quote. + +For example, curl command parameter below might cause error such as ``warning: garbage at end of field specification: ,"categories":["Data"]}``. + +.. code-block:: bash + + -F jsonData={\"description\":\"My description.\",\"categories\":[\"Data\"]} + +Instead, use --form-string with outer quote. See https://github.com/curl/curl/issues/2022 + +.. code-block:: bash + + --form-string 'jsonData={"description":"My description.","categories":["Data"]}' + If you don't like curl, don't have curl, or want to use a different programming language, you are encouraged to check out the Python, Javascript, R, and Java options in the :doc:`client-libraries` section. .. 
_curl: https://curl.haxx.se diff --git a/doc/sphinx-guides/source/api/metrics.rst b/doc/sphinx-guides/source/api/metrics.rst index f1eb1f88c71..28ac33ea228 100755 --- a/doc/sphinx-guides/source/api/metrics.rst +++ b/doc/sphinx-guides/source/api/metrics.rst @@ -158,8 +158,8 @@ The following table lists the available metrics endpoints (not including the Mak /api/info/metrics/uniquedownloads,"pid, count",json,collection subtree,published,y,total count of unique users who have downloaded from the datasets in scope,The use case for this metric (uniquedownloads) is to more fairly assess which datasets are getting downloaded/used by only counting each users who downloads any file from a dataset as one count (versus downloads of multiple files or repeat downloads counting as multiple counts which adds a bias for large datasets and/or use patterns where a file is accessed repeatedly for new analyses) /api/info/metrics/uniquedownloads/monthly,"date, pid, count","json, csv",collection subtree,published,y,monthly cumulative timeseries of unique user counts for datasets in the dataverse scope, /api/info/metrics/uniquedownloads/toMonth/{yyyy-MM},"pid, count",json,collection subtree,published,y,cumulative count of unique users who have downloaded from the datasets in scope through specified month, - /api/info/metrics/filedownloads/monthly,"date, count, id, pid","json, csv",collection subtree,published,y,"monthly cumulative timeseries by file id, pid from first date of first entry to now","unique downloads (as defined above) per month by file (id, pid) sorted in decreasing order of counts" /api/info/metrics/uniquefiledownloads,"count by id, pid","json, csv",collection subtree,published,y,as of now/totals,unique download counts per file id. PIDs are also included in output if they exist + /api/info/metrics/uniquefiledownloads/monthly,"date, count, id, pid","json, csv",collection subtree,published,y,"monthly cumulative timeseries by file id, pid from first date of first entry to now","unique downloads per month by file (id, pid) sorted in decreasing order of counts" /api/info/metrics/uniquefiledownloads/toMonth/{yyyy-MM},"count by id, pid","json, csv",collection subtree,published,y,cumulative up to month specified,unique download counts per file id to the specified month. 
PIDs are also included in output if they exist /api/info/metrics/tree,"id, ownerId, alias, depth, name, children",json,collection subtree,published,y,"tree of dataverses starting at the root or a specified parentAlias with their id, owner id, alias, name, a computed depth, and array of children dataverses","underlying code can also include draft dataverses, this is not currently accessible via api, depth starts at 0" /api/info/metrics/tree/toMonth/{yyyy-MM},"id, ownerId, alias, depth, name, children",json,collection subtree,published,y,"tree of dataverses in existence as of specified date starting at the root or a specified parentAlias with their id, owner id, alias, name, a computed depth, and array of children dataverses","underlying code can also include draft dataverses, this is not currently accessible via api, depth starts at 0" diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 369e92ba129..3cce9e70a38 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -229,6 +229,8 @@ The fully expanded example above (without environment variables) looks like this Where :download:`dataverse-facets.json <../_static/api/dataverse-facets.json>` contains a JSON encoded list of metadata keys (e.g. ``["authorName","authorAffiliation"]``). +.. _metadata-block-facet-api: + List Metadata Block Facets Configured for a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -736,6 +738,24 @@ The fully expanded example above (without environment variables) looks like this curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/guestbookResponses?guestbookId=1 -o myResponses.csv +.. _collection-attributes-api: + +Change Collection Attributes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: + + curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/attribute/$ATTRIBUTE?value=$VALUE" + +The following attributes are supported: + +* ``alias`` Collection alias +* ``name`` Name +* ``description`` Description +* ``affiliation`` Affiliation +* ``filePIDsEnabled`` ("true" or "false") Enables or disables registration of file-level PIDs in datasets within the collection (overriding the instance-wide setting). + + Datasets -------- @@ -2173,6 +2193,50 @@ Signposting is not supported for draft dataset versions. curl -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/linkset?persistentId=$PERSISTENT_IDENTIFIER" +Get Dataset By Private URL Token +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PRIVATE_URL_TOKEN=a56444bc-7697-4711-8964-e0577f055fd2 + + curl "$SERVER_URL/api/datasets/privateUrlDatasetVersion/$PRIVATE_URL_TOKEN" + +Get Citation +~~~~~~~~~~~~ + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/YD5QDG + export VERSION=1.0 + + curl -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/{version}/citation?persistentId=$PERSISTENT_IDENTIFIER" + +Get Citation by Private URL Token +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PRIVATE_URL_TOKEN=a56444bc-7697-4711-8964-e0577f055fd2 + + curl "$SERVER_URL/api/datasets/privateUrlDatasetVersion/$PRIVATE_URL_TOKEN/citation" + +.. 
_get-dataset-summary-field-names: + +Get Summary Field Names +~~~~~~~~~~~~~~~~~~~~~~~ + +See :ref:`:CustomDatasetSummaryFields` in the Installation Guide for how the list of dataset fields that summarize a dataset can be customized. Here's how to list them: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/datasets/summaryFieldNames" + Files ----- @@ -2963,6 +3027,22 @@ The response is a JSON object described in the :doc:`/api/external-tools` sectio curl -H "X-Dataverse-key: $API_TOKEN" -H "Accept:application/json" "$SERVER_URL/api/files/$FILE_ID/metadata/$FILEMETADATA_ID/toolparams/$TOOL_ID +.. _get-fixity-algorithm: + +Get Fixity Algorithm +~~~~~~~~~~~~~~~~~~~~~~ + +This API call can be used to discover the configured fixity/checksum algorithm being used by a Dataverse installation (as configured by :ref:`:FileFixityChecksumAlgorithm`). +Currently, the possible values are MD5, SHA-1, SHA-256, and SHA-512. +This algorithm will be used when the Dataverse software manages a file upload and should be used by external clients uploading files to a Dataverse instance. (Existing files may or may not have checksums with this algorithm.) + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/files/fixityAlgorithm" + + Users Token Management ---------------------- @@ -4351,6 +4431,26 @@ It will report the specific files that have failed the validation. For example:: These are only available to super users. +.. _UpdateChecksums: + +Update Checksums To Use New Algorithm +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The fixity algorithm used on existing files can be changed by a superuser using this API call. An optional query parameter (num) can be used to limit the number of updates attempted (i.e. to do processing in batches). +The API call will only update the algorithm and checksum for a file if the existing checksum can be validated against the file. +Statistics concerning the updates are returned in the response to the API call with details in the log. +The primary use for this API call is to update existing files after the algorithm used when uploading new files is changed - see :ref:`:FileFixityChecksumAlgorithm`. +Allowed values are MD5, SHA-1, SHA-256, and SHA-512. + +.. code-block:: bash + + export ALG=SHA-256 + export BATCHSIZE=1 + + curl http://localhost:8080/api/admin/updateHashValues/$ALG + curl http://localhost:8080/api/admin/updateHashValues/$ALG?num=$BATCHSIZE + + .. _dataset-validation-api: Dataset Validation diff --git a/doc/sphinx-guides/source/container/configbaker-image.rst b/doc/sphinx-guides/source/container/configbaker-image.rst new file mode 100644 index 00000000000..7218e2d8d14 --- /dev/null +++ b/doc/sphinx-guides/source/container/configbaker-image.rst @@ -0,0 +1,231 @@ +Config Baker Image +================== + +The config baker container may be used to execute all sorts of tasks around setting up, preparing and finalizing +an instance of the Dataverse software. Its focus is bootstrapping non-initialized installations. + +.. contents:: |toctitle| + :local: + +Quickstart +++++++++++ + +To see the Config Baker help screen: + +``docker run -it --rm gdcc/configbaker:unstable`` + +Supported Image Tags +++++++++++++++++++++ + +This image is sourced from the main upstream code `repository of the Dataverse software `_. +Development and maintenance of the `image's code `_ +happens there (again, by the community).
Community-supported image tags are based on the two most important +upstream branches: + +- The ``unstable`` tag corresponds to the ``develop`` branch, where pull requests are merged. + (`Dockerfile `__) +- The ``alpha`` tag corresponds to the ``master`` branch, where releases are cut from. + (`Dockerfile `__) + + + +Image Contents +++++++++++++++ + +This image contains some crucial parts to make a freshly baked Dataverse installation usable. + +Scripts +^^^^^^^ + +.. list-table:: + :align: left + :widths: 20 80 + :header-rows: 1 + + * - Script + - Description + * - ``bootstrap.sh`` + - Run an initialization script contained in a persona. See ``bootstrap.sh -h`` for usage details. + For development purposes, use ``bootstrap.sh dev`` or provide your own. + * - ``fix-fs-perms.sh`` + - Fixes filesystem permissions. App and Solr container run as non-privileged users and might need adjusted + filesystem permissions on mounted volumes to be able to write data. Run without parameters to see usage details. + * - ``help.sh`` + - Default script when running container without parameters. Lists available scripts and details about them. + * - ``update-fields.sh`` + - Update a Solr ``schema.xml`` with a given list of metadata fields. See ``update-fields.sh -h`` for usage details + and :ref:`update-solr-schema` for an example use case. + +Solr Template +^^^^^^^^^^^^^ + +In addition, at ``/template`` a `Solr Configset `_ +is available, ready for Dataverse usage with a tuned core config and schema. + +Providing this template to a vanilla Solr image and using `solr-precreate `_ +with it will create the necessary Solr search index. + +The ``solrconfig.xml`` and ``schema.xml`` are included from the upstream project ``conf/solr/...`` folder. You are +obviously free to provide such a template in some other way, maybe tuned for your purposes. +As a start, the contained script ``update-fields.sh`` may be used to edit the field definitions. + + + +Build Instructions +++++++++++++++++++ + +Assuming you have `Docker `_, `Docker Desktop `_, +`Moby `_ or some remote Docker host configured, up and running from here on. +Note: You need to use Maven when building this image, as we collate selective files from different places of the upstream +repository. (Building with pure Docker Compose does not support this kind of selection.) + +By default, when building the application image, it will also create a new config baker image. Simply execute the +Maven modules packaging target with activated "container" profile from the projects Git root to build the image: + +``mvn -Pct package`` + +If you specifically want to build a config baker image *only*, try + +``mvn -Pct package -Ddocker.filter=dev_bootstrap`` + +The build of config baker involves copying Solr configset files. The Solr version used is inherited from Maven, +acting as the single source of truth. Also, the tag of the image should correspond the application image, as +their usage is intertwined. + +Some additional notes, using Maven parameters to change the build and use ...: + +- | ... a different tag only: add ``-Dconf.image.tag=tag``. + | *Note:* default is ``${app.image.tag}``, which defaults to ``unstable`` +- | ... a different image name and tag: add ``-Dconf.image=name:tag``. + | *Note:* default is ``gdcc/configbaker:${conf.image.tag}`` +- ... a different image registry than Docker Hub: add ``-Ddocker.registry=registry.example.org`` (see also + `DMP docs on registries `__) +- ... 
a different Solr version: use ``-Dsolr.version=x.y.z`` + +Processor Architecture and Multiarch +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This image is published as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: +AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2), by using `Maven Docker Plugin's BuildX mode `_. + +Building the image via ``mvn -Pct package``, etc. will only build for the architecture of the Docker machine's CPU. + +Only ``mvn -Pct deploy -Ddocker.platforms=linux/amd64,linux/arm64`` will trigger building on all enabled architectures. +Yet, to enable building with non-native code on your build machine, you will need to setup a cross-platform builder. + +On Linux, you should install `qemu-user-static `__ (preferably via +your package management) on the host and run ``docker run --rm --privileged multiarch/qemu-user-static --reset -p yes`` +to enable that builder. The Docker plugin will setup everything else for you. + + + +Tunables +++++++++ + +This image has no tunable runtime parameters yet. + + + +Locations ++++++++++ + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 50 + :header-rows: 1 + + * - Location + - Value + - Description + * - ``${SCRIPT_DIR}`` + - ``/scripts`` + - Place to store the scripts. Part of ``$PATH``. + * - ``${SOLR_TEMPLATE}`` + - ``/template`` + - Place where the Solr Configset resides to create an index core from it. + * - ``${BOOTSTRAP_DIR}`` + - ``/scripts/bootstrap`` + - Stores the bootstrapping personas in sub-folders. + * - ``${BOOTSTRAP_DIR}/base`` + - ``/scripts/bootstrap/base`` + - Minimal set of scripts and data from upstream ``scripts/api`` folder, just enough for the most basic setup. + The idea is that other personas may reuse it within their own ``init.sh``, avoiding (some) code duplication. + See ``dev`` persona for an example. + + + +Exposed Ports ++++++++++++++ + +This image contains no runnable services yet, so no ports exposed. + + + +Entry & Extension Points +++++++++++++++++++++++++ + +The entrypoint of this image is pinned to ``dumb-init`` to safeguard signal handling. You may feed any script or +executable to it as command. + +By using our released images as base image to add your own scripting, personas, Solr configset and so on, simply +adapt and alter any aspect you need changed. + + + +Examples +++++++++ + +Docker Compose snippet to wait for Dataverse deployment and execute bootstrapping using a custom persona you added +by bind mounting (as an alternative to extending the image): + +.. code-block:: yaml + + bootstrap: + image: gdcc/configbaker:unstable + restart: "no" + command: + - bootstrap.sh + - mypersona + volumes: + - ./mypersona:/scripts/bootstrap/mypersona + networks: + - dataverse + +Docker Compose snippet to prepare execution of Solr and copy your custom configset you added by bind mounting +(instead of an extension). Note that ``solr-precreate`` will not overwrite an already existing core! To update +the config of an existing core, you need to mount the right volume with the stateful data! + +.. 
code-block:: yaml + + solr_initializer: + container_name: solr_initializer + image: gdcc/configbaker:unstable + restart: "no" + command: + - sh + - -c + - "fix-fs-perms.sh solr && cp -a /template/* /solr-template" + volumes: + - ./volumes/solr/data:/var/solr + - ./volumes/solr/conf:/solr-template + - /tmp/my-generated-configset:/template + + solr: + container_name: solr + hostname: solr + image: solr:${SOLR_VERSION} + depends_on: + - dev_solr_initializer + restart: on-failure + ports: + - "8983:8983" + networks: + - dataverse + command: + - "solr-precreate" + - "collection1" + - "/template" + volumes: + - ./volumes/solr/data:/var/solr + - ./volumes/solr/conf:/template diff --git a/doc/sphinx-guides/source/container/dev-usage.rst b/doc/sphinx-guides/source/container/dev-usage.rst index 4eba70eb23b..3fbe55766d5 100644 --- a/doc/sphinx-guides/source/container/dev-usage.rst +++ b/doc/sphinx-guides/source/container/dev-usage.rst @@ -6,6 +6,21 @@ Please note! This Docker setup is not for production! .. contents:: |toctitle| :local: +Quickstart +---------- + +First, install Java 11 and Maven. + +After cloning the repo, try this: + +``mvn -Pct clean package docker:run`` + +After some time you should be able to log in: + +- url: http://localhost:8080 +- username: dataverseAdmin +- password: admin1 + Intro ----- @@ -14,38 +29,157 @@ Assuming you have `Docker `_, `Docker D you have Java and Maven installed, as you are at least about to develop code changes. To test drive these local changes to the Dataverse codebase in a containerized application server (and avoid the -setup described in :doc:`../developers/dev-environment`), you must a) build the application container and b) -run it in addition to the necessary dependencies. - -Building and Running --------------------- +setup described in :doc:`../developers/dev-environment`), you must a) build the application and b) run it in addition +to the necessary dependencies. (Which might involve building a new local version of the :doc:`configbaker-image`.) -To build the application image, run the following command, as described in :doc:`app-image`: +.. _dev-build: -``mvn -Pct clean package`` +Building +-------- -Now, start all the containers with a single command: +To build the :doc:`application ` and :doc:`config baker image `, run the following command: -``mvn -Pct docker:run`` +``mvn -Pct clean package`` -(You could also concatenate both commands into one.) +Once this is done, you will see images ``gdcc/dataverse:unstable`` and ``gdcc/configbaker:unstable`` available in your +Docker cache. + +**Note:** This will skip any unit tests. If you have built the code before for testing, etc. you might omit the +``clean`` to avoid recompiling. + +**Note:** Also we have a ``docker-compose-dev.yml`` file, it's currently not possible to build the images without +invoking Maven. This might change in the future. + + +.. _dev-run: + +Running +------- + +After building the app and config baker image containing your local changes to the Dataverse application, you want to +run it together with all dependencies. There are four ways to do this (commands executed at root of project directory): + +.. 
list-table:: Cheatsheet: Running Containers + :widths: 15 40 45 + :header-rows: 1 + :stub-columns: 1 + :align: left + + * - \ + - Using Maven + - Using Compose + * - In foreground + - ``mvn -Pct docker:run`` + - ``docker compose -f docker-compose-dev.yml up`` + * - In background + - ``mvn -Pct docker:start`` + - ``docker compose -f docker-compose-dev.yml up -d`` + +Both ways have their pros and cons: + +.. list-table:: Decision Helper: Fore- or Background? + :widths: 15 40 45 + :header-rows: 1 + :stub-columns: 1 + :align: left + + * - \ + - Pros + - Cons + * - Foreground + - | Logs scroll by when interacting with API / UI + | To stop all containers simply hit ``Ctrl+C`` + - | Lots and lots of logs scrolling by + | Must stop all containers to restart + * - Background + - | No logs scrolling by + | Easy to replace single containers + - | No logs scrolling by + | Stopping containers needs an extra command + +In case you want to concatenate building and running, here's a cheatsheet for you: + +.. list-table:: Cheatsheet: Building and Running Containers + :widths: 15 40 45 + :header-rows: 1 + :stub-columns: 1 + :align: left + + * - \ + - Using Maven + - Using Compose + * - In foreground + - ``mvn -Pct package docker:run`` + - ``mvn -Pct package && docker compose -f docker-compose-dev.yml up`` + * - In background + - ``mvn -Pct package docker:start`` + - ``mvn -Pct package && docker compose -f docker-compose-dev.yml up -d`` Once all containers have been started, you can check if the application was deployed correctly by checking the version -at http://localhost:8080/api/info/version. +at http://localhost:8080/api/info/version or watch the logs. -If all looks good, run the :download:`docker-final-setup.sh <../../../../scripts/dev/docker-final-setup.sh>` script below. -(This is a simplified version of the script described in :ref:`rebuilding-dev-environment`.) -In the future, we are planning on running this script within a container as part of https://github.com/IQSS/dataverse/issues/9443 - -.. literalinclude:: ../../../../scripts/dev/docker-final-setup.sh - :language: shell - :encoding: utf-8 - :caption: ``scripts/dev/docker-final-setup.sh`` - :name: docker-final-setup +**Note:** To stop all containers you started in background, invoke ``mvn -Pct docker:stop`` or +``docker compose -f docker-compose-dev.yml down``. Check that you can log in to http://localhost:8080 using user ``dataverseAdmin`` and password ``admin1``. -You can also access the Payara Admin Console if needed, which is available at http://localhost:4848. To log in, use user ``admin`` and password ``admin``. As a reminder, the application container is for development use only, so we are exposing the admin console for testing purposes. In a production environment, it may be more convenient to leave this console unopened. +You can also access the Payara Admin Console if needed, which is available at http://localhost:4848. To log in, use +user ``admin`` and password ``admin``. As a reminder, the application container is for development use only, so we +are exposing the admin console for testing purposes. In a production environment, it may be more convenient to leave +this console unopened. Note that data is persisted in ``./docker-dev-volumes`` in the root of the Git repo. For a clean start, you should remove this directory before running the ``mvn`` commands above. + + +.. 
_dev-logs: + +Viewing Logs +------------ + +In case you started containers in background mode (see :ref:`dev-run`), you can use the following commands to view and/or +watch logs from the containers. + +The safe bet for any running container's logs is to lookup the container name via ``docker ps`` and use it in +``docker logs ``. You can tail logs by adding ``-n`` and follow them by adding ``-f`` (just like ``tail`` cmd). +See ``docker logs --help`` for more. + +Alternatives: + +- In case you used Maven for running, you may use ``mvn -Pct docker:logs -Ddocker.filter=``. +- If you used Docker Compose for running, you may use ``docker compose -f docker-compose-dev.yml logs ``. + Options are the same. + + +Re-Deploying +------------ + +Currently, the only safe and tested way to re-deploy the Dataverse application after you applied code changes is +by recreating the container(s). In the future, more options may be added here. + +If you started your containers in foreground, just stop them and follow the steps for building and running again. +The same goes for using Maven to start the containers in the background. + +In case of using Docker Compose and starting the containers in the background, you can use a workaround to only +restart the application container: + +.. code-block:: + + # First rebuild the container (will complain about an image still in use, this is fine.) + mvn -Pct package + # Then re-create the container (will automatically restart the container for you) + docker compose -f docker-compose-dev.yml create dev_dataverse + +Using ``docker container inspect dev_dataverse | grep Image`` you can verify the changed checksums. + +Using A Debugger +---------------- + +The :doc:`base-image` enables usage of the `Java Debugging Wire Protocol `_ +for remote debugging if you set ``ENABLE_JDWP=1`` as environment variable for the application container. +The default configuration when executing containers with the commands listed at :ref:`dev-run` already enables this. + +There are a lot of tutorials how to connect your IDE's debugger to a remote endpoint. Please use ``localhost:9009`` +as the endpoint. Here are links to the most common IDEs docs on remote debugging: +`Eclipse `_, +`IntelliJ `_ diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst index 1a6592b62e0..4bbc87a4845 100644 --- a/doc/sphinx-guides/source/container/index.rst +++ b/doc/sphinx-guides/source/container/index.rst @@ -26,3 +26,4 @@ develop and extend them further are provided. dev-usage base-image app-image + configbaker-image diff --git a/doc/sphinx-guides/source/developers/dev-environment.rst b/doc/sphinx-guides/source/developers/dev-environment.rst index 58b25d8b941..b3f7fb1c1af 100755 --- a/doc/sphinx-guides/source/developers/dev-environment.rst +++ b/doc/sphinx-guides/source/developers/dev-environment.rst @@ -7,12 +7,20 @@ These instructions are purposefully opinionated and terse to help you get your d .. contents:: |toctitle| :local: -Quick Start ------------ +Quick Start (Docker) +-------------------- -The quickest way to get the Dataverse Software running is to use Vagrant as described in the :doc:`tools` section, or use Docker containers as described the :doc:`../container/dev-usage` section of the Container Guide. +The quickest way to get Dataverse running is in Docker as explained in :doc:`../container/dev-usage` section of the Container Guide. -For day to day development work, we recommended the following setup. 
+ +Classic Dev Environment +----------------------- + +Since before Docker existed, we have encouraged installing Dataverse and all its dependencies directly on your development machine, as described below. This can be thought of as the "classic" development environment for Dataverse. + +However, in 2023 we decided that we'd like to encourage all developers to start using Docker instead and opened https://github.com/IQSS/dataverse/issues/9616 to indicate that we plan to rewrite this page to recommend the use of Docker. + +There's nothing wrong with the classic instructions below and we don't plan to simply delete them. They are a valid alternative to running Dataverse in Docker. We will likely move them to another page. Set Up Dependencies ------------------- @@ -22,7 +30,7 @@ Supported Operating Systems Mac OS X or Linux is required because the setup scripts assume the presence of standard Unix utilities. -Windows is not well supported, unfortunately, but Vagrant and Minishift environments are described in the :doc:`windows` section. +Windows is gaining support through Docker as described in the :doc:`windows` section. Install Java ~~~~~~~~~~~~ @@ -87,12 +95,14 @@ To install Payara, run the following commands: ``cd /usr/local`` -``sudo curl -O -L https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip`` +``sudo curl -O -L https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/5.2022.3/payara-5.2022.3.zip`` ``sudo unzip payara-5.2022.3.zip`` ``sudo chown -R $USER /usr/local/payara5`` +If nexus.payara.fish is ever down for maintenance, Payara distributions are also available from https://repo1.maven.org/maven2/fish/payara/distributions/payara/ + Install Service Dependencies Directly on localhost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -184,6 +194,10 @@ If you want to remove the containers, then run: ``docker-compose -f docker-compose-dev.yml down`` +If you want to run a single container (the mail server, for example) then run: + +``docker-compose -f docker-compose-dev.yml up dev_smtp`` + For a fresh installation, and before running the Software Installer Script, it is recommended to delete the docker-dev-env folder to avoid installation problems due to existing data in the containers. Run the Dataverse Software Installer Script diff --git a/doc/sphinx-guides/source/developers/windows.rst b/doc/sphinx-guides/source/developers/windows.rst index e278b193e12..53578fe980c 100755 --- a/doc/sphinx-guides/source/developers/windows.rst +++ b/doc/sphinx-guides/source/developers/windows.rst @@ -2,86 +2,17 @@ Windows Development =================== -Development on Windows is not well supported, unfortunately. You will have a much easier time if you develop on Mac or Linux as described under :doc:`dev-environment` section. - -Vagrant commands appear below and were tested on Windows 10 but the Vagrant environment is currently broken. Please see https://github.com/IQSS/dataverse/issues/6849 - -There is a newer, experimental Docker option described under :doc:`/container/dev-usage` in the Container Guide. +Historically, development on Windows is `not well supported `_ but as of 2023 a container-based approach is recommended. .. contents:: |toctitle| :local: -Running the Dataverse Software in Vagrant ------------------------------------------ - -Install Vagrant -~~~~~~~~~~~~~~~ - -Download and install Vagrant from https://www.vagrantup.com - -Vagrant advises you to reboot but let's install VirtualBox first. 
- -Install VirtualBox -~~~~~~~~~~~~~~~~~~ - -Download and install VirtualBox from https://www.virtualbox.org - -Note that we saw an error saying "Oracle VM VirtualBox 5.2.8 Setup Wizard ended prematurely" but then we re-ran the installer and it seemed to work. - -Reboot -~~~~~~ - -Again, Vagrant asks you to reboot, so go ahead. - -Install Git -~~~~~~~~~~~ - -Download and install Git from https://git-scm.com - -Configure Git to use Unix Line Endings -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Launch Git Bash and run the following commands: - -``git config --global core.autocrlf input`` - -Pro tip: Use Shift-Insert to paste into Git Bash. - -See also https://help.github.com/articles/dealing-with-line-endings/ - -If you skip this step you are likely to see the following error when you run ``vagrant up``. - -``/tmp/vagrant-shell: ./install: /usr/bin/perl^M: bad interpreter: No such file or directory`` - -Clone Git Repo -~~~~~~~~~~~~~~ - -From Git Bash, run the following command: - -``git clone https://github.com/IQSS/dataverse.git`` - -vagrant up -~~~~~~~~~~ - -From Git Bash, run the following commands: - -``cd dataverse`` - -The ``dataverse`` directory you changed is the one you just cloned. Vagrant will operate on a file called ``Vagrantfile``. - -``vagrant up`` - -After a long while you hopefully will have a Dataverse installation available at http://localhost:8888 - -Improving Windows Support -------------------------- - -Windows Subsystem for Linux -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Running Dataverse in Docker on Windows +-------------------------------------- -We have been unable to get Windows Subsystem for Linux (WSL) to work. We tried following the steps at https://docs.microsoft.com/en-us/windows/wsl/install-win10 but the "Get" button was greyed out when we went to download Ubuntu. +See the `post `_ by Akio Sone for additional details, but please observe the following: -Discussion and Feedback -~~~~~~~~~~~~~~~~~~~~~~~ +- In git, the line-ending setting should be set to always LF (line feed, ``core.autocrlf=input``) +- You must have jq installed: https://jqlang.github.io/jq/download/ -For more discussion of Windows support for Dataverse Software development see our community list thread `"Do you want to develop on Windows?" `_ We would be happy to incorporate feedback from Windows developers into this page. The :doc:`documentation` section describes how. +Once the above is all set, you can move on to :doc:`/container/dev-usage` in the Container Guide. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index c0eb576d7f5..34217c2e73f 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2766,13 +2766,14 @@ timestamps. :FilePIDsEnabled ++++++++++++++++ -Toggles publishing of file-based PIDs for the entire installation. By default this setting is absent and Dataverse Software assumes it to be true. If enabled, the registration will be performed asynchronously (in the background) during publishing of a dataset. +Toggles publishing of file-level PIDs for the entire installation. By default this setting is absent and Dataverse Software assumes it to be true. If enabled, the registration will be performed asynchronously (in the background) during publishing of a dataset.
If you don't want to register file-based PIDs for your installation, set: ``curl -X PUT -d 'false' http://localhost:8080/api/admin/settings/:FilePIDsEnabled`` -Note: File-level PID registration was added in Dataverse Software 4.9; it could not be disabled until Dataverse Software 4.9.3. + +It is possible to override the installation-wide setting for specific collections. For example, registration of PIDs for files can be enabled in a specific collection when it is disabled instance-wide. Or it can be disabled in specific collections where it is enabled by default. See :ref:`collection-attributes-api` for details. .. _:IndependentHandleService: @@ -3112,6 +3113,8 @@ This curl command... See also :doc:`oauth2`. +.. _:FileFixityChecksumAlgorithm: + :FileFixityChecksumAlgorithm ++++++++++++++++++++++++++++ @@ -3121,12 +3124,9 @@ The default checksum algorithm used is MD5 and should be sufficient for establis ``curl -X PUT -d 'SHA-512' http://localhost:8080/api/admin/settings/:FileFixityChecksumAlgorithm`` -The fixity algorithm used on existing files can be changed by a superuser using the API. An optional query parameter (num) can be used to limit the number of updates attempted. -The API call will only update the algorithm and checksum for a file if the existing checksum can be validated against the file. -Statistics concerning the updates are returned in the response to the API call with details in the log. +To update the algorithm used for existing files, see :ref:`UpdateChecksums` -``curl http://localhost:8080/api/admin/updateHashValues/{alg}`` -``curl http://localhost:8080/api/admin/updateHashValues/{alg}?num=1`` +The fixity checksum algorithm in use can be discovered via API. See :ref:`get-fixity-algorithm` in the API Guide. .. _:PVMinLength: @@ -3406,6 +3406,8 @@ Limit on how many guestbook entries to display on the guestbook-responses page. ``curl -X PUT -d 10000 http://localhost:8080/api/admin/settings/:GuestbookResponsesPageDisplayLimit`` +.. _:CustomDatasetSummaryFields: + :CustomDatasetSummaryFields +++++++++++++++++++++++++++ @@ -3415,6 +3417,10 @@ You can replace the default dataset metadata fields that are displayed above fil You have to put the datasetFieldType name attribute in the :CustomDatasetSummaryFields setting for this to work. +The default fields are ``dsDescription,subject,keyword,publication,notesText``. + +This setting can be retrieved via API. See :ref:`get-dataset-summary-field-names` in the API Guide. 
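[Editor's sketch, not part of this patch: changing ``:CustomDatasetSummaryFields`` follows the same ``curl`` pattern as the other database settings in this guide. The field list below is only an illustration, using datasetFieldType names from the default list documented above.]

```bash
# Show only description, subject, and keyword above the file table (illustrative field list).
curl -X PUT -d 'dsDescription,subject,keyword' \
  http://localhost:8080/api/admin/settings/:CustomDatasetSummaryFields

# Delete the setting to fall back to the documented default fields.
curl -X DELETE http://localhost:8080/api/admin/settings/:CustomDatasetSummaryFields
```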
+ :AllowApiTokenLookupViaApi ++++++++++++++++++++++++++ diff --git a/doc/sphinx-guides/source/installation/prerequisites.rst b/doc/sphinx-guides/source/installation/prerequisites.rst index 59de507a264..d95aa78bb26 100644 --- a/doc/sphinx-guides/source/installation/prerequisites.rst +++ b/doc/sphinx-guides/source/installation/prerequisites.rst @@ -55,10 +55,12 @@ Installing Payara - Download and install Payara (installed in ``/usr/local/payara5`` in the example commands below):: - # wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip + # wget https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/5.2022.3/payara-5.2022.3.zip # unzip payara-5.2022.3.zip # mv payara5 /usr/local +If nexus.payara.fish is ever down for maintenance, Payara distributions are also available from https://repo1.maven.org/maven2/fish/payara/distributions/payara/ + If you intend to install and run Payara under a service account (and we hope you do), chown -R the Payara hierarchy to root to protect it but give the service account access to the below directories: - Set service account permissions:: diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index 9223768b49f..7dfce6153dc 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -208,7 +208,7 @@ Previewers are available for the following file types: - Zip (preview and extract/download) - HTML - GeoJSON -- NetCDF/HDF5 (NcML format) +- NetCDF/HDF5 - Hypothes.is Additional file types will be added to the `dataverse-previewers `_ repo before they are listed above so please check there for the latest information or to request (or contribute!) an additional file previewer. @@ -346,6 +346,15 @@ A map will be shown as a preview of GeoJSON files when the previewer has been en NetCDF and HDF5 --------------- +H5Web Previewer +~~~~~~~~~~~~~~~ + +NetCDF and HDF5 files can be explored and visualized with H5Web_, which has been adapted into a file previewer tool (see :ref:`file-previews`) that can be enabled in your Dataverse installation. + +.. _H5Web: https://h5web.panosc.eu + +|h5web| + NcML ~~~~ @@ -794,6 +803,8 @@ If you deaccession the most recently published version of the dataset but not al :class: img-responsive .. |bagit-image1| image:: ./img/bagit-handler-errors.png :class: img-responsive +.. |h5web| image:: ./img/h5web.png + :class: img-responsive .. _Make Data Count: https://makedatacount.org .. _Crossref: https://crossref.org diff --git a/doc/sphinx-guides/source/user/dataverse-management.rst b/doc/sphinx-guides/source/user/dataverse-management.rst index ed90497da8c..b5e8d8f4fc9 100755 --- a/doc/sphinx-guides/source/user/dataverse-management.rst +++ b/doc/sphinx-guides/source/user/dataverse-management.rst @@ -216,7 +216,7 @@ In order to link a dataset, you will need your account to have the "Add Dataset" To link a dataset to your Dataverse collection, you must navigate to that dataset and click the white "Link" button in the upper-right corner of the dataset page. This will open up a window where you can type in the name of the Dataverse collection that you would like to link the dataset to. Select your Dataverse collection and click the save button. This will establish the link, and the dataset will now appear under your Dataverse collection. -There is currently no way to remove established links in the UI. 
If you need to remove a link between a Dataverse collection and a dataset, please contact the support team for the Dataverse installation you are using. +There is currently no way to remove established links in the UI. If you need to remove a link between a Dataverse collection and a dataset, please contact the support team for the Dataverse installation you are using (see the :ref:`unlink-a-dataset` section of the Admin Guide for more information). .. _dataverse-linking: diff --git a/doc/sphinx-guides/source/user/img/h5web.png b/doc/sphinx-guides/source/user/img/h5web.png new file mode 100644 index 00000000000..176aa775114 Binary files /dev/null and b/doc/sphinx-guides/source/user/img/h5web.png differ diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 30c55661a20..ab44dbc1806 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -12,6 +12,7 @@ services: - DATAVERSE_DB_HOST=postgres - DATAVERSE_DB_PASSWORD=secret - DATAVERSE_DB_USER=${DATAVERSE_DB_USER} + - ENABLE_JDWP=1 - DATAVERSE_FEATURE_API_BEARER_AUTH=1 ports: - "8080:8080" # HTTP (Dataverse Application) @@ -33,6 +34,16 @@ services: mem_reservation: 1024m privileged: false + dev_bootstrap: + container_name: "dev_bootstrap" + image: gdcc/configbaker:unstable + restart: "no" + command: + - bootstrap.sh + - dev + networks: + - dataverse + dev_postgres: container_name: "dev_postgres" hostname: postgres @@ -50,17 +61,15 @@ services: dev_solr_initializer: container_name: "dev_solr_initializer" - image: alpine + image: gdcc/configbaker:unstable restart: "no" command: - sh - -c - - "chown 8983:8983 /conf /var/solr && cp *.xml /conf" + - "fix-fs-perms.sh solr && cp -a /template/* /solr-template" volumes: - ./docker-dev-volumes/solr/data:/var/solr - - ./docker-dev-volumes/solr/conf:/conf - - ./conf/solr/8.11.1/schema.xml:/schema.xml - - ./conf/solr/8.11.1/solrconfig.xml:/solrconfig.xml + - ./docker-dev-volumes/solr/conf:/solr-template dev_solr: container_name: "dev_solr" @@ -74,9 +83,9 @@ services: networks: - dataverse command: - - bash - - -c - - "cd /opt/solr-${SOLR_VERSION}/server/solr/configsets/_default/conf && cp -R -n . /template && solr-precreate collection1 /template" + - "solr-precreate" + - "collection1" + - "/template" volumes: - ./docker-dev-volumes/solr/data:/var/solr - ./docker-dev-volumes/solr/conf:/template diff --git a/modules/container-configbaker/Dockerfile b/modules/container-configbaker/Dockerfile new file mode 100644 index 00000000000..20e103ccca8 --- /dev/null +++ b/modules/container-configbaker/Dockerfile @@ -0,0 +1,56 @@ +# Copyright 2023 Forschungszentrum Jülich GmbH +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 + +# This build arg must be given or build will fail +ARG SOLR_VERSION +# We simply have this intermediate stage here without any activity to copy the default configset over +FROM solr:${SOLR_VERSION} AS solr + +# Let's build us a baker +FROM alpine:3 + +ENV SCRIPT_DIR="/scripts" \ + SECRETS_DIR="/secrets" \ + SOLR_TEMPLATE="/template" +ENV PATH="${PATH}:${SCRIPT_DIR}" \ + BOOTSTRAP_DIR="${SCRIPT_DIR}/bootstrap" + +ARG APK_PACKAGES="curl bind-tools netcat-openbsd jq bash dumb-init wait4x" + +RUN true && \ + # Install necessary software and tools + apk add --no-cache ${APK_PACKAGES} && \ + # Make our working directories + mkdir -p ${SCRIPT_DIR} ${SECRETS_DIR} ${SOLR_TEMPLATE} + +# Get in the scripts and make them executable (just in case...) +COPY maven/scripts maven/solr/update-fields.sh ${SCRIPT_DIR}/ +RUN chmod +x ${SCRIPT_DIR}/*.sh ${BOOTSTRAP_DIR}/*/*.sh + +# Copy the Solr config bits +COPY --from=solr /opt/solr/server/solr/configsets/_default ${SOLR_TEMPLATE}/ +COPY maven/solr/*.xml ${SOLR_TEMPLATE}/conf/ + +# Copy the data from scripts/api that provide the common base setup you'd get from the installer. +# ".dockerignore" will take care of taking only the bare necessities +COPY maven/setup ${SCRIPT_DIR}/bootstrap/base/ + +# Set the entrypoint to tini (as a process supervisor) +ENTRYPOINT ["/usr/bin/dumb-init", "--"] +# By default run a script that will print a help message and terminate +CMD ["help.sh"] + +LABEL org.opencontainers.image.created="@git.build.time@" \ + org.opencontainers.image.authors="Research Data Management at FZJ " \ + org.opencontainers.image.url="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.documentation="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.source="https://github.com/IQSS/dataverse/tree/develop/modules/container-configbaker" \ + org.opencontainers.image.version="@project.version@" \ + org.opencontainers.image.revision="@git.commit.id.abbrev@" \ + org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.title="Dataverse Config Baker Image" \ + org.opencontainers.image.description="This container image configures Dataverse and provides other tooling" diff --git a/modules/container-configbaker/README.md b/modules/container-configbaker/README.md new file mode 100644 index 00000000000..17b6f985798 --- /dev/null +++ b/modules/container-configbaker/README.md @@ -0,0 +1,46 @@ +# Config Baker + +The Config Baker container may be used to execute all sorts of tasks around setting up, preparing and finalizing +an instance of the Dataverse software. Its focus is bootstrapping non-initialized installations. + +You may use this image as is, base your own derivative image on it or use bind mounts to change behavior. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +The [Dataverse Container Guide - Config Baker Image](https://guides.dataverse.org/en/latest/container/configbaker-image.html) +provides information about this image. + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. 
+You can join the Community Chat on Matrix at https://chat.dataverse.org and https://groups.google.com/g/dataverse-community +to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-configbaker) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-configbaker/src/main/docker/Dockerfile)) +- The `alpha` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-configbaker/src/main/docker/Dockerfile)) + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-configbaker/assembly.xml b/modules/container-configbaker/assembly.xml new file mode 100644 index 00000000000..f5b309175ed --- /dev/null +++ b/modules/container-configbaker/assembly.xml @@ -0,0 +1,46 @@ + + + + + modules/container-configbaker/scripts + scripts + + + + conf/solr/8.11.1 + solr + + + + scripts/api + setup + + setup-all.sh + setup-builtin-roles.sh + setup-datasetfields.sh + setup-identity-providers.sh + + data/licenses/*.json + data/authentication-providers/builtin.json + data/metadatablocks/*.tsv + + data/dv-root.json + + data/role-admin.json + data/role-curator.json + data/role-dsContributor.json + data/role-dvContributor.json + data/role-editor.json + data/role-filedownloader.json + data/role-fullContributor.json + data/role-member.json + + data/user-admin.json + + + data/metadatablocks/custom* + + + + \ No newline at end of file diff --git a/modules/container-configbaker/scripts/bootstrap.sh b/modules/container-configbaker/scripts/bootstrap.sh new file mode 100644 index 00000000000..1aa9e232953 --- /dev/null +++ b/modules/container-configbaker/scripts/bootstrap.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +# [INFO]: Execute bootstrapping configuration of a freshly baked instance + +set -euo pipefail + +function usage() { + echo "Usage: $(basename "$0") [-h] [-u instanceUrl] [-t timeout] []" + echo "" + echo "Execute initial configuration (bootstrapping) of an empty Dataverse instance." + echo -n "Known personas: " + find "${BOOTSTRAP_DIR}" -mindepth 1 -maxdepth 1 -type d -exec basename {} \; | paste -sd ' ' + echo "" + echo "Parameters:" + echo "instanceUrl - Location on container network where to reach your instance. Default: 'http://dataverse:8080'" + echo " timeout - Provide how long to wait for the instance to become available (using wait4x). 
Default: '2m'" + echo " persona - Configure persona to execute. Calls ${BOOTSTRAP_DIR}//init.sh. Default: 'base'" + echo "" + echo "Note: This script will wait for the Dataverse instance to be available before executing the bootstrapping." + echo " It also checks if already bootstrapped before (availability of metadata blocks) and skip if true." + echo "" + exit 1 +} + +# Set some defaults as documented +DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} +TIMEOUT=${TIMEOUT:-"2m"} + +while getopts "u:t:h" OPTION +do + case "$OPTION" in + u) DATAVERSE_URL="$OPTARG" ;; + t) TIMEOUT="$OPTARG" ;; + h) usage;; + \?) usage;; + esac +done +shift $((OPTIND-1)) + +# Assign persona if present or go default +PERSONA=${1:-"base"} + +# Export the URL to be reused in the actual setup scripts +export DATAVERSE_URL + +# Wait for the instance to become available +echo "Waiting for ${DATAVERSE_URL} to become ready in max ${TIMEOUT}." +wait4x http "${DATAVERSE_URL}/api/info/version" -i 8s -t "$TIMEOUT" --expect-status-code 200 --expect-body-json data.version + +# Avoid bootstrapping again by checking if a metadata block has been loaded +BLOCK_COUNT=$(curl -sSf "${DATAVERSE_URL}/api/metadatablocks" | jq ".data | length") +if [[ $BLOCK_COUNT -gt 0 ]]; then + echo "Your instance has been bootstrapped already, skipping." + exit 0 +fi + +# Now execute the bootstrapping script +echo "Now executing bootstrapping script at ${BOOTSTRAP_DIR}/${PERSONA}/init.sh." +exec "${BOOTSTRAP_DIR}/${PERSONA}/init.sh" diff --git a/modules/container-configbaker/scripts/bootstrap/base/init.sh b/modules/container-configbaker/scripts/bootstrap/base/init.sh new file mode 100644 index 00000000000..81c2b59f347 --- /dev/null +++ b/modules/container-configbaker/scripts/bootstrap/base/init.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -euo pipefail + +# Set some defaults as documented +DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} +export DATAVERSE_URL + +./setup-all.sh diff --git a/modules/container-configbaker/scripts/bootstrap/dev/init.sh b/modules/container-configbaker/scripts/bootstrap/dev/init.sh new file mode 100644 index 00000000000..1042478963d --- /dev/null +++ b/modules/container-configbaker/scripts/bootstrap/dev/init.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +set -euo pipefail + +# Set some defaults as documented +DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} +export DATAVERSE_URL + +echo "Running base setup-all.sh (INSECURE MODE)..." +"${BOOTSTRAP_DIR}"/base/setup-all.sh --insecure -p=admin1 | tee /tmp/setup-all.sh.out + +echo "Setting system mail address..." +curl -X PUT -d "dataverse@localhost" "${DATAVERSE_URL}/api/admin/settings/:SystemEmail" + +echo "Setting DOI provider to \"FAKE\"..." +curl "${DATAVERSE_URL}/api/admin/settings/:DoiProvider" -X PUT -d FAKE + +API_TOKEN=$(grep apiToken "/tmp/setup-all.sh.out" | jq ".data.apiToken" | tr -d \") +export API_TOKEN + +echo "Publishing root dataverse..." +curl -H "X-Dataverse-key:$API_TOKEN" -X POST "${DATAVERSE_URL}/api/dataverses/:root/actions/:publish" + +echo "Allowing users to create dataverses and datasets in root..." +curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-type:application/json" -d "{\"assignee\": \":authenticated-users\",\"role\": \"fullContributor\"}" "${DATAVERSE_URL}/api/dataverses/:root/assignments" + +echo "Checking Dataverse version..." +curl "${DATAVERSE_URL}/api/info/version" + +echo "" +echo "Done, your instance has been configured for development. Have a nice day!" 
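[Editor's sketch, not part of this patch: the ``dev`` persona above is what the ``dev_bootstrap`` service in ``docker-compose-dev.yml`` runs. Assuming that compose file, the bootstrap can also be (re)run by hand, for example with a longer timeout, by overriding the service command.]

```bash
# Start the dev stack, then run the config baker once against it and remove the container afterwards.
docker-compose -f docker-compose-dev.yml up -d
docker-compose -f docker-compose-dev.yml run --rm dev_bootstrap bootstrap.sh -t 5m dev

# Re-running is safe: bootstrap.sh exits early ("bootstrapped already, skipping")
# once metadata blocks are present in the instance.
```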
diff --git a/modules/container-configbaker/scripts/fix-fs-perms.sh b/modules/container-configbaker/scripts/fix-fs-perms.sh new file mode 100644 index 00000000000..9ce8f475d70 --- /dev/null +++ b/modules/container-configbaker/scripts/fix-fs-perms.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +# [INFO]: Fix folder permissions using 'chown' to be writeable by containers not running as root. + +set -euo pipefail + +if [[ "$(id -un)" != "root" ]]; then + echo "This script must be run as user root (not $(id -un)), otherwise no fix is possible." +fi + +DEF_DV_PATH="/dv" +DEF_SOLR_PATH="/var/solr" +DEF_DV_UID="1000" +DEF_SOLR_UID="8983" + +function usage() { + echo "Usage: $(basename "$0") (dv|solr|[1-9][0-9]{3,4}) [PATH [PATH [...]]]" + echo "" + echo "You may omit a path when using 'dv' or 'solr' as first argument:" + echo " - 'dv' will default to user $DEF_DV_UID and $DEF_DV_PATH" + echo " - 'solr' will default to user $DEF_SOLR_UID and $DEF_SOLR_PATH" + exit 1 +} + +# Get a target name or id +TARGET=${1:-help} +# Get the rest of the arguments as paths to apply the fix to +PATHS=( "${@:2}" ) + +ID=0 +case "$TARGET" in + dv) + ID="$DEF_DV_UID" + # If there is no path, add the default for our app image + if [[ ${#PATHS[@]} -eq 0 ]]; then + PATHS=( "$DEF_DV_PATH" ) + fi + ;; + solr) + ID="$DEF_SOLR_UID" + # In case there is no path, add the default path for Solr images + if [[ ${#PATHS[@]} -eq 0 ]]; then + PATHS=( "$DEF_SOLR_PATH" ) + fi + ;; + # If there is a digit in the argument, check if this is a valid UID (>= 1000, ...) + *[[:digit:]]* ) + echo "$TARGET" | grep -q "^[1-9][0-9]\{3,4\}$" || usage + ID="$TARGET" + ;; + *) + usage + ;; +esac + +# Check that we actually have at least 1 path +if [[ ${#PATHS[@]} -eq 0 ]]; then + usage +fi + +# Do what we came for +chown -R "$ID:$ID" "${PATHS[@]}" diff --git a/modules/container-configbaker/scripts/help.sh b/modules/container-configbaker/scripts/help.sh new file mode 100644 index 00000000000..744ec8c8b4c --- /dev/null +++ b/modules/container-configbaker/scripts/help.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +set -euo pipefail + +# [INFO]: This script. + +# This is the Dataverse logo in ASCII +# shellcheck disable=SC2016 +echo -e ' ╓mαo\n ╫ jh\n `%╥æ╨\n ╫µ\n ╓@M%╗,\n ▓` ╫U\n ▓² ╫╛\n ▓M#M╝"\n ┌æM╝╝%φ╫┘\n┌╫" "╫┐\n▓ ▓\n▓ ▓\n`╫µ ¿╫"\n "╜%%MM╜`' +echo "" +echo "Hello!" +echo "" +echo "My name is Config Baker. I'm a container image with lots of tooling to 'bake' a containerized Dataverse instance!" +echo "I can cook up an instance (initial config), put icing on your Solr search index configuration, and more!" +echo "" +echo "Here's a list of things I can do for you:" + +# Get the longest name length +LENGTH=1 +for SCRIPT in "${SCRIPT_DIR}"/*.sh; do + L="$(basename "$SCRIPT" | wc -m)" + if [ "$L" -gt "$LENGTH" ]; then + LENGTH="$L" + fi +done + +# Print script names and info, but formatted +for SCRIPT in "${SCRIPT_DIR}"/*.sh; do + printf "%${LENGTH}s - " "$(basename "$SCRIPT")" + grep "# \[INFO\]: " "$SCRIPT" | sed -e "s|# \[INFO\]: ||" +done + +echo "" +echo "Simply execute this container with the script name (and potentially arguments) as 'command'." 
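[Editor's sketch, not part of this patch: assumed invocations of the two scripts above, mirroring how the ``dev_solr_initializer`` service in ``docker-compose-dev.yml`` uses ``fix-fs-perms.sh``; host paths are examples only.]

```bash
# With no command, the image's default CMD runs help.sh and lists the available scripts.
docker run --rm gdcc/configbaker:unstable

# fix-fs-perms.sh runs as root inside the container and chowns a bind-mounted
# Solr data directory to UID/GID 8983 so the non-root Solr container can write to it.
docker run --rm -v "$(pwd)/docker-dev-volumes/solr/data:/var/solr" \
  gdcc/configbaker:unstable fix-fs-perms.sh solr

# An explicit UID and path work too (example path, not from the repo):
docker run --rm -v "$(pwd)/my-app-data:/dv" \
  gdcc/configbaker:unstable fix-fs-perms.sh 1000 /dv
```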
diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 57761e19b8a..7d46f7c49b9 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -196,10 +196,10 @@ 3.1.2 1.6.13 - 1.9.0 + 1.7.0 - 0.42.1 + 0.43.0 diff --git a/modules/dataverse-spi/pom.xml b/modules/dataverse-spi/pom.xml index 7a2255fdbfb..6235d309e89 100644 --- a/modules/dataverse-spi/pom.xml +++ b/modules/dataverse-spi/pom.xml @@ -195,6 +195,7 @@ + org.apache.maven.plugins maven-javadoc-plugin diff --git a/pom.xml b/pom.xml index 170f7ed52ec..96f598af0f5 100644 --- a/pom.xml +++ b/pom.xml @@ -184,6 +184,11 @@ provided + + fish.payara.api + payara-api + provided + com.sun.mail jakarta.mail @@ -777,6 +782,8 @@ unstable gdcc/base:${base.image.tag} unstable + gdcc/configbaker:${conf.image.tag} + ${app.image.tag} @@ -831,23 +838,8 @@ assembly.xml - + - - @@ -856,6 +848,35 @@ docker-compose-dev.yml + + + dev_bootstrap + ${conf.image} + + + + ${docker.platforms} + + + ${project.basedir}/modules/container-configbaker/Dockerfile + + ${SOLR_VERSION} + + @ + + ${project.basedir}/modules/container-configbaker/assembly.xml + + + + + + true + + true diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json b/scripts/api/data/dataset-create-new-all-default-fields.json index 58e2b26e8e8..4af128955c9 100644 --- a/scripts/api/data/dataset-create-new-all-default-fields.json +++ b/scripts/api/data/dataset-create-new-all-default-fields.json @@ -710,9 +710,9 @@ }, { "typeName": "series", - "multiple": false, + "multiple": true, "typeClass": "compound", - "value": { + "value": [{ "seriesName": { "typeName": "seriesName", "multiple": false, @@ -725,7 +725,7 @@ "typeClass": "primitive", "value": "SeriesInformation" } - } + }] }, { "typeName": "software", @@ -1404,7 +1404,7 @@ "multiple": true, "typeClass": "controlledVocabulary", "value": [ - "cell counting", + "genome sequencing", "cell sorting", "clinical chemistry analysis", "DNA methylation profiling" diff --git a/scripts/api/setup-all.sh b/scripts/api/setup-all.sh index c4bd6c2c9c5..e247caa72b5 100755 --- a/scripts/api/setup-all.sh +++ b/scripts/api/setup-all.sh @@ -3,7 +3,14 @@ SECURESETUP=1 DV_SU_PASSWORD="admin" -for opt in $* +DATAVERSE_URL=${DATAVERSE_URL:-"http://localhost:8080"} +# Make sure scripts we call from this one also get this env var! +export DATAVERSE_URL + +# scripts/api when called from the root of the source tree +SCRIPT_PATH="$(dirname "$0")" + +for opt in "$@" do case $opt in "--insecure") @@ -24,13 +31,9 @@ do esac done +# shellcheck disable=SC2016 command -v jq >/dev/null 2>&1 || { echo >&2 '`jq` ("sed for JSON") is required, but not installed. Download the binary for your platform from http://stedolan.github.io/jq/ and make sure it is in your $PATH (/usr/bin/jq is fine) and executable with `sudo chmod +x /usr/bin/jq`. On Mac, you can install it with `brew install jq` if you use homebrew: http://brew.sh . 
Aborting.'; exit 1; } -echo "deleting all data from Solr" -curl http://localhost:8983/solr/collection1/update/json?commit=true -H "Content-type: application/json" -X POST -d "{\"delete\": { \"query\":\"*:*\"}}" - -SERVER=http://localhost:8080/api - # Everything + the kitchen sink, in a single script # - Setup the metadata blocks and controlled vocabulary # - Setup the builtin roles @@ -41,49 +44,49 @@ SERVER=http://localhost:8080/api echo "Setup the metadata blocks" -./setup-datasetfields.sh +"$SCRIPT_PATH"/setup-datasetfields.sh echo "Setup the builtin roles" -./setup-builtin-roles.sh +"$SCRIPT_PATH"/setup-builtin-roles.sh echo "Setup the authentication providers" -./setup-identity-providers.sh +"$SCRIPT_PATH"/setup-identity-providers.sh echo "Setting up the settings" echo "- Allow internal signup" -curl -X PUT -d yes "$SERVER/admin/settings/:AllowSignUp" -curl -X PUT -d /dataverseuser.xhtml?editMode=CREATE "$SERVER/admin/settings/:SignUpUrl" - -curl -X PUT -d doi "$SERVER/admin/settings/:Protocol" -curl -X PUT -d 10.5072 "$SERVER/admin/settings/:Authority" -curl -X PUT -d "FK2/" "$SERVER/admin/settings/:Shoulder" -curl -X PUT -d DataCite "$SERVER/admin/settings/:DoiProvider" -curl -X PUT -d burrito $SERVER/admin/settings/BuiltinUsers.KEY -curl -X PUT -d localhost-only $SERVER/admin/settings/:BlockedApiPolicy -curl -X PUT -d 'native/http' $SERVER/admin/settings/:UploadMethods +curl -X PUT -d yes "${DATAVERSE_URL}/api/admin/settings/:AllowSignUp" +curl -X PUT -d "/dataverseuser.xhtml?editMode=CREATE" "${DATAVERSE_URL}/api/admin/settings/:SignUpUrl" + +curl -X PUT -d doi "${DATAVERSE_URL}/api/admin/settings/:Protocol" +curl -X PUT -d 10.5072 "${DATAVERSE_URL}/api/admin/settings/:Authority" +curl -X PUT -d "FK2/" "${DATAVERSE_URL}/api/admin/settings/:Shoulder" +curl -X PUT -d DataCite "${DATAVERSE_URL}/api/admin/settings/:DoiProvider" +curl -X PUT -d burrito "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY" +curl -X PUT -d localhost-only "${DATAVERSE_URL}/api/admin/settings/:BlockedApiPolicy" +curl -X PUT -d 'native/http' "${DATAVERSE_URL}/api/admin/settings/:UploadMethods" echo echo "Setting up the admin user (and as superuser)" -adminResp=$(curl -s -H "Content-type:application/json" -X POST -d @data/user-admin.json "$SERVER/builtin-users?password=$DV_SU_PASSWORD&key=burrito") -echo $adminResp -curl -X POST "$SERVER/admin/superuser/dataverseAdmin" +adminResp=$(curl -s -H "Content-type:application/json" -X POST -d @"$SCRIPT_PATH"/data/user-admin.json "${DATAVERSE_URL}/api/builtin-users?password=$DV_SU_PASSWORD&key=burrito") +echo "$adminResp" +curl -X POST "${DATAVERSE_URL}/api/admin/superuser/dataverseAdmin" echo echo "Setting up the root dataverse" -adminKey=$(echo $adminResp | jq .data.apiToken | tr -d \") -curl -s -H "Content-type:application/json" -X POST -d @data/dv-root.json "$SERVER/dataverses/?key=$adminKey" +adminKey=$(echo "$adminResp" | jq .data.apiToken | tr -d \") +curl -s -H "Content-type:application/json" -X POST -d @"$SCRIPT_PATH"/data/dv-root.json "${DATAVERSE_URL}/api/dataverses/?key=$adminKey" echo echo "Set the metadata block for Root" -curl -s -X POST -H "Content-type:application/json" -d "[\"citation\"]" $SERVER/dataverses/:root/metadatablocks/?key=$adminKey +curl -s -X POST -H "Content-type:application/json" -d "[\"citation\"]" "${DATAVERSE_URL}/api/dataverses/:root/metadatablocks/?key=$adminKey" echo echo "Set the default facets for Root" -curl -s -X POST -H "Content-type:application/json" -d "[\"authorName\",\"subject\",\"keywordValue\",\"dateOfDeposit\"]" 
$SERVER/dataverses/:root/facets/?key=$adminKey +curl -s -X POST -H "Content-type:application/json" -d "[\"authorName\",\"subject\",\"keywordValue\",\"dateOfDeposit\"]" "${DATAVERSE_URL}/api/dataverses/:root/facets/?key=$adminKey" echo echo "Set up licenses" # Note: CC0 has been added and set as the default license through # Flyway script V5.9.0.1__7440-configurable-license-list.sql -curl -X POST -H 'Content-Type: application/json' -H "X-Dataverse-key:$adminKey" $SERVER/licenses --upload-file data/licenses/licenseCC-BY-4.0.json +curl -X POST -H 'Content-Type: application/json' -H "X-Dataverse-key:$adminKey" "${DATAVERSE_URL}/api/licenses" --upload-file "$SCRIPT_PATH"/data/licenses/licenseCC-BY-4.0.json # OPTIONAL USERS AND DATAVERSES #./setup-optional.sh @@ -92,8 +95,8 @@ if [ $SECURESETUP = 1 ] then # Revoke the "burrito" super-key; # Block sensitive API endpoints; - curl -X DELETE $SERVER/admin/settings/BuiltinUsers.KEY - curl -X PUT -d 'admin,builtin-users' $SERVER/admin/settings/:BlockedApiEndpoints + curl -X DELETE "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY" + curl -X PUT -d 'admin,builtin-users' "${DATAVERSE_URL}/api/admin/settings/:BlockedApiEndpoints" echo "Access to the /api/admin and /api/test is now disabled, except for connections from localhost." else echo "IMPORTANT!!!" diff --git a/scripts/api/setup-builtin-roles.sh b/scripts/api/setup-builtin-roles.sh index 0f3c1c150cd..f1f268debbc 100755 --- a/scripts/api/setup-builtin-roles.sh +++ b/scripts/api/setup-builtin-roles.sh @@ -1,34 +1,37 @@ -SERVER=http://localhost:8080/api +#!/bin/bash + +DATAVERSE_URL=${DATAVERSE_URL:-"http://localhost:8080"} +SCRIPT_PATH="$(dirname "$0")" # Setup the builtin roles echo "Setting up admin role" -curl -H "Content-type:application/json" -d @data/role-admin.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-admin.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up file downloader role" -curl -H "Content-type:application/json" -d @data/role-filedownloader.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-filedownloader.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up full contributor role" -curl -H "Content-type:application/json" -d @data/role-fullContributor.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-fullContributor.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up dv contributor role" -curl -H "Content-type:application/json" -d @data/role-dvContributor.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-dvContributor.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up ds contributor role" -curl -H "Content-type:application/json" -d @data/role-dsContributor.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-dsContributor.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up editor role" -curl -H "Content-type:application/json" -d @data/role-editor.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-editor.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up curator role" -curl -H "Content-type:application/json" -d @data/role-curator.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d 
@"$SCRIPT_PATH"/data/role-curator.json "${DATAVERSE_URL}/api/admin/roles/" echo echo "Setting up member role" -curl -H "Content-type:application/json" -d @data/role-member.json http://localhost:8080/api/admin/roles/ +curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-member.json "${DATAVERSE_URL}/api/admin/roles/" echo diff --git a/scripts/api/setup-datasetfields.sh b/scripts/api/setup-datasetfields.sh index 0d2d60b9538..51da677ceb8 100755 --- a/scripts/api/setup-datasetfields.sh +++ b/scripts/api/setup-datasetfields.sh @@ -1,9 +1,13 @@ -#!/bin/sh -curl http://localhost:8080/api/admin/datasetfield/loadNAControlledVocabularyValue +#!/bin/bash + +DATAVERSE_URL=${DATAVERSE_URL:-"http://localhost:8080"} +SCRIPT_PATH="$(dirname "$0")" + +curl "${DATAVERSE_URL}/api/admin/datasetfield/loadNAControlledVocabularyValue" # TODO: The "@" is confusing. Consider switching to --upload-file citation.tsv -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/citation.tsv -H "Content-type: text/tab-separated-values" -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/geospatial.tsv -H "Content-type: text/tab-separated-values" -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/social_science.tsv -H "Content-type: text/tab-separated-values" -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values" -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values" -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values" +curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/citation.tsv -H "Content-type: text/tab-separated-values" +curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/geospatial.tsv -H "Content-type: text/tab-separated-values" +curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/social_science.tsv -H "Content-type: text/tab-separated-values" +curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values" +curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values" +curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values" diff --git a/scripts/api/setup-identity-providers.sh b/scripts/api/setup-identity-providers.sh index 89ac59de32f..e877f71c6b0 100755 --- a/scripts/api/setup-identity-providers.sh +++ b/scripts/api/setup-identity-providers.sh @@ -1,8 +1,11 @@ -SERVER=http://localhost:8080/api +#!/bin/bash + +DATAVERSE_URL=${DATAVERSE_URL:-"http://localhost:8080"} +SCRIPT_PATH="$(dirname "$0")" # Setup the authentication providers echo "Setting up internal user provider" -curl -H "Content-type:application/json" -d @data/authentication-providers/builtin.json http://localhost:8080/api/admin/authenticationProviders/ +curl -H "Content-type:application/json" -d 
@"$SCRIPT_PATH"/data/authentication-providers/builtin.json "${DATAVERSE_URL}/api/admin/authenticationProviders/" #echo "Setting up Echo providers" #curl -H "Content-type:application/json" -d @data/authentication-providers/echo.json http://localhost:8080/api/admin/authenticationProviders/ diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 28d814d9844..4e323496188 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -418,7 +418,7 @@ public String getIngestReportMessage() { return ingestReports.get(0).getReport(); } } - return "Ingest failed. No further information is available."; + return BundleUtil.getStringFromBundle("file.ingestFailed"); } public boolean isTabularData() { diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 196f84b6877..ca69caa9802 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1,7 +1,5 @@ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.authorization.AccessRequest; -import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -11,19 +9,15 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileSortFieldAndOrder; import edu.harvard.iq.dataverse.util.FileUtil; -import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.UUID; import java.util.logging.Level; import java.util.logging.Logger; @@ -36,9 +30,7 @@ import javax.persistence.NoResultException; import javax.persistence.PersistenceContext; import javax.persistence.Query; -import javax.persistence.StoredProcedureQuery; import javax.persistence.TypedQuery; -import org.apache.commons.lang3.RandomStringUtils; /** * @@ -199,6 +191,18 @@ public List findByDatasetId(Long studyId) { .setParameter("studyId", studyId).getResultList(); } + /** + * + * @param collectionId numeric id of the parent collection ("dataverse") + * @return list of files in the datasets that are *direct* children of the collection specified + * (i.e., no datafiles in sub-collections of this collection will be included) + */ + public List findByDirectCollectionOwner(Long collectionId) { + String queryString = "select f from DataFile f, Dataset d where f.owner.id = d.id and d.owner.id = :collectionId order by f.id"; + return em.createQuery(queryString, DataFile.class) + .setParameter("collectionId", collectionId).getResultList(); + } + public List findAllRelatedByRootDatafileId(Long datafileId) { /* Get all files with the same root datafile id @@ -561,364 +565,6 @@ public DataFile findCheapAndEasy(Long id) { return dataFile; } - /* - * This is an experimental method for populating the versions of - * the datafile with the filemetadatas, optimized for making as few db - * queries as possible. 
- * It should only be used to retrieve filemetadata for the DatasetPage! - * It is not guaranteed to adequately perform anywhere else. - */ - - public void findFileMetadataOptimizedExperimental(Dataset owner, DatasetVersion version, AuthenticatedUser au) { - List dataFiles = new ArrayList<>(); - List dataTables = new ArrayList<>(); - //List retList = new ArrayList<>(); - - // TODO: - // replace these maps with simple lists and run binary search on them. -- 4.2.1 - - Map userMap = new HashMap<>(); - Map filesMap = new HashMap<>(); - Map datatableMap = new HashMap<>(); - Map categoryMap = new HashMap<>(); - Map> fileTagMap = new HashMap<>(); - List accessRequestFileIds = new ArrayList(); - - List fileTagLabels = DataFileTag.listTags(); - - - int i = 0; - //Cache responses - Map embargoMap = new HashMap(); - - List dataTableResults = em.createNativeQuery("SELECT t0.ID, t0.DATAFILE_ID, t0.UNF, t0.CASEQUANTITY, t0.VARQUANTITY, t0.ORIGINALFILEFORMAT, t0.ORIGINALFILESIZE, t0.ORIGINALFILENAME FROM dataTable t0, dataFile t1, dvObject t2 WHERE ((t0.DATAFILE_ID = t1.ID) AND (t1.ID = t2.ID) AND (t2.OWNER_ID = " + owner.getId() + ")) ORDER BY t0.ID").getResultList(); - - for (Object[] result : dataTableResults) { - DataTable dataTable = new DataTable(); - long fileId = ((Number) result[1]).longValue(); - - dataTable.setId(((Number) result[1]).longValue()); - - dataTable.setUnf((String)result[2]); - - dataTable.setCaseQuantity((Long)result[3]); - - dataTable.setVarQuantity((Long)result[4]); - - dataTable.setOriginalFileFormat((String)result[5]); - - dataTable.setOriginalFileSize((Long)result[6]); - - dataTable.setOriginalFileName((String)result[7]); - - dataTables.add(dataTable); - datatableMap.put(fileId, i++); - - } - - logger.fine("Retrieved "+dataTables.size()+" DataTable objects."); - - List dataTagsResults = em.createNativeQuery("SELECT t0.DATAFILE_ID, t0.TYPE FROM DataFileTag t0, dvObject t1 WHERE (t1.ID = t0.DATAFILE_ID) AND (t1.OWNER_ID="+ owner.getId() + ")").getResultList(); - for (Object[] result : dataTagsResults) { - Long datafile_id = (Long) result[0]; - Integer tagtype_id = (Integer) result[1]; - if (fileTagMap.get(datafile_id) == null) { - fileTagMap.put(datafile_id, new HashSet<>()); - } - fileTagMap.get(datafile_id).add(tagtype_id); - } - logger.fine("Retrieved "+dataTagsResults.size()+" data tags."); - dataTagsResults = null; - - //Only need to check for access requests if there is an authenticated user - if (au != null) { - List accessRequests = em.createNativeQuery("SELECT t0.ID FROM DVOBJECT t0, FILEACCESSREQUESTS t1 WHERE t1.datafile_id = t0.id and t0.OWNER_ID = " + owner.getId() + " and t1.AUTHENTICATED_USER_ID = " + au.getId() + " ORDER BY t0.ID").getResultList(); - for (Object result : accessRequests) { - accessRequestFileIds.add(Long.valueOf((Integer)result)); - } - logger.fine("Retrieved " + accessRequests.size() + " access requests."); - accessRequests = null; - } - - i = 0; - - List fileResults = em.createNativeQuery("SELECT t0.ID, t0.CREATEDATE, t0.INDEXTIME, t0.MODIFICATIONTIME, t0.PERMISSIONINDEXTIME, t0.PERMISSIONMODIFICATIONTIME, t0.PUBLICATIONDATE, t0.CREATOR_ID, t0.RELEASEUSER_ID, t1.CONTENTTYPE, t0.STORAGEIDENTIFIER, t1.FILESIZE, t1.INGESTSTATUS, t1.CHECKSUMVALUE, t1.RESTRICTED, t1.CHECKSUMTYPE, t1.PREVIOUSDATAFILEID, t1.ROOTDATAFILEID, t0.PROTOCOL, t0.AUTHORITY, t0.IDENTIFIER, t1.EMBARGO_ID FROM DVOBJECT t0, DATAFILE t1 WHERE ((t0.OWNER_ID = " + owner.getId() + ") AND ((t1.ID = t0.ID) AND (t0.DTYPE = 'DataFile'))) ORDER BY t0.ID").getResultList(); - - for 
(Object[] result : fileResults) { - Integer file_id = (Integer) result[0]; - - DataFile dataFile = new DataFile(); - dataFile.setMergeable(false); - - dataFile.setId(file_id.longValue()); - - Timestamp createDate = (Timestamp) result[1]; - Timestamp indexTime = (Timestamp) result[2]; - Timestamp modificationTime = (Timestamp) result[3]; - Timestamp permissionIndexTime = (Timestamp) result[4]; - Timestamp permissionModificationTime = (Timestamp) result[5]; - Timestamp publicationDate = (Timestamp) result[6]; - - dataFile.setCreateDate(createDate); - dataFile.setIndexTime(indexTime); - dataFile.setModificationTime(modificationTime); - dataFile.setPermissionIndexTime(permissionIndexTime); - dataFile.setPermissionModificationTime(permissionModificationTime); - dataFile.setPublicationDate(publicationDate); - - Long creatorId = (Long) result[7]; - if (creatorId != null) { - AuthenticatedUser creator = userMap.get(creatorId); - if (creator == null) { - creator = userService.find(creatorId); - if (creator != null) { - userMap.put(creatorId, creator); - } - } - if (creator != null) { - dataFile.setCreator(creator); - } - } - - dataFile.setOwner(owner); - - Long releaseUserId = (Long) result[8]; - if (releaseUserId != null) { - AuthenticatedUser releaseUser = userMap.get(releaseUserId); - if (releaseUser == null) { - releaseUser = userService.find(releaseUserId); - if (releaseUser != null) { - userMap.put(releaseUserId, releaseUser); - } - } - if (releaseUser != null) { - dataFile.setReleaseUser(releaseUser); - } - } - - String contentType = (String) result[9]; - - if (contentType != null) { - dataFile.setContentType(contentType); - } - - String storageIdentifier = (String) result[10]; - - if (storageIdentifier != null) { - dataFile.setStorageIdentifier(storageIdentifier); - } - - Long fileSize = (Long) result[11]; - - if (fileSize != null) { - dataFile.setFilesize(fileSize); - } - - if (result[12] != null) { - String ingestStatusString = (String) result[12]; - dataFile.setIngestStatus(ingestStatusString.charAt(0)); - } - - String md5 = (String) result[13]; - - if (md5 != null) { - dataFile.setChecksumValue(md5); - } - - Boolean restricted = (Boolean) result[14]; - if (restricted != null) { - dataFile.setRestricted(restricted); - } - - String checksumType = (String) result[15]; - if (checksumType != null) { - try { - // In the database we store "SHA1" rather than "SHA-1". 
- DataFile.ChecksumType typeFromStringInDatabase = DataFile.ChecksumType.valueOf(checksumType); - dataFile.setChecksumType(typeFromStringInDatabase); - } catch (IllegalArgumentException ex) { - logger.info("Exception trying to convert " + checksumType + " to enum: " + ex); - } - } - - Long previousDataFileId = (Long) result[16]; - if (previousDataFileId != null) { - dataFile.setPreviousDataFileId(previousDataFileId); - } - - Long rootDataFileId = (Long) result[17]; - if (rootDataFileId != null) { - dataFile.setRootDataFileId(rootDataFileId); - } - - String protocol = (String) result[18]; - if (protocol != null) { - dataFile.setProtocol(protocol); - } - - String authority = (String) result[19]; - if (authority != null) { - dataFile.setAuthority(authority); - } - - String identifier = (String) result[20]; - if (identifier != null) { - dataFile.setIdentifier(identifier); - } - - Long embargo_id = (Long) result[21]; - if (embargo_id != null) { - if (embargoMap.containsKey(embargo_id)) { - dataFile.setEmbargo(embargoMap.get(embargo_id)); - } else { - Embargo e = embargoService.findByEmbargoId(embargo_id); - dataFile.setEmbargo(e); - embargoMap.put(embargo_id, e); - } - } - - // TODO: - // - if ingest status is "bad", look up the ingest report; - // - is it a dedicated thumbnail for the dataset? (do we ever need that info?? - not on the dataset page, I don't think...) - - // Is this a tabular file? - - if (datatableMap.get(dataFile.getId()) != null) { - dataTables.get(datatableMap.get(dataFile.getId())).setDataFile(dataFile); - dataFile.setDataTable(dataTables.get(datatableMap.get(dataFile.getId()))); - - } - - if (fileTagMap.get(dataFile.getId()) != null) { - for (Integer tag_id : fileTagMap.get(dataFile.getId())) { - DataFileTag tag = new DataFileTag(); - tag.setTypeByLabel(fileTagLabels.get(tag_id)); - tag.setDataFile(dataFile); - dataFile.addTag(tag); - } - } - - if (dataFile.isRestricted() && accessRequestFileIds.contains(dataFile.getId())) { - dataFile.addFileAccessRequester(au); - } - - dataFiles.add(dataFile); - filesMap.put(dataFile.getId(), i++); - } - - logger.fine("Retrieved and cached "+i+" datafiles."); - - i = 0; - for (DataFileCategory fileCategory : owner.getCategories()) { - //logger.fine("category: id="+fileCategory.getId()); - categoryMap.put(fileCategory.getId(), i++); - } - - logger.fine("Retrieved "+i+" file categories attached to the dataset."); - - version.setFileMetadatas(retrieveFileMetadataForVersion(owner, version, dataFiles, filesMap, categoryMap)); - logger.fine("Retrieved " + version.getFileMetadatas().size() + " filemetadatas for the version " + version.getId()); - owner.setFiles(dataFiles); - } - - private List retrieveFileMetadataForVersion(Dataset dataset, DatasetVersion version, List dataFiles, Map filesMap, Map categoryMap) { - List retList = new ArrayList<>(); - Map> categoryMetaMap = new HashMap<>(); - - List categoryResults = em.createNativeQuery("select t0.filecategories_id, t0.filemetadatas_id from filemetadata_datafilecategory t0, filemetadata t1 where (t0.filemetadatas_id = t1.id) AND (t1.datasetversion_id = "+version.getId()+")").getResultList(); - int i = 0; - for (Object[] result : categoryResults) { - Long category_id = (Long) result[0]; - Long filemeta_id = (Long) result[1]; - if (categoryMetaMap.get(filemeta_id) == null) { - categoryMetaMap.put(filemeta_id, new HashSet<>()); - } - categoryMetaMap.get(filemeta_id).add(category_id); - i++; - } - logger.fine("Retrieved and mapped "+i+" file categories attached to files in the version 
"+version.getId()); - - List metadataResults = em.createNativeQuery("select id, datafile_id, DESCRIPTION, LABEL, RESTRICTED, DIRECTORYLABEL, prov_freeform from FileMetadata where datasetversion_id = "+version.getId() + " ORDER BY LABEL").getResultList(); - - for (Object[] result : metadataResults) { - Integer filemeta_id = (Integer) result[0]; - - if (filemeta_id == null) { - continue; - } - - Long file_id = (Long) result[1]; - if (file_id == null) { - continue; - } - - Integer file_list_id = filesMap.get(file_id); - if (file_list_id == null) { - continue; - } - FileMetadata fileMetadata = new FileMetadata(); - fileMetadata.setId(filemeta_id.longValue()); - fileMetadata.setCategories(new LinkedList<>()); - - if (categoryMetaMap.get(fileMetadata.getId()) != null) { - for (Long cat_id : categoryMetaMap.get(fileMetadata.getId())) { - if (categoryMap.get(cat_id) != null) { - fileMetadata.getCategories().add(dataset.getCategories().get(categoryMap.get(cat_id))); - } - } - } - - fileMetadata.setDatasetVersion(version); - - // Link the FileMetadata object to the DataFile: - fileMetadata.setDataFile(dataFiles.get(file_list_id)); - // ... and the DataFile back to the FileMetadata: - fileMetadata.getDataFile().getFileMetadatas().add(fileMetadata); - - String description = (String) result[2]; - - if (description != null) { - fileMetadata.setDescription(description); - } - - String label = (String) result[3]; - - if (label != null) { - fileMetadata.setLabel(label); - } - - Boolean restricted = (Boolean) result[4]; - if (restricted != null) { - fileMetadata.setRestricted(restricted); - } - - String dirLabel = (String) result[5]; - if (dirLabel != null){ - fileMetadata.setDirectoryLabel(dirLabel); - } - - String provFreeForm = (String) result[6]; - if (provFreeForm != null){ - fileMetadata.setProvFreeForm(provFreeForm); - } - - retList.add(fileMetadata); - } - - logger.fine("Retrieved "+retList.size()+" file metadatas for version "+version.getId()+" (inside the retrieveFileMetadataForVersion method)."); - - - /* - We no longer perform this sort here, just to keep this filemetadata - list as identical as possible to when it's produced by the "traditional" - EJB method. When it's necessary to have the filemetadatas sorted by - FileMetadata.compareByLabel, the DatasetVersion.getFileMetadatasSorted() - method should be called. - - Collections.sort(retList, FileMetadata.compareByLabel); */ - - return retList; - } public List findIngestsInProgress() { if ( em.isOpen() ) { diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 7823b753845..f9c839a0fff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -43,6 +43,10 @@ * @author skraffmiller */ @NamedQueries({ + // Dataset.findById should only be used if you're going to iterate over files (otherwise, lazy loading in DatasetService.find() is better). + // If you are going to iterate over files, preferably call the DatasetService.findDeep() method i.s.o. using this query directly. 
+ @NamedQuery(name = "Dataset.findById", + query = "SELECT o FROM Dataset o LEFT JOIN FETCH o.files WHERE o.id=:id"), @NamedQuery(name = "Dataset.findIdStale", query = "SELECT d.id FROM Dataset d WHERE d.indexTime is NULL OR d.indexTime < d.modificationTime"), @NamedQuery(name = "Dataset.findIdStalePermission", diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java index 8b807f78bca..132955859ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java @@ -16,6 +16,7 @@ import javax.validation.ConstraintValidator; import javax.validation.ConstraintValidatorContext; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.validation.EMailValidator; import edu.harvard.iq.dataverse.validation.URLValidator; import org.apache.commons.lang3.StringUtils; @@ -59,7 +60,7 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte boolean valid = value.getValue().matches(value.getDatasetField().getDatasetFieldType().getValidationFormat()); if (!valid) { try { - context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " is not a valid entry.").addConstraintViolation(); + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidEntry")).addConstraintViolation(); } catch (NullPointerException e) { return false; } @@ -128,7 +129,7 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte } if (!valid) { try { - context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " is not a valid date. 
\"" + YYYYformat + "\" is a supported format.").addConstraintViolation(); + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidDate") ).addConstraintViolation(); } catch (NullPointerException npe) { } @@ -143,7 +144,7 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte } catch (Exception e) { logger.fine("Float value failed validation: " + value.getValue() + " (" + dsfType.getDisplayName() + ")"); try { - context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " is not a valid number.").addConstraintViolation(); + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidNumber") ).addConstraintViolation(); } catch (NullPointerException npe) { } @@ -157,7 +158,7 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte Integer.parseInt(value.getValue()); } catch (Exception e) { try { - context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " is not a valid integer.").addConstraintViolation(); + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidInteger") ).addConstraintViolation(); } catch (NullPointerException npe) { } @@ -170,7 +171,7 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte if (fieldType.equals(FieldType.URL) && !lengthOnly) { boolean isValidUrl = URLValidator.isURLValid(value.getValue()); if (!isValidUrl) { - context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + value.getValue() + " {url.invalid}").addConstraintViolation(); + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + value.getValue() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidURL")).addConstraintViolation(); return false; } } @@ -178,7 +179,7 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte if (fieldType.equals(FieldType.EMAIL) && !lengthOnly) { boolean isValidMail = EMailValidator.isEmailValid(value.getValue()); if (!isValidMail) { - context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + value.getValue() + " {email.invalid}").addConstraintViolation(); + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + value.getValue() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidEmail")).addConstraintViolation(); return false; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 3594d2375a3..3d608153ba3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -40,6 +40,7 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.metadataimport.ForeignMetadataImportServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrlUtil; @@ -81,6 +82,8 @@ import java.util.Set; import java.util.Collection; import java.util.logging.Logger; +import java.util.stream.Collectors; + import javax.ejb.EJB; import javax.ejb.EJBException; import javax.faces.application.FacesMessage; @@ 
-233,6 +236,8 @@ public enum DisplayMode { ExternalToolServiceBean externalToolService; @EJB SolrClientService solrClientService; + @EJB + DvObjectServiceBean dvObjectService; @Inject DataverseRequestServiceBean dvRequestService; @Inject @@ -678,48 +683,45 @@ public void showAll(){ } private List selectFileMetadatasForDisplay() { - Set searchResultsIdSet = null; - - if (isIndexedVersion()) { + final Set searchResultsIdSet; + if (isIndexedVersion() && StringUtil.isEmpty(fileLabelSearchTerm) && StringUtil.isEmpty(fileTypeFacet) && StringUtil.isEmpty(fileAccessFacet) && StringUtil.isEmpty(fileTagsFacet)) { + // Indexed version: we need facets, they are set as a side effect of getFileIdsInVersionFromSolr method. + // But, no search terms were specified, we will return the full + // list of the files in the version: we discard the result from getFileIdsInVersionFromSolr. + getFileIdsInVersionFromSolr(workingVersion.getId(), this.fileLabelSearchTerm); + // Since the search results should include the full set of fmds if all the + // terms/facets are empty, setting them to null should just be + // an optimization to skip the loop below + searchResultsIdSet = null; + } else if (isIndexedVersion()) { // We run the search even if no search term and/or facets are // specified - to generate the facet labels list: searchResultsIdSet = getFileIdsInVersionFromSolr(workingVersion.getId(), this.fileLabelSearchTerm); - // But, if no search terms were specified, we return the full - // list of the files in the version: - if (StringUtil.isEmpty(fileLabelSearchTerm) - && StringUtil.isEmpty(fileTypeFacet) - && StringUtil.isEmpty(fileAccessFacet) - && StringUtil.isEmpty(fileTagsFacet)) { - // Since the search results should include the full set of fmds if all the - // terms/facets are empty, setting them to null should just be - // an optimization for the loop below - searchResultsIdSet = null; - } - } else { + } else if (!StringUtil.isEmpty(this.fileLabelSearchTerm)) { // No, this is not an indexed version. // If the search term was specified, we'll run a search in the db; // if not - return the full list of files in the version. // (no facets without solr!) 
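A note on the selectFileMetadatasForDisplay() rewrite in this hunk: the three search branches now assign a single final searchResultsIdSet, and null is used as the sentinel for "no term or facet applied, keep every FileMetadata in the version"; the old loop over workingVersion.getFileMetadatas() becomes a stream filter against that set. A minimal, self-contained sketch of the pattern, with a placeholder Item type standing in for FileMetadata/DataFile:

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class NullSetMeansNoFilter {

    /** Stand-in for FileMetadata/DataFile; only the datafile id matters here. */
    static class Item {
        final long dataFileId;
        final String label;
        Item(long dataFileId, String label) { this.dataFileId = dataFileId; this.label = label; }
        @Override public String toString() { return label; }
    }

    /** A null idSet is the "no search/facets applied" sentinel: return everything. */
    static List<Item> select(List<Item> all, Set<Long> idSet) {
        if (idSet == null) {
            return new ArrayList<>(all);
        }
        return all.stream()
                  .filter(i -> idSet.contains(i.dataFileId))
                  .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        List<Item> files = Arrays.asList(new Item(1L, "data.tab"), new Item(2L, "readme.txt"));
        System.out.println(select(files, null));                              // [data.tab, readme.txt]
        System.out.println(select(files, new HashSet<>(Arrays.asList(2L)))); // [readme.txt]
    }
}
```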
- if (!StringUtil.isEmpty(this.fileLabelSearchTerm)) { - searchResultsIdSet = getFileIdsInVersionFromDb(workingVersion.getId(), this.fileLabelSearchTerm); - } + searchResultsIdSet = getFileIdsInVersionFromDb(workingVersion.getId(), this.fileLabelSearchTerm); + } else { + searchResultsIdSet = null; } - List retList = new ArrayList<>(); - - for (FileMetadata fileMetadata : workingVersion.getFileMetadatas()) { - if (searchResultsIdSet == null || searchResultsIdSet.contains(fileMetadata.getDataFile().getId())) { - retList.add(fileMetadata); - } + final List md = workingVersion.getFileMetadatas(); + final List retList; + if (searchResultsIdSet == null) { + retList = new ArrayList<>(md); + } else { + retList = md.stream().filter(x -> searchResultsIdSet.contains(x.getDataFile().getId())).collect(Collectors.toList()); } sortFileMetadatas(retList); return retList; } - private void sortFileMetadatas(List fileList) { + private void sortFileMetadatas(final List fileList) { - DataFileComparator dfc = new DataFileComparator(); - Comparator comp = dfc.compareBy(folderPresort, tagPresort, fileSortField, !"desc".equals(fileSortOrder)); + final DataFileComparator dfc = new DataFileComparator(); + final Comparator comp = dfc.compareBy(folderPresort, tagPresort, fileSortField, !"desc".equals(fileSortOrder)); Collections.sort(fileList, comp); } @@ -1843,6 +1845,17 @@ public boolean webloaderUploadSupported() { return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); } + private void setIdByPersistentId() { + GlobalId gid = PidUtil.parseAsGlobalID(persistentId); + Long id = dvObjectService.findIdByGlobalId(gid, DvObject.DType.Dataset); + if (id == null) { + id = dvObjectService.findIdByAltGlobalId(gid, DvObject.DType.Dataset); + } + if (id != null) { + this.setId(id); + } + } + private String init(boolean initFull) { //System.out.println("_YE_OLDE_QUERY_COUNTER_"); // for debug purposes @@ -1866,23 +1879,11 @@ private String init(boolean initFull) { // Set the workingVersion and Dataset // --------------------------------------- if (persistentId != null) { - logger.fine("initializing DatasetPage with persistent ID " + persistentId); - // Set Working Version and Dataset by PersistentID - dataset = datasetService.findByGlobalId(persistentId); - if (dataset == null) { - logger.warning("No such dataset: "+persistentId); - return permissionsWrapper.notFound(); - } - logger.fine("retrieved dataset, id="+dataset.getId()); - - retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); - //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByPersistentId(persistentId, version); - this.workingVersion = retrieveDatasetVersionResponse.getDatasetVersion(); - logger.fine("retrieved version: id: " + workingVersion.getId() + ", state: " + this.workingVersion.getVersionState()); - - } else if (this.getId() != null) { + setIdByPersistentId(); + } + if (this.getId() != null) { // Set Working Version and Dataset by Datasaet Id and Version - dataset = datasetService.find(this.getId()); + dataset = datasetService.findDeep(this.getId()); if (dataset == null) { logger.warning("No such dataset: "+dataset); return permissionsWrapper.notFound(); @@ -1978,11 +1979,6 @@ private String init(boolean initFull) { // init the list of FileMetadatas if (workingVersion.isDraft() && canUpdateDataset()) { readOnly = false; - } else { - // an attempt to retreive both the filemetadatas and datafiles early on, so 
that - // we don't have to do so later (possibly, many more times than necessary): - AuthenticatedUser au = session.getUser() instanceof AuthenticatedUser ? (AuthenticatedUser) session.getUser() : null; - datafileService.findFileMetadataOptimizedExperimental(dataset, workingVersion, au); } // This will default to all the files in the version, if the search term // parameter hasn't been specified yet: @@ -2849,15 +2845,14 @@ public String refresh() { DatasetVersionServiceBean.RetrieveDatasetVersionResponse retrieveDatasetVersionResponse = null; if (persistentId != null) { - //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByPersistentId(persistentId, version); - dataset = datasetService.findByGlobalId(persistentId); + setIdByPersistentId(); + } + if (dataset.getId() != null) { + //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionById(dataset.getId(), version); + dataset = datasetService.findDeep(dataset.getId()); retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); } else if (versionId != null) { retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByVersionId(versionId); - } else if (dataset.getId() != null) { - //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionById(dataset.getId(), version); - dataset = datasetService.find(dataset.getId()); - retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); } if (retrieveDatasetVersionResponse == null) { @@ -2882,11 +2877,6 @@ public String refresh() { this.dataset = this.workingVersion.getDataset(); } - if (readOnly) { - AuthenticatedUser au = session.getUser() instanceof AuthenticatedUser ? (AuthenticatedUser) session.getUser() : null; - datafileService.findFileMetadataOptimizedExperimental(dataset, workingVersion, au); - } - fileMetadatasSearch = selectFileMetadatasForDisplay(); displayCitation = dataset.getCitation(true, workingVersion); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 0f7599f6ae1..305afd2ed30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -38,6 +38,7 @@ import javax.ejb.TransactionAttributeType; import javax.inject.Named; import javax.persistence.EntityManager; +import javax.persistence.LockModeType; import javax.persistence.NoResultException; import javax.persistence.PersistenceContext; import javax.persistence.Query; @@ -105,6 +106,38 @@ public Dataset find(Object pk) { return em.find(Dataset.class, pk); } + /** + * Retrieve a dataset with the deep underlying structure in one query execution. + * This is a more optimal choice when accessing files of a dataset. + * In a contrast, the find() method does not pre-fetch the file objects and results in point queries when accessing these objects. + * Since the files have a deep structure, many queries can be prevented by using the findDeep() method, especially for large datasets + * containing many files, and when iterating over all the files. + * When you are not going to access the file objects, the default find() method is better because of the lazy loading. 
+ * @return a dataset with pre-fetched file objects + */ + public Dataset findDeep(Object pk) { + return (Dataset) em.createNamedQuery("Dataset.findById") + .setParameter("id", pk) + // Optimization hints: retrieve all data in one query; this prevents point queries when iterating over the files + .setHint("eclipselink.left-join-fetch", "o.files.ingestRequest") + .setHint("eclipselink.left-join-fetch", "o.files.thumbnailForDataset") + .setHint("eclipselink.left-join-fetch", "o.files.dataTables") + .setHint("eclipselink.left-join-fetch", "o.files.auxiliaryFiles") + .setHint("eclipselink.left-join-fetch", "o.files.ingestReports") + .setHint("eclipselink.left-join-fetch", "o.files.dataFileTags") + .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas") + .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas.fileCategories") + .setHint("eclipselink.left-join-fetch", "o.files.guestbookResponses") + .setHint("eclipselink.left-join-fetch", "o.files.embargo") + .setHint("eclipselink.left-join-fetch", "o.files.fileAccessRequests") + .setHint("eclipselink.left-join-fetch", "o.files.owner") + .setHint("eclipselink.left-join-fetch", "o.files.releaseUser") + .setHint("eclipselink.left-join-fetch", "o.files.creator") + .setHint("eclipselink.left-join-fetch", "o.files.alternativePersistentIndentifiers") + .setHint("eclipselink.left-join-fetch", "o.files.roleAssignments") + .getSingleResult(); + } + public List findByOwnerId(Long ownerId) { return findByOwnerId(ownerId, false); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 439e4b17ed4..9f272ec6877 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1118,13 +1118,7 @@ public JsonObjectBuilder fixMissingUnf(String datasetVersionId, boolean forceRec // reindexing the dataset, to make sure the new UNF is in SOLR: boolean doNormalSolrDocCleanUp = true; - try { - Future indexingResult = indexService.indexDataset(datasetVersion.getDataset(), doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post UNF update indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + datasetVersion.getDataset().getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, datasetVersion.getDataset()); - } + indexService.asyncIndexDataset(datasetVersion.getDataset(), doNormalSolrDocCleanUp); return info; } diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index bc8716b6129..50d5ae09548 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -590,8 +590,34 @@ public void setCitationDatasetFieldTypes(List citationDatasetF this.citationDatasetFieldTypes = citationDatasetFieldTypes; } - + /** + * @Note: this setting is Nullable, with {@code null} indicating that the + * desired behavior is not explicitly configured for this specific collection. + * See the comment below. + */ + @Column(nullable = true) + private Boolean filePIDsEnabled; + /** + * Specifies whether the PIDs for Datafiles should be registered when publishing + * datasets in this Collection, if the behavior is explicitly configured. 
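filePIDsEnabled is intentionally a nullable Boolean: as the @return note that follows explains, TRUE and FALSE are explicit choices, while null defers to the nearest ancestor collection with an explicit setting, or to the instance-wide configuration. A small sketch of that resolution order; the class and method names here are illustrative, not the actual SystemConfig.isFilePIDsEnabledForCollection() implementation:

```java
public class FilePidPolicySketch {

    /** Minimal stand-in for a collection node carrying an optional tri-state setting. */
    static class Collection {
        final Collection owner;          // parent collection, null for the root
        final Boolean filePIDsEnabled;   // TRUE / FALSE explicit, null = not configured here
        Collection(Collection owner, Boolean filePIDsEnabled) {
            this.owner = owner;
            this.filePIDsEnabled = filePIDsEnabled;
        }
    }

    /** Walk up until an explicit setting is found; otherwise use the instance-wide default. */
    static boolean effectiveFilePIDsEnabled(Collection c, boolean instanceWideDefault) {
        for (Collection cur = c; cur != null; cur = cur.owner) {
            if (cur.filePIDsEnabled != null) {
                return cur.filePIDsEnabled;
            }
        }
        return instanceWideDefault;
    }

    public static void main(String[] args) {
        Collection root = new Collection(null, null);
        Collection parent = new Collection(root, Boolean.FALSE); // explicitly disabled
        Collection child = new Collection(parent, null);         // inherits from parent
        System.out.println(effectiveFilePIDsEnabled(child, true)); // false
        System.out.println(effectiveFilePIDsEnabled(root, true));  // true (instance default)
    }
}
```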
+ * @return {@code Boolean.TRUE} if explicitly enabled, {@code Boolean.FALSE} if explicitly disabled. + * {@code null} indicates that the behavior is not explicitly defined, in which + * case the behavior should follow the explicit configuration of the first + * direct ancestor collection, or the instance-wide configuration, if none + * present. + * @Note: If present, this configuration therefore by default applies to all + * the sub-collections, unless explicitly overwritten there. + * @author landreev + */ + public Boolean getFilePIDsEnabled() { + return filePIDsEnabled; + } + + public void setFilePIDsEnabled(boolean filePIDsEnabled) { + this.filePIDsEnabled = filePIDsEnabled; + } + public List getDataverseFacets() { return getDataverseFacets(false); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseRoleServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseRoleServiceBean.java index 9d09d0580e2..b83593f5b6e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseRoleServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseRoleServiceBean.java @@ -303,7 +303,7 @@ public Set availableRoles(Long dvId) { Set roles = dv.getRoles(); roles.addAll(findBuiltinRoles()); - while (!dv.isEffectivelyPermissionRoot()) { + while (dv.getOwner() != null) { dv = dv.getOwner(); roles.addAll(dv.getRoles()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 854888737ee..e3013b8cf51 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -30,9 +30,13 @@ query="SELECT COUNT(obj) FROM DvObject obj WHERE obj.owner.id=:id"), @NamedQuery(name = "DvObject.findByGlobalId", query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"), + @NamedQuery(name = "DvObject.findIdByGlobalId", + query = "SELECT o.id FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"), @NamedQuery(name = "DvObject.findByAlternativeGlobalId", query = "SELECT o FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"), + @NamedQuery(name = "DvObject.findIdByAlternativeGlobalId", + query = "SELECT o.id FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"), @NamedQuery(name = "DvObject.findByProtocolIdentifierAuthority", query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol"), diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index e22e2f188fd..3430528aea3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -116,6 +116,16 @@ public DvObject findByAltGlobalId(GlobalId globalId, DvObject.DType dtype) { return runFindByGlobalId(query, globalId, dtype); } + public Long findIdByGlobalId(GlobalId globalId, DvObject.DType dtype) { + Query query = em.createNamedQuery("DvObject.findIdByGlobalId"); + return runFindIdByGlobalId(query, globalId, dtype); + } + + public Long findIdByAltGlobalId(GlobalId globalId, DvObject.DType dtype) { 
+ Query query = em.createNamedQuery("DvObject.findIdByAlternativeGlobalId"); + return runFindIdByGlobalId(query, globalId, dtype); + } + private DvObject runFindByGlobalId(Query query, GlobalId gid, DvObject.DType dtype) { DvObject foundDvObject = null; try { @@ -136,6 +146,27 @@ private DvObject runFindByGlobalId(Query query, GlobalId gid, DvObject.DType dty } return foundDvObject; } + + private Long runFindIdByGlobalId(Query query, GlobalId gid, DvObject.DType dtype) { + Long foundDvObject = null; + try { + query.setParameter("identifier", gid.getIdentifier()); + query.setParameter("protocol", gid.getProtocol()); + query.setParameter("authority", gid.getAuthority()); + query.setParameter("dtype", dtype.getDType()); + foundDvObject = (Long) query.getSingleResult(); + } catch (javax.persistence.NoResultException e) { + // (set to .info, this can fill the log file with thousands of + // these messages during a large harvest run) + logger.fine("no dvObject found: " + gid.asString()); + // DO nothing, just return null. + return null; + } catch (Exception ex) { + logger.info("Exception caught in findByGlobalId: " + ex.getLocalizedMessage()); + return null; + } + return foundDvObject; + } public DvObject findByGlobalId(GlobalId globalId) { return (DvObject) em.createNamedQuery("DvObject.findByProtocolIdentifierAuthority") diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index f4cf38979c5..2f795a4da74 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -12,6 +12,7 @@ import java.io.IOException; import java.io.OutputStream; import java.text.SimpleDateFormat; +import java.time.LocalDate; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; @@ -910,8 +911,17 @@ public Long getCountGuestbookResponsesByDataFileId(Long dataFileId) { } public Long getCountGuestbookResponsesByDatasetId(Long datasetId) { + return getCountGuestbookResponsesByDatasetId(datasetId, null); + } + + public Long getCountGuestbookResponsesByDatasetId(Long datasetId, LocalDate date) { // dataset id is null, will return 0 - Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId); + Query query; + if(date != null) { + query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId + " and responsetime < '" + date.toString() + "'"); + }else { + query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId); + } return (Long) query.getSingleResult(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index 2f8c2b6d6b4..bc7b34ee8b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -11,6 +11,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailServiceBean; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -494,7 +495,7 @@ public String 
getMessageTextBasedOnNotification(UserNotification userNotificatio case STATUSUPDATED: version = (DatasetVersion) targetObject; pattern = BundleUtil.getStringFromBundle("notification.email.status.change"); - String[] paramArrayStatus = {version.getDataset().getDisplayName(), (version.getExternalStatusLabel()==null) ? "" : version.getExternalStatusLabel()}; + String[] paramArrayStatus = {version.getDataset().getDisplayName(), (version.getExternalStatusLabel()==null) ? "" : DatasetUtil.getLocaleExternalStatus(version.getExternalStatusLabel())}; messageText += MessageFormat.format(pattern, paramArrayStatus); return messageText; case CREATEACC: diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 8f7f53de1a2..8c0a0bf90b0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -750,6 +750,14 @@ else if (dataset.isLockedFor(DatasetLock.Reason.InReview)) { } } } + + public void checkUpdateDatasetVersionLock(Dataset dataset, DataverseRequest dataverseRequest, Command command) throws IllegalCommandException { + boolean hasAtLeastOneLockThatIsNotAnIngestLock = dataset.isLocked() && dataset.getLocks().stream() + .anyMatch(lock -> !DatasetLock.Reason.Ingest.equals(lock.getReason())); + if (hasAtLeastOneLockThatIsNotAnIngestLock) { + checkEditDatasetLock(dataset, dataverseRequest, command); + } + } public void checkPublishDatasetLock(Dataset dataset, DataverseRequest dataverseRequest, Command command) throws IllegalCommandException { if (dataset.isLocked()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 7bf473bb7f5..31cea6d09cd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -464,7 +464,16 @@ public boolean isDataCiteInstallation() { public boolean isMakeDataCountDisplayEnabled() { boolean safeDefaultIfKeyNotFound = (getValueForKey(SettingsServiceBean.Key.MDCLogPath)!=null); //Backward compatible return isTrueForKey(SettingsServiceBean.Key.DisplayMDCMetrics, safeDefaultIfKeyNotFound); + } + public LocalDate getMDCStartDate() { + String date = getValueForKey(SettingsServiceBean.Key.MDCStartDate); + LocalDate ld=null; + if(date!=null) { + ld = LocalDate.parse(date); + } + return ld; + } public boolean displayChronologicalDateFacets() { @@ -582,16 +591,21 @@ public Map getMetadataLanguages(DvObjectContainer target) { currentMap.put(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE, getDefaultMetadataLanguageLabel(target)); return currentMap; } - + private String getDefaultMetadataLanguageLabel(DvObjectContainer target) { String mlLabel = BundleUtil.getStringFromBundle("dataverse.metadatalanguage.setatdatasetcreation"); - String mlCode = target.getEffectiveMetadataLanguage(); - // If it's 'undefined', it's the global default - if (!mlCode.equals(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE)) { - // Get the label for the language code found - mlLabel = getBaseMetadataLanguageMap(false).get(mlCode); - mlLabel = mlLabel + " " + BundleUtil.getStringFromBundle("dataverse.inherited"); + + if(target.getOwner() != null) { // Root collection is excluded from inherit metadata language research + String mlCode = target.getOwner().getEffectiveMetadataLanguage(); + + // If it's undefined, no parent has a metadata language defined, 
and the global default should be used. + if (!mlCode.equals(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE)) { + // Get the label for the language code found + mlLabel = getBaseMetadataLanguageMap(false).get(mlCode); + mlLabel = mlLabel + " " + BundleUtil.getStringFromBundle("dataverse.inherited"); + } } + return mlLabel; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index d219339add9..b11a78c2416 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1376,7 +1376,7 @@ public Response fixMissingOriginalTypes() { "All the tabular files in the database already have the original types set correctly; exiting."); } else { for (Long fileid : affectedFileIds) { - logger.info("found file id: " + fileid); + logger.fine("found file id: " + fileid); } info.add("message", "Found " + affectedFileIds.size() + " tabular files with missing original types. Kicking off an async job that will repair the files in the background."); @@ -1566,6 +1566,12 @@ public Response registerDataFileAll(@Context ContainerRequestContext crc) { } catch (Exception e) { logger.info("Unexpected Exception: " + e.getMessage()); } + + try { + Thread.sleep(1000); + } catch (InterruptedException ie) { + logger.warning("Interrupted Exception when attempting to execute Thread.sleep()!"); + } } logger.info("Final Results:"); logger.info(alreadyRegistered + " of " + count + " files were already registered. " + new Date()); @@ -1577,6 +1583,88 @@ public Response registerDataFileAll(@Context ContainerRequestContext crc) { return ok("Datafile registration complete." + successes + " of " + released + " unregistered, published files registered successfully."); } + + @GET + @AuthRequired + @Path("/registerDataFiles/{alias}") + public Response registerDataFilesInCollection(@Context ContainerRequestContext crc, @PathParam("alias") String alias, @QueryParam("sleep") Integer sleepInterval) { + Dataverse collection; + try { + collection = findDataverseOrDie(alias); + } catch (WrappedResponse r) { + return r.getResponse(); + } + + AuthenticatedUser superuser = authSvc.getAdminUser(); + if (superuser == null) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "Cannot find the superuser to execute /admin/registerDataFiles."); + } + + if (!systemConfig.isFilePIDsEnabledForCollection(collection)) { + return ok("Registration of file-level pid is disabled in collection "+alias+"; nothing to do"); + } + + List dataFiles = fileService.findByDirectCollectionOwner(collection.getId()); + Integer count = dataFiles.size(); + Integer countSuccesses = 0; + Integer countAlreadyRegistered = 0; + Integer countReleased = 0; + Integer countDrafts = 0; + + if (sleepInterval == null) { + sleepInterval = 1; + } else if (sleepInterval.intValue() < 1) { + return error(Response.Status.BAD_REQUEST, "Invalid sleep interval: "+sleepInterval); + } + + logger.info("Starting to register: analyzing " + count + " files. " + new Date()); + logger.info("Only unregistered, published files will be registered."); + + + + for (DataFile df : dataFiles) { + try { + if ((df.getIdentifier() == null || df.getIdentifier().isEmpty())) { + if (df.isReleased()) { + countReleased++; + DataverseRequest r = createDataverseRequest(superuser); + execCommand(new RegisterDvObjectCommand(r, df)); + countSuccesses++; + if (countSuccesses % 100 == 0) { + logger.info(countSuccesses + " out of " + count + " files registered successfully. 
" + new Date()); + } + } else { + countDrafts++; + logger.fine(countDrafts + " out of " + count + " files not yet published"); + } + } else { + countAlreadyRegistered++; + logger.fine(countAlreadyRegistered + " out of " + count + " files are already registered. " + new Date()); + } + } catch (WrappedResponse ex) { + countReleased++; + logger.info("Failed to register file id: " + df.getId()); + Logger.getLogger(Datasets.class.getName()).log(Level.SEVERE, null, ex); + } catch (Exception e) { + logger.info("Unexpected Exception: " + e.getMessage()); + } + + try { + Thread.sleep(sleepInterval * 1000); + } catch (InterruptedException ie) { + logger.warning("Interrupted Exception when attempting to execute Thread.sleep()!"); + } + } + + logger.info(countAlreadyRegistered + " out of " + count + " files were already registered. " + new Date()); + logger.info(countDrafts + " out of " + count + " files are not yet published. " + new Date()); + logger.info(countReleased + " out of " + count + " unregistered, published files to register. " + new Date()); + logger.info(countSuccesses + " out of " + countReleased + " unregistered, published files registered successfully. " + + new Date()); + + return ok("Datafile registration complete. " + countSuccesses + " out of " + countReleased + + " unregistered, published files registered successfully."); + } @GET @AuthRequired diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index d40bc153141..8c1390b597e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -10,6 +10,7 @@ import edu.harvard.iq.dataverse.authorization.RoleAssignee; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; @@ -82,6 +83,7 @@ import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; import edu.harvard.iq.dataverse.metrics.MetricsUtil; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountUtil; +import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; import edu.harvard.iq.dataverse.util.ArchiverUtil; @@ -236,6 +238,9 @@ public class Datasets extends AbstractApiBean { @EJB DatasetVersionServiceBean datasetversionService; + @Inject + PrivateUrlServiceBean privateUrlService; + /** * Used to consolidate the way we parse and handle dataset versions. * @param @@ -2764,14 +2769,7 @@ public Response deleteLocks(@Context ContainerRequestContext crc, @PathParam("id } // kick of dataset reindexing, in case the locks removed // affected the search card: - try { - indexService.indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post lock removal indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); - - } + indexService.asyncIndexDataset(dataset, true); return ok("locks removed"); } return ok("dataset not locked"); @@ -2784,14 +2782,7 @@ public Response deleteLocks(@Context ContainerRequestContext crc, @PathParam("id dataset = findDatasetOrDie(id); // ... and kick of dataset reindexing, in case the lock removed // affected the search card: - try { - indexService.indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post lock removal indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); - - } + indexService.asyncIndexDataset(dataset, true); return ok("lock type " + lock.getReason() + " removed"); } return ok("no lock type " + lockType + " on the dataset"); @@ -2823,14 +2814,7 @@ public Response lockDataset(@Context ContainerRequestContext crc, @PathParam("id // refresh the dataset: dataset = findDatasetOrDie(id); // ... and kick of dataset reindexing: - try { - indexService.indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post add lock indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); - - } + indexService.asyncIndexDataset(dataset, true); return ok("dataset locked with lock type " + lockType); } catch (WrappedResponse wr) { @@ -3849,4 +3833,62 @@ public Response getExternalToolDVParams(@Context ContainerRequestContext crc, return wr.getResponse(); } } + + @GET + @Path("summaryFieldNames") + public Response getDatasetSummaryFieldNames() { + String customFieldNames = settingsService.getValueForKey(SettingsServiceBean.Key.CustomDatasetSummaryFields); + String[] fieldNames = DatasetUtil.getDatasetSummaryFieldNames(customFieldNames); + JsonArrayBuilder fieldNamesArrayBuilder = Json.createArrayBuilder(); + for (String fieldName : fieldNames) { + fieldNamesArrayBuilder.add(fieldName); + } + return ok(fieldNamesArrayBuilder); + } + + @GET + @Path("privateUrlDatasetVersion/{privateUrlToken}") + public Response getPrivateUrlDatasetVersion(@PathParam("privateUrlToken") String privateUrlToken) { + PrivateUrlUser privateUrlUser = privateUrlService.getPrivateUrlUserFromToken(privateUrlToken); + if (privateUrlUser == null) { + return notFound("Private URL user not found"); + } + boolean isAnonymizedAccess = privateUrlUser.hasAnonymizedAccess(); + String anonymizedFieldTypeNames = settingsSvc.getValueForKey(SettingsServiceBean.Key.AnonymizedFieldTypeNames); + if(isAnonymizedAccess && anonymizedFieldTypeNames == null) { + throw new NotAcceptableException("Anonymized Access not enabled"); + } + DatasetVersion dsv = privateUrlService.getDraftDatasetVersionFromToken(privateUrlToken); + if (dsv == null || dsv.getId() == null) { + return notFound("Dataset version not found"); + } + JsonObjectBuilder responseJson; + if (isAnonymizedAccess) { + List anonymizedFieldTypeNamesList = new 
ArrayList<>(Arrays.asList(anonymizedFieldTypeNames.split(",\\s"))); + responseJson = json(dsv, anonymizedFieldTypeNamesList); + } else { + responseJson = json(dsv); + } + return ok(responseJson); + } + + @GET + @Path("privateUrlDatasetVersion/{privateUrlToken}/citation") + public Response getPrivateUrlDatasetVersionCitation(@PathParam("privateUrlToken") String privateUrlToken) { + PrivateUrlUser privateUrlUser = privateUrlService.getPrivateUrlUserFromToken(privateUrlToken); + if (privateUrlUser == null) { + return notFound("Private URL user not found"); + } + DatasetVersion dsv = privateUrlService.getDraftDatasetVersionFromToken(privateUrlToken); + return (dsv == null || dsv.getId() == null) ? notFound("Dataset version not found") + : ok(dsv.getCitation(true, privateUrlUser.hasAnonymizedAccess())); + } + + @GET + @AuthRequired + @Path("{id}/versions/{versionId}/citation") + public Response getDatasetVersionCitation(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + return response(req -> ok( + getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getCitation(true, false)), getRequestUser(crc)); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index b57fe1dcd5d..bdab2818fbc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -82,6 +82,7 @@ import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.brief; import java.io.StringReader; import java.util.Collections; @@ -129,6 +130,7 @@ import java.util.Optional; import java.util.stream.Collectors; import javax.servlet.http.HttpServletResponse; +import javax.validation.constraints.NotNull; import javax.ws.rs.WebApplicationException; import javax.ws.rs.core.Context; import javax.ws.rs.core.StreamingOutput; @@ -166,7 +168,7 @@ public class Dataverses extends AbstractApiBean { @EJB SwordServiceBean swordService; - + @POST @AuthRequired public Response addRoot(@Context ContainerRequestContext crc, String body) { @@ -590,6 +592,69 @@ public Response deleteDataverse(@Context ContainerRequestContext crc, @PathParam }, getRequestUser(crc)); } + /** + * Endpoint to change attributes of a Dataverse collection. + * + * @apiNote Example curl command: + * curl -X PUT -d "test" http://localhost:8080/api/dataverses/$ALIAS/attribute/alias + * to change the alias of the collection named $ALIAS to "test". + */ + @PUT + @AuthRequired + @Path("{identifier}/attribute/{attribute}") + public Response updateAttribute(@Context ContainerRequestContext crc, @PathParam("identifier") String identifier, + @PathParam("attribute") String attribute, @QueryParam("value") String value) { + try { + Dataverse collection = findDataverseOrDie(identifier); + User user = getRequestUser(crc); + DataverseRequest dvRequest = createDataverseRequest(user); + + // TODO: The cases below use hard coded strings, because we have no place for definitions of those! + // They are taken from util.json.JsonParser / util.json.JsonPrinter. This shall be changed. + // This also should be extended to more attributes, like the type, theme, contacts, some booleans, etc. 
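Since the handler above binds the new value with @QueryParam("value"), a client passes it as a query parameter on the PUT; the @apiNote curl example sends it as a request body instead, which a reviewer may want to reconcile. A java.net.http sketch of the query-parameter form (host, alias, and API token are illustrative assumptions):

```java
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class UpdateCollectionAttribute {

    public static void main(String[] args) throws IOException, InterruptedException {
        String base = "http://localhost:8080";                                  // assumed local instance
        String alias = "myCollection";                                          // assumed collection alias
        String apiToken = System.getenv().getOrDefault("API_TOKEN", "xxxxxxxx"); // assumed admin token

        // PUT /api/dataverses/{identifier}/attribute/{attribute}?value=...
        HttpRequest request = HttpRequest.newBuilder(
                URI.create(base + "/api/dataverses/" + alias + "/attribute/filePIDsEnabled?value=true"))
                .header("X-Dataverse-key", apiToken)
                .PUT(HttpRequest.BodyPublishers.noBody())
                .build();

        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.statusCode() + " " + response.body());
    }
}
```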
+ switch (attribute) { + case "alias": + collection.setAlias(value); + break; + case "name": + collection.setName(value); + break; + case "description": + collection.setDescription(value); + break; + case "affiliation": + collection.setAffiliation(value); + break; + /* commenting out the code from the draft pr #9462: + case "versionPidsConduct": + CollectionConduct conduct = CollectionConduct.findBy(value); + if (conduct == null) { + return badRequest("'" + value + "' is not one of [" + + String.join(",", CollectionConduct.asList()) + "]"); + } + collection.setDatasetVersionPidConduct(conduct); + break; + */ + case "filePIDsEnabled": + collection.setFilePIDsEnabled(parseBooleanOrDie(value)); + break; + default: + return badRequest("'" + attribute + "' is not a supported attribute"); + } + + // Off to persistence layer + execCommand(new UpdateDataverseCommand(collection, null, null, dvRequest, null)); + + // Also return modified collection to user + return ok("Update successful", JsonPrinter.json(collection)); + + // TODO: This is an anti-pattern, necessary due to this bean being an EJB, causing very noisy and unnecessary + // logging by the EJB container for bubbling exceptions. (It would be handled by the error handlers.) + } catch (WrappedResponse e) { + return e.getResponse(); + } + } + @DELETE @AuthRequired @Path("{linkingDataverseId}/deleteLink/{linkedDataverseId}") diff --git a/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java b/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java index 513f27c9e3d..3960fe4e996 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java @@ -247,11 +247,7 @@ private boolean createNewDraftVersion(ArrayList neededToUpdate } boolean doNormalSolrDocCleanUp = true; - try { - Future indexDatasetFuture = indexService.indexDataset(dataset, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException ex) { - logger.log(Level.SEVERE, "Couldn''t index dataset: " + ex.getMessage()); - } + indexService.asyncIndexDataset(dataset, doNormalSolrDocCleanUp); return true; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 4b2e7a37b98..f6eda085c95 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -704,7 +704,7 @@ public Response reingest(@Context ContainerRequestContext crc, @PathParam("id") dataFile = fileService.save(dataFile); // queue the data ingest job for asynchronous execution: - String status = ingestService.startIngestJobs(new ArrayList<>(Arrays.asList(dataFile)), u); + String status = ingestService.startIngestJobs(dataset.getId(), new ArrayList<>(Arrays.asList(dataFile)), u); if (!StringUtil.isEmpty(status)) { // This most likely indicates some sort of a problem (for example, @@ -812,4 +812,10 @@ public Response getExternalToolFMParams(@Context ContainerRequestContext crc, @P eth = new ExternalToolHandler(externalTool, target.getDataFile(), apiToken, target, locale); return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); } + + @GET + @Path("fixityAlgorithm") + public Response getFixityAlgorithm() { + return ok(systemConfig.getFileFixityChecksumAlgorithm().toString()); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Index.java b/src/main/java/edu/harvard/iq/dataverse/api/Index.java index 728d86addcb..1361de8fbf7 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/api/Index.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Index.java @@ -243,12 +243,7 @@ public Response indexTypeById(@PathParam("type") String type, @PathParam("id") L Dataset dataset = datasetService.find(id); if (dataset != null) { boolean doNormalSolrDocCleanUp = true; - try { - Future indexDatasetFuture = indexService.indexDataset(dataset, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - // - return error(Status.BAD_REQUEST, writeFailureToLog(e.getLocalizedMessage(), dataset)); - } + indexService.asyncIndexDataset(dataset, doNormalSolrDocCleanUp); return ok("starting reindex of dataset " + id); } else { @@ -266,11 +261,7 @@ public Response indexTypeById(@PathParam("type") String type, @PathParam("id") L * @todo How can we display the result to the user? */ boolean doNormalSolrDocCleanUp = true; - try { - Future indexDatasetFuture = indexService.indexDataset(datasetThatOwnsTheFile, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - writeFailureToLog(e.getLocalizedMessage(), datasetThatOwnsTheFile); - } + indexService.asyncIndexDataset(datasetThatOwnsTheFile, doNormalSolrDocCleanUp); return ok("started reindexing " + type + "/" + id); } else { @@ -318,11 +309,7 @@ public Response indexDatasetByPersistentId(@QueryParam("persistentId") String pe } if (dataset != null) { boolean doNormalSolrDocCleanUp = true; - try { - Future indexDatasetFuture = indexService.indexDataset(dataset, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - writeFailureToLog(e.getLocalizedMessage(), dataset); - } + indexService.asyncIndexDataset(dataset, doNormalSolrDocCleanUp); JsonObjectBuilder data = Json.createObjectBuilder(); data.add("message", "Reindexed dataset " + persistentId); data.add("id", dataset.getId()); diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/groups/GroupServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/groups/GroupServiceBean.java index 98fe3ad18c3..66293a4f781 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/groups/GroupServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/groups/GroupServiceBean.java @@ -7,10 +7,13 @@ import edu.harvard.iq.dataverse.authorization.groups.impl.explicit.ExplicitGroup; import edu.harvard.iq.dataverse.authorization.groups.impl.explicit.ExplicitGroupProvider; import edu.harvard.iq.dataverse.authorization.groups.impl.explicit.ExplicitGroupServiceBean; +import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.IpGroup; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.IpGroupProvider; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.IpGroupsServiceBean; +import edu.harvard.iq.dataverse.authorization.groups.impl.maildomain.MailDomainGroup; import edu.harvard.iq.dataverse.authorization.groups.impl.maildomain.MailDomainGroupProvider; import edu.harvard.iq.dataverse.authorization.groups.impl.maildomain.MailDomainGroupServiceBean; +import edu.harvard.iq.dataverse.authorization.groups.impl.shib.ShibGroup; import edu.harvard.iq.dataverse.authorization.groups.impl.shib.ShibGroupProvider; import edu.harvard.iq.dataverse.authorization.groups.impl.shib.ShibGroupServiceBean; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; @@ -97,9 +100,49 @@ public MailDomainGroupProvider getMailDomainGroupProvider() { * @return The groups {@code req} is part of under {@code dvo}. 
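Several hunks in this diff (DatasetVersionServiceBean, Datasets, EditDDI, Index, and the command classes further down) replace the try/catch around indexService.indexDataset(...) with a single indexService.asyncIndexDataset(...) call, moving both the Solr call and its failure logging off the request thread. The real asyncIndexDataset() is not shown in this diff, so the following is only a sketch of the usual @Asynchronous EJB shape such a method takes; its body is an assumption, not the actual IndexServiceBean implementation:

```java
import java.io.IOException;

import javax.ejb.Asynchronous;
import javax.ejb.Stateless;

import org.apache.solr.client.solrj.SolrServerException;

import edu.harvard.iq.dataverse.Dataset;

/** Sketch only: a fire-and-forget async entry point wrapping synchronous indexing. */
@Stateless
public class AsyncIndexSketch {

    @Asynchronous
    public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) {
        try {
            indexDataset(dataset, doNormalSolrDocCleanUp);
        } catch (IOException | SolrServerException e) {
            // The caller has already returned; failures are logged (e.g. via a
            // success/failure log) instead of propagating to the request thread.
        }
    }

    private void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp)
            throws IOException, SolrServerException {
        // Placeholder for the synchronous Solr indexing work.
    }
}
```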
*/ public Set groupsFor( DataverseRequest req, DvObject dvo ) { - return groupProviders.values().stream() + Set ret = groupProviders.values().stream() .flatMap(gp->(Stream)gp.groupsFor(req, dvo).stream()) .collect(toSet()); + + // ShibGroupProvider.groupsFor(), above, only returns the Shib Groups + // (as you would expect), but not the Explicit Groups that may include them + // (unlike the ExplicitGroupProvider, that returns all the ancestors too). + // We appear to rely on this method returning all of the ancestor groups + // for everything, so we need to perform some extra hacky steps in + // order to obtain the ancestors for the shib groups as well: + + Set directAncestorsOfShibGroups = new HashSet<>(); + for (Group group : ret) { + + if (group instanceof ShibGroup + || group instanceof IpGroup + || group instanceof MailDomainGroup) { + // if this is one of the non-explicit group types above, we + // need to find if it is included in some explicit group; i.e., + // if it has direct ancestors that happen to be explicit groups: + + directAncestorsOfShibGroups.addAll(explicitGroupService.findDirectlyContainingGroups(group)); + } + } + + if (!directAncestorsOfShibGroups.isEmpty()) { + // ... and now we can run the Monster Query in the ExplicitServiceBean + // that will find ALL the hierarchical explicit group ancestors of + // these groups that include the shib groups fond + + Set allAncestorsOfShibGroups = explicitGroupService.findClosure(directAncestorsOfShibGroups); + + if (allAncestorsOfShibGroups != null) { + ret.addAll(allAncestorsOfShibGroups); + } + } + + // Perhaps the code above should be moved into the ShibGroupProvider (??) + // Also, this most likely applies not just to ShibGroups, but to the + // all the groups that are not ExplicitGroups, i.e., IP- and domain-based + // groups too. (??) 
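The extra steps added to groupsFor() here rely on ExplicitGroupServiceBean.findDirectlyContainingGroups() and on findClosure(), which this diff makes public. Conceptually the closure is a breadth-first walk over "which explicit groups directly contain this group"; the sketch below shows that idea in memory with a plain map (an illustrative assumption; the real findClosure() runs a recursive SQL query):

```java
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class GroupClosureSketch {

    /** Edges point from a group to the explicit groups that directly contain it. */
    static Set<String> closure(Set<String> seed, Map<String, List<String>> directlyContainedIn) {
        Set<String> result = new HashSet<>(seed);
        Deque<String> queue = new ArrayDeque<>(seed);
        while (!queue.isEmpty()) {
            String group = queue.poll();
            for (String parent : directlyContainedIn.getOrDefault(group, List.of())) {
                if (result.add(parent)) {   // add() returns false once an ancestor was visited
                    queue.add(parent);
                }
            }
        }
        return result;
    }

    public static void main(String[] args) {
        Map<String, List<String>> containment = new HashMap<>();
        containment.put("shib/uni-users", List.of("&explicit/campus"));
        containment.put("&explicit/campus", List.of("&explicit/everyone"));
        // A shib group is included in &explicit/campus, which is itself inside &explicit/everyone.
        System.out.println(closure(Set.of("shib/uni-users"), containment));
    }
}
```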
+ + return ret; } /** diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/groups/impl/explicit/ExplicitGroup.java b/src/main/java/edu/harvard/iq/dataverse/authorization/groups/impl/explicit/ExplicitGroup.java index 93de4480e55..43705a2240e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/groups/impl/explicit/ExplicitGroup.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/groups/impl/explicit/ExplicitGroup.java @@ -61,7 +61,7 @@ @NamedQuery( name="ExplicitGroup.findByAuthenticatedUserIdentifier", query="SELECT eg FROM ExplicitGroup eg JOIN eg.containedAuthenticatedUsers au " + "WHERE au.userIdentifier=:authenticatedUserIdentifier"), - @NamedQuery( name="ExplicitGroup.findByRoleAssgineeIdentifier", + @NamedQuery( name="ExplicitGroup.findByRoleAssigneeIdentifier", query="SELECT eg FROM ExplicitGroup eg JOIN eg.containedRoleAssignees cra " + "WHERE cra=:roleAssigneeIdentifier"), @NamedQuery( name="ExplicitGroup.findByContainedExplicitGroupId", diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/groups/impl/explicit/ExplicitGroupServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/groups/impl/explicit/ExplicitGroupServiceBean.java index de9b9ba530d..b7c1b46b3a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/groups/impl/explicit/ExplicitGroupServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/groups/impl/explicit/ExplicitGroupServiceBean.java @@ -169,7 +169,7 @@ public Set findDirectlyContainingGroups( RoleAssignee ra ) { } else { return provider.updateProvider( new HashSet<>( - em.createNamedQuery("ExplicitGroup.findByRoleAssgineeIdentifier", ExplicitGroup.class) + em.createNamedQuery("ExplicitGroup.findByRoleAssigneeIdentifier", ExplicitGroup.class) .setParameter("roleAssigneeIdentifier", ra.getIdentifier()) .getResultList() )); @@ -198,7 +198,7 @@ public Set findGroups( RoleAssignee ra, DvObject o ) { .filter( g -> g.owner.isAncestorOf(o) ) .collect( Collectors.toSet() ); } - + /** * Finds all the groups {@code ra} directly belongs to in the context of {@code o}. In effect, * collects all the groups {@code ra} belongs to and that are defined at {@code o} @@ -252,7 +252,7 @@ public Set findDirectGroups( RoleAssignee ra, DvObject o ) { * @param seed the initial set of groups. * @return Transitive closure (based on group containment) of the groups in {@code seed}. */ - protected Set findClosure( Set seed ) { + public Set findClosure( Set seed ) { if ( seed.isEmpty() ) return Collections.emptySet(); diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/groups/impl/shib/ShibGroup.java b/src/main/java/edu/harvard/iq/dataverse/authorization/groups/impl/shib/ShibGroup.java index 3beb8dadedb..79fda0ca7d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/groups/impl/shib/ShibGroup.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/groups/impl/shib/ShibGroup.java @@ -135,5 +135,4 @@ public RoleAssigneeDisplayInfo getDisplayInfo() { public boolean contains(DataverseRequest aRequest) { throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 
} - } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 9e805a304a5..a75775810d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -44,6 +44,7 @@ public class DatasetUtil { private static final Logger logger = Logger.getLogger(DatasetUtil.class.getCanonicalName()); + public static final String datasetDefaultSummaryFieldNames = "dsDescription,subject,keyword,publication,notesText"; public static String datasetLogoFilenameFinal = "dataset_logo_original"; public static String datasetLogoThumbnail = "dataset_logo"; public static String thumbExtension = ".thumb"; @@ -429,32 +430,33 @@ public static boolean isDatasetLogoPresent(Dataset dataset, int size) { return false; } - public static List getDatasetSummaryFields(DatasetVersion datasetVersion, String customFields) { - - List datasetFields = new ArrayList<>(); - - //if customFields are empty, go with default fields. - if(customFields==null || customFields.isEmpty()){ - customFields="dsDescription,subject,keyword,publication,notesText"; - } - - String[] customFieldList= customFields.split(","); - Map DatasetFieldsSet=new HashMap<>(); - + public static List getDatasetSummaryFields(DatasetVersion datasetVersion, String customFieldNames) { + Map datasetFieldsSet = new HashMap<>(); for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) { - DatasetFieldsSet.put(dsf.getDatasetFieldType().getName(),dsf); + datasetFieldsSet.put(dsf.getDatasetFieldType().getName(), dsf); + } + String[] summaryFieldNames = getDatasetSummaryFieldNames(customFieldNames); + List datasetSummaryFields = new ArrayList<>(); + for (String summaryFieldName : summaryFieldNames) { + DatasetField df = datasetFieldsSet.get(summaryFieldName); + if (df != null) { + datasetSummaryFields.add(df); + } } - - for(String cfl : customFieldList) - { - DatasetField df = DatasetFieldsSet.get(cfl); - if(df!=null) - datasetFields.add(df); + return datasetSummaryFields; + } + + public static String[] getDatasetSummaryFieldNames(String customFieldNames) { + String summaryFieldNames; + // If the custom fields are empty, go with the default fields. + if(customFieldNames == null || customFieldNames.isEmpty()){ + summaryFieldNames = datasetDefaultSummaryFieldNames; + } else { + summaryFieldNames = customFieldNames; } - - return datasetFields; + return summaryFieldNames.split(","); } - + public static boolean isRsyncAppropriateStorageDriver(Dataset dataset){ // ToDo - rsync was written before multiple store support and currently is hardcoded to use the DataAccess.S3 store. 
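The DatasetUtil refactor above pulls the summary-field parsing out into getDatasetSummaryFieldNames(), which the new datasets/summaryFieldNames endpoint also reuses: fall back to the built-in default list when the CustomDatasetSummaryFields setting is empty, otherwise split the comma-separated custom list. A self-contained sketch mirroring that behavior:

```java
import java.util.Arrays;

public class SummaryFieldNamesSketch {

    static final String DEFAULTS = "dsDescription,subject,keyword,publication,notesText";

    /** Mirrors DatasetUtil.getDatasetSummaryFieldNames(): defaults unless a custom list is set. */
    static String[] summaryFieldNames(String customFieldNames) {
        String names = (customFieldNames == null || customFieldNames.isEmpty())
                ? DEFAULTS
                : customFieldNames;
        return names.split(",");
    }

    public static void main(String[] args) {
        System.out.println(Arrays.toString(summaryFieldNames(null)));            // the five defaults
        System.out.println(Arrays.toString(summaryFieldNames("title,author")));  // [title, author]
    }
}
```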
// When those restrictions are lifted/rsync can be configured per store, this test should check that setting diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index bf831d39965..4c2510b6ccb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -47,6 +47,8 @@ import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; + +import javax.ejb.Asynchronous; import javax.ejb.EJBException; import javax.json.Json; import javax.json.JsonArrayBuilder; @@ -643,7 +645,7 @@ private boolean runAddReplacePhase1(Dataset owner, df.setRootDataFileId(fileToReplace.getRootDataFileId()); } // Reuse any file PID during a replace operation (if File PIDs are in use) - if (systemConfig.isFilePIDsEnabled()) { + if (systemConfig.isFilePIDsEnabledForCollection(owner.getOwner())) { df.setGlobalId(fileToReplace.getGlobalId()); df.setGlobalIdCreateTime(fileToReplace.getGlobalIdCreateTime()); // Should be true or fileToReplace wouldn't have an identifier (since it's not @@ -1928,11 +1930,6 @@ private boolean step_100_startIngestJobs(){ // finalFileList.clear(); - // TODO: Need to run ingwest async...... - //if (true){ - //return true; - //} - if (!multifile) { msg("pre ingest start"); // start the ingest! @@ -1941,7 +1938,6 @@ private boolean step_100_startIngestJobs(){ } return true; } - private void msg(String m){ logger.fine(m); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java index d9c5e58d250..eb171160376 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java @@ -139,16 +139,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { //Use for code that requires database ids postDBFlush(theDataset, ctxt); - // TODO: this needs to be moved in to an onSuccess method; not adding to this PR as its out of scope - // TODO: switch to asynchronous version when JPA sync works - // ctxt.index().asyncIndexDataset(theDataset.getId(), true); - try{ - ctxt.index().indexDataset(theDataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post create dataset indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + theDataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, theDataset); - } + ctxt.index().asyncIndexDataset(theDataset, true); return theDataset; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java index 7e2269c375d..1d83f522f29 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java @@ -65,10 +65,11 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { //good wrapped response if the TOA/Request Access not in compliance prepareDatasetAndVersion(); - // TODO make async - // ctxt.index().indexDataset(dataset); - return ctxt.datasets().storeVersion(newVersion); - + DatasetVersion version = ctxt.datasets().storeVersion(newVersion); + if (ctxt.index() != null) { + ctxt.index().asyncIndexDataset(dataset, true); + } + return version; } /** diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeaccessionDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeaccessionDatasetVersionCommand.java index 3572ac1d0a5..8c643d5cd65 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeaccessionDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeaccessionDatasetVersionCommand.java @@ -106,15 +106,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { DatasetVersion version = (DatasetVersion) r; Dataset dataset = version.getDataset(); - try { - ctxt.index().indexDataset(dataset, true); - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post-publication indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java index aada2663bf6..f21a2782609 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java @@ -66,15 +66,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { Dataset dataset = (Dataset) r; if (index) { - try { - ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post delete linked dataset indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } - + ctxt.index().asyncIndexDataset(dataset, true); } return retVal; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetVersionCommand.java index 3f63c3c6d27..a67d7008ef8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetVersionCommand.java @@ -96,13 +96,7 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { } } boolean doNormalSolrDocCleanUp = true; - try { - ctxt.index().indexDataset(doomed, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post delete version indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + doomed.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, doomed); - } + ctxt.index().asyncIndexDataset(doomed, doNormalSolrDocCleanUp); return; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 7ae9145009c..253c761f0c3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -238,14 +238,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { } catch (Exception e) { logger.warning("Failure to send dataset published messages for : " + dataset.getId() + " : " + e.getMessage()); } - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post-publication indexing failed. 
You can kick off a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); //re-indexing dataverses that have additional subjects if (!dataversesToIndex.isEmpty()){ @@ -373,7 +366,7 @@ private void publicizeExternalIdentifier(Dataset dataset, CommandContext ctxt) t String currentGlobalIdProtocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, ""); String currentGlobalAuthority = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, ""); String dataFilePIDFormat = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); - boolean isFilePIDsEnabled = ctxt.systemConfig().isFilePIDsEnabled(); + boolean isFilePIDsEnabled = ctxt.systemConfig().isFilePIDsEnabledForCollection(getDataset().getOwner()); // We will skip trying to register the global identifiers for datafiles // if "dependent" file-level identifiers are requested, AND the naming // protocol, or the authority of the dataset global id is different from diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java index da70529e76d..aef749d7e26 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java @@ -69,14 +69,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; DatasetLinkingDataverse dld = (DatasetLinkingDataverse) r; - try { - ctxt.index().indexDataset(dld.getDataset(), true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post link dataset indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dld.getDataset().getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dld.getDataset()); - retVal = false; - } + ctxt.index().asyncIndexDataset(dld.getDataset(), true); return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRolesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRolesCommand.java index da9e5adf247..d82b2e7a81d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRolesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRolesCommand.java @@ -27,7 +27,7 @@ public ListRolesCommand(DataverseRequest aRequest, Dataverse aDefinitionPoint) { @Override public Set execute(CommandContext ctxt) throws CommandException { - return definitionPoint.getRoles(); + return ctxt.roles().availableRoles(definitionPoint.getId()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommand.java index 55d02362e88..94bcfa2f5b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommand.java @@ -141,15 +141,8 @@ public void executeImpl(CommandContext ctxt) throws CommandException { moved.setOwner(destination); ctxt.em().merge(moved); - try { - boolean doNormalSolrDocCleanUp = true; - ctxt.index().indexDataset(moved, doNormalSolrDocCleanUp); - - } catch (Exception e) { // RuntimeException e ) { - logger.log(Level.WARNING, "Exception while indexing:" + e.getMessage()); //, e); - throw new CommandException(BundleUtil.getStringFromBundle("dashboard.card.datamove.dataset.command.error.indexingProblem"), this); - - } + boolean doNormalSolrDocCleanUp = true; + ctxt.index().asyncIndexDataset(moved, doNormalSolrDocCleanUp); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommand.java index dcae4e039e6..ea38f5a7af7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommand.java @@ -302,14 +302,7 @@ public void executeImpl(CommandContext ctxt) throws CommandException { if (moved.getDatasetLinkingDataverses() != null && !moved.getDatasetLinkingDataverses().isEmpty()) { for (DatasetLinkingDataverse dld : moved.getDatasetLinkingDataverses()) { Dataset linkedDS = ctxt.datasets().find(dld.getDataset().getId()); - try { - ctxt.index().indexDataset(linkedDS, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post move dataverse dataset indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + linkedDS.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, linkedDS); - - } + ctxt.index().asyncIndexDataset(linkedDS, true); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java index 5e29a21b6a1..f5ef121dee2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java @@ -135,7 +135,7 @@ public PublishDatasetResult execute(CommandContext ctxt) throws CommandException String dataFilePIDFormat = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); boolean registerGlobalIdsForFiles = (currentGlobalIdProtocol.equals(theDataset.getProtocol()) || dataFilePIDFormat.equals("INDEPENDENT")) - && ctxt.systemConfig().isFilePIDsEnabled(); + && ctxt.systemConfig().isFilePIDsEnabledForCollection(theDataset.getOwner()); if ( registerGlobalIdsForFiles ){ registerGlobalIdsForFiles = currentGlobalAuthority.equals( theDataset.getAuthority() ); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java index 564c4a1f6b5..bdb6ceffd6d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java @@ -83,12 +83,8 @@ public DataFile execute(CommandContext ctxt) throws CommandException { throw new CommandException("Exception while attempting to save the new file type: " + EjbUtil.ejbExceptionToString(ex), this); } Dataset dataset = fileToRedetect.getOwner(); - try { - boolean doNormalSolrDocCleanUp = true; - ctxt.index().indexDataset(dataset, doNormalSolrDocCleanUp); - } catch (Exception ex) { - logger.info("Exception while reindexing files during file type redetection: " + ex.getLocalizedMessage()); - } + boolean doNormalSolrDocCleanUp = true; + ctxt.index().asyncIndexDataset(dataset, doNormalSolrDocCleanUp); try { ExportService instance = ExportService.getInstance(); instance.exportAllFormats(dataset); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java index 299d1a925f4..6da3bf0ad84 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java @@ -137,15 +137,8 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { //Only continue if you can successfully migrate the handle boolean doNormalSolrDocCleanUp = true; Dataset dataset = (Dataset) target; - try { - ctxt.index().indexDataset(dataset, doNormalSolrDocCleanUp); - ctxt.solrIndex().indexPermissionsForOneDvObject( dataset); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post migrate handle dataset indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - - } + ctxt.index().asyncIndexDataset(dataset, doNormalSolrDocCleanUp); + ctxt.solrIndex().indexPermissionsForOneDvObject( dataset); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java index ba0348f57d6..caf37ad4de1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java @@ -72,14 +72,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; Dataset dataset = (Dataset) r; - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post return to author indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java index 72f0ef335fb..557f9dff622 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java @@ -101,14 +101,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; Dataset dataset = (Dataset) r; - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post submit for review indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetDatasetCitationDateCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetDatasetCitationDateCommand.java index fe14d56562d..2cae9e51896 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetDatasetCitationDateCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetDatasetCitationDateCommand.java @@ -49,14 +49,8 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; Dataset dataset = (Dataset) r; - try { - Future indexString = ctxt.index().indexDataset(dataset, false); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post set dataset citation date indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, false); + return retVal; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java index 130030798ab..77a4bf5b8ba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java @@ -75,14 +75,8 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; Dataset dataset = (Dataset) r; - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post submit for review indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); + return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 33f64f23076..12a5d8611f4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -3,7 +3,6 @@ import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; @@ -12,17 +11,13 @@ import edu.harvard.iq.dataverse.util.DatasetFieldUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; -import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.Future; import java.util.logging.Level; import java.util.logging.Logger; import javax.validation.ConstraintViolationException; -import org.apache.solr.client.solrj.SolrServerException; - /** * * @author skraffmiller @@ -101,7 +96,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { } Dataset theDataset = getDataset(); - ctxt.permissions().checkEditDatasetLock(theDataset, getRequest(), this); + ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); Dataset savedDataset = null; try { @@ -270,21 +265,12 @@ public Dataset execute(CommandContext ctxt) throws CommandException { @Override public boolean onSuccess(CommandContext ctxt, Object r) { - - boolean retVal = true; - Dataset dataset = (Dataset) r; - - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post update dataset indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } - - return retVal; - + // Async indexing significantly improves performance when updating datasets with thousands of files. + // Indexing will be started immediately, unless an indexing job is already running for this dataset + // (in that case, the newest version will be scheduled for indexing once the running job finishes). + // See the documentation of the asyncIndexDataset method for more details. + ctxt.index().asyncIndexDataset((Dataset) r, true); + return true; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java index 57a3394ff77..218b0ea89d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java @@ -113,13 +113,9 @@ public boolean onSuccess(CommandContext ctxt, Object r) { // first kick of async index of datasets // TODO: is this actually needed? Is there a better way to handle - try { - Dataverse result = (Dataverse) r; - List datasets = ctxt.datasets().findByOwnerId(result.getId()); - ctxt.index().asyncIndexDatasetList(datasets, true); - } catch (IOException | SolrServerException e) { - // these datasets are being indexed asynchrounously, so not sure how to handle errors here - } + Dataverse result = (Dataverse) r; + List datasets = ctxt.datasets().findByOwnerId(result.getId()); + ctxt.index().asyncIndexDatasetList(datasets, true); return ctxt.dataverses().index((Dataverse) r); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java index 7e37241563c..7230f9f9c0a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java @@ -57,7 +57,7 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { // didn't need updating. String currentGlobalIdProtocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, ""); String dataFilePIDFormat = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); - boolean isFilePIDsEnabled = ctxt.systemConfig().isFilePIDsEnabled(); + boolean isFilePIDsEnabled = ctxt.systemConfig().isFilePIDsEnabledForCollection(target.getOwner()); // We will skip trying to update the global identifiers for datafiles if they // aren't being used. 
// If they are, we need to assure that there's an existing PID or, as when diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index 88a51017b75..dac046373ba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -111,6 +111,7 @@ public String handleRequest(boolean preview) { case DATASET: callback=SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + dataset.getId() + "/versions/:latest/toolparams/" + externalTool.getId(); + break; case FILE: callback= SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/files/" + dataFile.getId() + "/metadata/" + fileMetadata.getId() + "/toolparams/" diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java index e9923012fad..b1c93e52ebd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java @@ -19,7 +19,6 @@ */ package edu.harvard.iq.dataverse.ingest; -import edu.harvard.iq.dataverse.DataFile; import java.io.Serializable; import java.util.List; import java.util.ArrayList; @@ -32,49 +31,21 @@ * @author Leonid Andreev */ public class IngestMessage implements Serializable { - public static final int INGEST_MESAGE_LEVEL_ERROR = 1; - public static final int INGEST_MESAGE_LEVEL_INFO = 2; - /** Creates a new instance of IngestMessage */ - public IngestMessage() { - this(INGEST_MESAGE_LEVEL_INFO); - } - public IngestMessage(int messageLevel) { - this.messageLevel = messageLevel; + public IngestMessage() { datafile_ids = new ArrayList(); } - public IngestMessage(int messageLevel, Long authenticatedUserId) { - this.messageLevel = messageLevel; + public IngestMessage(Long authenticatedUserId) { this.authenticatedUserId = authenticatedUserId; datafile_ids = new ArrayList(); } - - private int messageLevel = INGEST_MESAGE_LEVEL_INFO; private Long datasetId; - private Long datasetVersionId; - private String versionNote; - private String datasetVersionNumber; private List datafile_ids; private Long authenticatedUserId; - - public String getVersionNote() { - return versionNote; - } - - public void setVersionNote(String versionNote) { - this.versionNote = versionNote; - } - - public int getMessageLevel() { - return messageLevel; - } - - public void setMessageLevel(int messageLevel) { - this.messageLevel = messageLevel; - } + private String info; public Long getDatasetId() { return datasetId; @@ -83,30 +54,6 @@ public Long getDatasetId() { public void setDatasetId(Long datasetId) { this.datasetId = datasetId; } - - public Long getDatasetVersionId() { - return datasetVersionId; - } - - public void setDatasetVersionId(Long datasetVersionId) { - this.datasetVersionId = datasetVersionId; - } - - public boolean sendInfoMessage() { - return messageLevel >= INGEST_MESAGE_LEVEL_INFO; - } - - public boolean sendErrorMessage() { - return messageLevel >= INGEST_MESAGE_LEVEL_ERROR; - } - - public String getDatasetVersionNumber() { - return datasetVersionNumber; - } - - public void setDatasetVersionNumber(String datasetVersionNumber) { - this.datasetVersionNumber = datasetVersionNumber; - } public List getFileIds() { return datafile_ids; @@ -123,4 +70,12 @@ public void addFileId(Long file_id) { public Long getAuthenticatedUserId() { return authenticatedUserId; 
} + + public void setInfo(String info) { + this.info = info; + } + + public String getInfo() { + return info; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java index b029c0c97c5..77ec6701bc6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java @@ -38,7 +38,6 @@ import javax.jms.Message; import javax.jms.MessageListener; import javax.jms.ObjectMessage; - /** * * This is an experimental, JMS-based implementation of asynchronous @@ -53,6 +52,7 @@ @ActivationConfigProperty(propertyName = "destinationType", propertyValue = "javax.jms.Queue") } ) + public class IngestMessageBean implements MessageListener { private static final Logger logger = Logger.getLogger(IngestMessageBean.class.getCanonicalName()); @EJB DatasetServiceBean datasetService; @@ -69,17 +69,27 @@ public IngestMessageBean() { public void onMessage(Message message) { IngestMessage ingestMessage = null; - Long datafile_id = null; AuthenticatedUser authenticatedUser = null; try { ObjectMessage om = (ObjectMessage) message; ingestMessage = (IngestMessage) om.getObject(); + // if the lock was removed while an ingest was queued, retake the lock + // The "if" is the first thing that addDatasetLock method does. + // It has some complexity and would result in code duplication if repeated here. + // If that check were removed from the addDatasetLock method in the future without + // updating the code using this method, the ingest code would still not break because + // we remove "all" ingest locks at the end (right now, there can be at most one ingest lock). + datasetService.addDatasetLock(ingestMessage.getDatasetId(), + DatasetLock.Reason.Ingest, + ingestMessage.getAuthenticatedUserId(), + ingestMessage.getInfo()); + authenticatedUser = authenticationServiceBean.findByID(ingestMessage.getAuthenticatedUserId()); - Iterator iter = ingestMessage.getFileIds().iterator(); - datafile_id = null; + Iterator iter = ingestMessage.getFileIds().iterator(); + Long datafile_id = null; boolean ingestWithErrors = false; @@ -87,7 +97,7 @@ public void onMessage(Message message) { sbIngestedFiles.append("
    "); while (iter.hasNext()) { - datafile_id = (Long) iter.next(); + datafile_id = iter.next(); logger.fine("Start ingest job;"); try { @@ -128,9 +138,9 @@ public void onMessage(Message message) { IngestReport errorReport = new IngestReport(); errorReport.setFailure(); if (ex.getMessage() != null) { - errorReport.setReport("Ingest succeeded, but failed to save the ingested tabular data in the database: " + ex.getMessage()); + errorReport.setReport(BundleUtil.getStringFromBundle("file.ingest.saveFailed.detail.message") + ex.getMessage()); } else { - errorReport.setReport("Ingest succeeded, but failed to save the ingested tabular data in the database; no further information is available"); + errorReport.setReport(BundleUtil.getStringFromBundle("file.ingest.saveFailed.message")); } errorReport.setDataFile(datafile); datafile.setIngestReport(errorReport); @@ -139,11 +149,10 @@ public void onMessage(Message message) { logger.info("trying to save datafile and the failed ingest report, id=" + datafile_id); datafile = datafileService.save(datafile); - Dataset dataset = datafile.getOwner(); - if (dataset != null && dataset.getId() != null) { + if (ingestMessage.getDatasetId() != null) { //logger.info("attempting to remove dataset lock for dataset " + dataset.getId()); //datasetService.removeDatasetLock(dataset.getId()); - ingestService.sendFailNotification(dataset.getId()); + ingestService.sendFailNotification(ingestMessage.getDatasetId()); } } } @@ -152,27 +161,11 @@ public void onMessage(Message message) { sbIngestedFiles.append("
"); - Long objectId = null; - - // Remove the dataset lock: - // (note that the assumption here is that all of the datafiles - // packed into this IngestMessage belong to the same dataset) - if (datafile_id != null) { - DataFile datafile = datafileService.find(datafile_id); - if (datafile != null) { - Dataset dataset = datafile.getOwner(); - objectId = dataset.getId(); - if (dataset != null && dataset.getId() != null) { - datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.Ingest); - } - } - } - userNotificationService.sendNotification( authenticatedUser, Timestamp.from(Instant.now()), !ingestWithErrors ? UserNotification.Type.INGESTCOMPLETED : UserNotification.Type.INGESTCOMPLETEDWITHERRORS, - objectId, + ingestMessage.getDatasetId(), sbIngestedFiles.toString(), true ); @@ -182,9 +175,15 @@ public void onMessage(Message message) { ex.printStackTrace(); // error in getting object from message; can't send e-mail } finally { - // when we're done, go ahead and remove the lock (not yet) + // when we're done, go ahead and remove the lock try { - //datasetService.removeDatasetLock( ingestMessage.getDatasetId() ); + // Remove the dataset lock: + // (note that the assumption here is that all of the datafiles + // packed into this IngestMessage belong to the same dataset) + Dataset dataset = datasetService.find(ingestMessage.getDatasetId()); + if (dataset != null && dataset.getId() != null) { + datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.Ingest); + } } catch (Exception ex) { ex.printStackTrace(); // application was unable to remove the datasetLock } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 5a353453fe8..0bbbfb3cb24 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -483,15 +483,17 @@ public void startIngestJobsForDataset(Dataset dataset, AuthenticatedUser user) { // todo: investigate why when calling save with the file object // gotten from the loop, the roles assignment added at create is removed // (switching to refinding via id resolves that) + // possible explanation: when flush-mode is auto, flush is on query, + // we make sure that the roles assignment added at create is flushed dataFile = fileService.find(dataFile.getId()); scheduledFiles.add(dataFile); } } - startIngestJobs(scheduledFiles, user); + startIngestJobs(dataset.getId(), scheduledFiles, user); } - public String startIngestJobs(List dataFiles, AuthenticatedUser user) { + public String startIngestJobs(Long datasetId, List dataFiles, AuthenticatedUser user) { IngestMessage ingestMessage = null; StringBuilder sb = new StringBuilder(); @@ -532,7 +534,7 @@ public String startIngestJobs(List dataFiles, AuthenticatedUser user) if (count > 0) { String info = "Ingest of " + count + " tabular data file(s) is in progress."; logger.info(info); - datasetService.addDatasetLock(scheduledFiles.get(0).getOwner().getId(), + datasetService.addDatasetLock(datasetId, DatasetLock.Reason.Ingest, (user != null) ? 
user.getId() : null, info); @@ -550,10 +552,12 @@ public int compare(DataFile d1, DataFile d2) { } }); - ingestMessage = new IngestMessage(IngestMessage.INGEST_MESAGE_LEVEL_INFO, user.getId()); + ingestMessage = new IngestMessage(user.getId()); for (int i = 0; i < count; i++) { ingestMessage.addFileId(scheduledFilesArray[i].getId()); } + ingestMessage.setDatasetId(datasetId); + ingestMessage.setInfo(info); QueueConnection conn = null; QueueSession session = null; diff --git a/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlServiceBean.java index efe64052c4a..8eb0dfe4ebd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlServiceBean.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.RoleAssignment; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -61,6 +62,13 @@ public PrivateUrlRedirectData getPrivateUrlRedirectDataFromToken(String token) { return PrivateUrlUtil.getPrivateUrlRedirectData(getRoleAssignmentFromPrivateUrlToken(token)); } + /** + * @return DatasetVersion if it can be found using the token or null. + */ + public DatasetVersion getDraftDatasetVersionFromToken(String token) { + return PrivateUrlUtil.getDraftDatasetVersionFromRoleAssignment(getRoleAssignmentFromPrivateUrlToken(token)); + } + /** * @return A RoleAssignment or null. * diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexBatchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexBatchServiceBean.java index 34c145fa6e8..932f58d875d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexBatchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexBatchServiceBean.java @@ -205,15 +205,9 @@ public Future indexAllOrSubset(long numPartitions, long partitionId, boo int datasetFailureCount = 0; List datasetIds = datasetService.findAllOrSubsetOrderByFilesOwned(skipIndexed); for (Long id : datasetIds) { - try { - datasetIndexCount++; - logger.info("indexing dataset " + datasetIndexCount + " of " + datasetIds.size() + " (id=" + id + ")"); - Future result = indexService.indexDatasetInNewTransaction(id); - } catch (Exception e) { - //We want to keep running even after an exception so throw some more info into the log - datasetFailureCount++; - logger.info("FAILURE indexing dataset " + datasetIndexCount + " of " + datasetIds.size() + " (id=" + id + ") Exception info: " + e.getMessage()); - } + datasetIndexCount++; + logger.info("indexing dataset " + datasetIndexCount + " of " + datasetIds.size() + " (id=" + id + ")"); + indexService.indexDatasetInNewTransaction(id); } logger.info("done iterating through all datasets"); @@ -269,15 +263,9 @@ public void indexDataverseRecursively(Dataverse dataverse) { // index the Dataset children for (Long childId : datasetChildren) { - try { - datasetIndexCount++; - logger.info("indexing dataset " + datasetIndexCount + " of " + datasetChildren.size() + " (id=" + childId + ")"); - indexService.indexDatasetInNewTransaction(childId); - } catch (Exception e) { - //We want to keep running even after an exception so throw some more info into the log - datasetFailureCount++; - logger.info("FAILURE indexing dataset " + datasetIndexCount + " of " + 
datasetChildren.size() + " (id=" + childId + ") Exception info: " + e.getMessage()); - } + datasetIndexCount++; + logger.info("indexing dataset " + datasetIndexCount + " of " + datasetChildren.size() + " (id=" + childId + ")"); + indexService.indexDatasetInNewTransaction(childId); } long end = System.currentTimeMillis(); if (datasetFailureCount + dataverseFailureCount > 0){ diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 2d0bf8f467c..0b8f93e47a9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -24,6 +24,7 @@ import edu.harvard.iq.dataverse.PermissionServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; +import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.DataAccessRequest; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -55,6 +56,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Future; import java.util.function.Function; import java.util.logging.Logger; @@ -241,7 +243,7 @@ public Future indexDataverse(Dataverse dataverse, boolean processPaths) solrInputDocument.addField(SearchFields.SOURCE, HARVESTED); } else { (this means that all dataverses are "local" - should this be removed? */ solrInputDocument.addField(SearchFields.IS_HARVESTED, false); - solrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName()); //rootDataverseName); + solrInputDocument.addField(SearchFields.METADATA_SOURCE, rootDataverse.getName()); //rootDataverseName); /*}*/ addDataverseReleaseDateToSolrDoc(solrInputDocument, dataverse); @@ -348,48 +350,98 @@ public Future indexDataverse(Dataverse dataverse, boolean processPaths) } @TransactionAttribute(REQUIRES_NEW) - public Future indexDatasetInNewTransaction(Long datasetId) throws SolrServerException, IOException{ //Dataset dataset) { + public void indexDatasetInNewTransaction(Long datasetId) { //Dataset dataset) { boolean doNormalSolrDocCleanUp = false; - Dataset dataset = em.find(Dataset.class, datasetId); - // return indexDataset(dataset, doNormalSolrDocCleanUp); - Future ret = indexDataset(dataset, doNormalSolrDocCleanUp); + Dataset dataset = datasetService.findDeep(datasetId); + asyncIndexDataset(dataset, doNormalSolrDocCleanUp); dataset = null; - return ret; } - @TransactionAttribute(REQUIRES_NEW) - public Future indexDatasetObjectInNewTransaction(Dataset dataset) throws SolrServerException, IOException{ //Dataset dataset) { - boolean doNormalSolrDocCleanUp = false; - // return indexDataset(dataset, doNormalSolrDocCleanUp); - Future ret = indexDataset(dataset, doNormalSolrDocCleanUp); - dataset = null; - return ret; + // The following two variables are only used in the synchronized getNextToIndex method and do not need to be synchronized themselves + + // nextToIndex contains datasets mapped by dataset id that were added for future indexing while the indexing was already ongoing for a given dataset + // (if there already was a dataset scheduled for indexing, it is overwritten and only the most recently requested version is kept in the map) + private static final Map NEXT_TO_INDEX = new 
ConcurrentHashMap<>(); + // indexingNow is a set of dataset ids of datasets being indexed asynchronously right now + private static final Map INDEXING_NOW = new ConcurrentHashMap<>(); + + // When you pass null as Dataset parameter to this method, it indicates that the indexing of the dataset with "id" has finished + // Pass non-null Dataset to schedule it for indexing + synchronized private static Dataset getNextToIndex(Long id, Dataset d) { + if (d == null) { // -> indexing of the dataset with id has finished + Dataset next = NEXT_TO_INDEX.remove(id); + if (next == null) { // -> no new indexing jobs were requested while indexing was ongoing + // the job can be stopped now + INDEXING_NOW.remove(id); + } + return next; + } + // index job is requested for a non-null dataset + if (INDEXING_NOW.containsKey(id)) { // -> indexing job is already ongoing, and a new job should not be started by the current thread -> return null + NEXT_TO_INDEX.put(id, d); + return null; + } + // otherwise, start a new job + INDEXING_NOW.put(id, true); + return d; } + /** + * Indexes a dataset asynchronously. + * + * Note that this method implements a synchronized skipping mechanism. When an + * indexing job is already running for a given dataset in the background, the + * new call will not index that dataset, but will delegate the execution to + * the already running job. The running job will pick up the requested indexing + * once it is finished with the ongoing indexing. If another indexing is + * requested before the ongoing indexing is finished, only the indexing that is + * requested most recently will be picked up for the next indexing. + * + * In other words: we can have at most one indexing ongoing for the given + * dataset, and at most one (most recent) request for reindexing of the same + * dataset. All requests that come between the most recent one and the ongoing + * one are skipped for optimization reasons. For a more in-depth discussion, + * see the pull request: https://github.com/IQSS/dataverse/pull/9558 + * + * @param dataset The dataset to be indexed. + * @param doNormalSolrDocCleanUp Flag for normal Solr doc clean up. + */ @Asynchronous - public Future asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { - return indexDataset(dataset, doNormalSolrDocCleanUp); + public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { + Long id = dataset.getId(); + Dataset next = getNextToIndex(id, dataset); // if there is an ongoing index job for this dataset, next is null (ongoing index job will reindex the newest version after current indexing finishes) + while (next != null) { + try { + indexDataset(next, doNormalSolrDocCleanUp); + } catch (Exception e) { // catch all possible exceptions; otherwise when something unexpected happens the dataset would remain locked and impossible to reindex + String failureLogText = "Indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); + failureLogText += "\r\n" + e.getLocalizedMessage(); + LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); + } + next = getNextToIndex(id, null); // if dataset was not changed during the indexing (and no new job was requested), next is null and loop can be stopped + } } - - @Asynchronous - public void asyncIndexDatasetList(List datasets, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + + public void asyncIndexDatasetList(List datasets, boolean doNormalSolrDocCleanUp) { for(Dataset dataset : datasets) { - indexDataset(dataset, true); + asyncIndexDataset(dataset, true); } } - public Future indexDvObject(DvObject objectIn) throws SolrServerException, IOException { - + public void indexDvObject(DvObject objectIn) throws SolrServerException, IOException { if (objectIn.isInstanceofDataset() ){ - return (indexDataset((Dataset)objectIn, true)); + asyncIndexDataset((Dataset)objectIn, true); + } else if (objectIn.isInstanceofDataverse() ){ + indexDataverse((Dataverse)objectIn); } - if (objectIn.isInstanceofDataverse() ){ - return (indexDataverse((Dataverse)objectIn)); - } - return null; + } + + private void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + doIndexDataset(dataset, doNormalSolrDocCleanUp); + updateLastIndexedTime(dataset.getId()); } - public Future indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { logger.fine("indexing dataset " + dataset.getId()); /** * @todo should we use solrDocIdentifierDataset or @@ -548,7 +600,6 @@ public Future indexDataset(Dataset dataset, boolean doNormalSolrDocClean String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); logger.fine(result); indexDatasetPermissions(dataset); - return new AsyncResult<>(result); } else if (latestVersionState.equals(DatasetVersion.VersionState.DEACCESSIONED)) { desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, true); @@ -595,11 +646,9 @@ public Future indexDataset(Dataset dataset, boolean doNormalSolrDocClean String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); logger.fine(result); indexDatasetPermissions(dataset); - return new AsyncResult<>(result); } else { String result = "No-op. Unexpected condition reached: No released version and latest version is neither draft nor deaccessioned"; logger.fine(result); - return new AsyncResult<>(result); } } else if (atLeastOnePublishedVersion == true) { results.append("Published versions found. 
") @@ -652,7 +701,6 @@ public Future indexDataset(Dataset dataset, boolean doNormalSolrDocClean String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); logger.fine(result); indexDatasetPermissions(dataset); - return new AsyncResult<>(result); } else if (latestVersionState.equals(DatasetVersion.VersionState.DRAFT)) { IndexableDataset indexableDraftVersion = new IndexableDataset(latestVersion); @@ -706,16 +754,13 @@ public Future indexDataset(Dataset dataset, boolean doNormalSolrDocClean String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); logger.fine(result); indexDatasetPermissions(dataset); - return new AsyncResult<>(result); } else { String result = "No-op. Unexpected condition reached: There is at least one published version but the latest version is neither published nor draft"; logger.fine(result); - return new AsyncResult<>(result); } } else { String result = "No-op. Unexpected condition reached: Has a version been published or not?"; logger.fine(result); - return new AsyncResult<>(result); } } @@ -741,10 +786,11 @@ private IndexResponse indexDatasetPermissions(Dataset dataset) { } private String addOrUpdateDataset(IndexableDataset indexableDataset) throws SolrServerException, IOException { - return addOrUpdateDataset(indexableDataset, null); + String result = addOrUpdateDataset(indexableDataset, null); + return result; } - public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set datafilesInDraftVersion) throws SolrServerException, IOException { + public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set datafilesInDraftVersion) throws SolrServerException, IOException { IndexableDataset.DatasetState state = indexableDataset.getDatasetState(); Dataset dataset = indexableDataset.getDatasetVersion().getDataset(); logger.fine("adding or updating Solr document for dataset id " + dataset.getId()); @@ -771,10 +817,15 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set d throw new IOException(ex); } } + return docs.getMessage(); + } + + @Asynchronous + private void updateLastIndexedTime(Long id) { + // indexing is often in a transaction with update statements + // if we flush on query (flush-mode auto), we want to prevent locking + // -> update the dataset asynchronously in a new transaction + updateLastIndexedTimeInNewTransaction(id); + } + + @TransactionAttribute(REQUIRES_NEW) + private void updateLastIndexedTimeInNewTransaction(Long id) { /// Dataset updatedDataset = /// (Dataset)dvObjectService.updateContentIndexTime(dataset); /// updatedDataset = null; // instead of making a call to dvObjectService, let's try and // modify the index time stamp using the local EntityManager: - DvObject dvObjectToModify = em.find(DvObject.class, docs.getDatasetId()); + DvObject dvObjectToModify = em.find(DvObject.class, id); dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime())); dvObjectToModify = em.merge(dvObjectToModify); - dvObjectToModify = null; - - return docs.getMessage(); } /** @@ -1630,7 +1694,11 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc sid.addField(fieldName, doc.getFieldValue(fieldName)); } - List paths = object.isInstanceofDataset() ? retrieveDVOPaths(datasetService.find(object.getId())) + Dataset dataset = null; + if (object.isInstanceofDataset()) { + dataset = datasetService.findDeep(object.getId()); + } + List paths = object.isInstanceofDataset() ? 
retrieveDVOPaths(dataset) : retrieveDVOPaths(dataverseService.find(object.getId())); sid.removeField(SearchFields.SUBTREE); @@ -1638,7 +1706,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc UpdateResponse addResponse = solrClientService.getSolrClient().add(sid); UpdateResponse commitResponse = solrClientService.getSolrClient().commit(); if (object.isInstanceofDataset()) { - for (DataFile df : datasetService.find(object.getId()).getFiles()) { + for (DataFile df : dataset.getFiles()) { solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString())); res = solrClientService.getSolrClient().query(solrQuery); if (!res.getResults().isEmpty()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index ced23538be4..d1ed3191556 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -406,9 +406,10 @@ Whether Harvesting (OAI) service is enabled */ InheritParentRoleAssignments, - /** Make Data Count Logging and Display */ + /** Make Data Count Logging, Display, and Start Date */ MDCLogPath, DisplayMDCMetrics, + MDCStartDate, /** * Allow CORS flag (true or false). It is true by default diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bf6dddd621a..45f7f396783 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -2,6 +2,7 @@ import com.ocpsoft.pretty.PrettyContext; import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; @@ -995,9 +996,29 @@ public boolean isAllowCustomTerms() { return settingsService.isTrueForKey(SettingsServiceBean.Key.AllowCustomTermsOfUse, safeDefaultIfKeyNotFound); } - public boolean isFilePIDsEnabled() { - boolean safeDefaultIfKeyNotFound = true; - return settingsService.isTrueForKey(SettingsServiceBean.Key.FilePIDsEnabled, safeDefaultIfKeyNotFound); + public boolean isFilePIDsEnabledForCollection(Dataverse collection) { + if (collection == null) { + return false; + } + + Dataverse thisCollection = collection; + + // If neither enabled nor disabled specifically for this collection, + // the parent collection setting is inherited (recursively): + while (thisCollection.getFilePIDsEnabled() == null) { + if (thisCollection.getOwner() == null) { + // We've reached the root collection, and file PIDs registration + // hasn't been explicitly enabled or disabled, therefore we presume that it is + // subject to how the registration is configured for the + // entire instance: + return settingsService.isTrueForKey(SettingsServiceBean.Key.FilePIDsEnabled, true); + } + thisCollection = thisCollection.getOwner(); + } + + // If present, the setting of the first direct ancestor collection + // takes precedence: + return thisCollection.getFilePIDsEnabled(); } public boolean isIndependentHandleService() { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 4fe9654cc64..59290449988 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java
@@ -153,6 +153,10 @@ public Dataverse parseDataverse(JsonObject jobj) throws JsonParseException {
             }
         }
     }
+
+    if (jobj.containsKey("filePIDsEnabled")) {
+        dv.setFilePIDsEnabled(jobj.getBoolean("filePIDsEnabled"));
+    }
     /* We decided that subject is not user set, but gotten from the subject of the dataverse's
        datasets - leavig this code in for now, in case we need to go back to it at some point
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
index 4720f399487..4bc1e224977 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
@@ -47,6 +47,7 @@
 import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
 import edu.harvard.iq.dataverse.privateurl.PrivateUrl;
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
+import edu.harvard.iq.dataverse.util.BundleUtil;
 import edu.harvard.iq.dataverse.util.DatasetFieldWalker;
 
 import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder;
@@ -305,6 +306,9 @@ public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail) {
         if(dv.getStorageDriverId() != null) {
             bld.add("storageDriverLabel", DataAccess.getStorageDriverLabelFor(dv.getStorageDriverId()));
         }
+        if (dv.getFilePIDsEnabled() != null) {
+            bld.add("filePIDsEnabled", dv.getFilePIDsEnabled());
+        }
 
         return bld;
     }
@@ -368,23 +372,28 @@ public static JsonObjectBuilder json(FileDetailsHolder ds) {
     }
 
     public static JsonObjectBuilder json(DatasetVersion dsv) {
+        return json(dsv, null);
+    }
+
+    public static JsonObjectBuilder json(DatasetVersion dsv, List<String> anonymizedFieldTypeNamesList) {
+        Dataset dataset = dsv.getDataset();
         JsonObjectBuilder bld = jsonObjectBuilder()
-                .add("id", dsv.getId()).add("datasetId", dsv.getDataset().getId())
-                .add("datasetPersistentId", dsv.getDataset().getGlobalId().asString())
-                .add("storageIdentifier", dsv.getDataset().getStorageIdentifier())
+                .add("id", dsv.getId()).add("datasetId", dataset.getId())
+                .add("datasetPersistentId", dataset.getGlobalId().asString())
+                .add("storageIdentifier", dataset.getStorageIdentifier())
                 .add("versionNumber", dsv.getVersionNumber()).add("versionMinorNumber", dsv.getMinorVersionNumber())
                 .add("versionState", dsv.getVersionState().name()).add("versionNote", dsv.getVersionNote())
                 .add("archiveNote", dsv.getArchiveNote()).add("deaccessionLink", dsv.getDeaccessionLink())
                 .add("distributionDate", dsv.getDistributionDate()).add("productionDate", dsv.getProductionDate())
                 .add("UNF", dsv.getUNF()).add("archiveTime", format(dsv.getArchiveTime()))
                 .add("lastUpdateTime", format(dsv.getLastUpdateTime())).add("releaseTime", format(dsv.getReleaseTime()))
-                .add("createTime", format(dsv.getCreateTime()));
-        License license = DatasetUtil.getLicense(dsv);;
+                .add("createTime", format(dsv.getCreateTime()))
+                .add("alternativePersistentId", dataset.getAlternativePersistentIdentifier())
+                .add("publicationDate", dataset.getPublicationDateFormattedYYYYMMDD())
+                .add("citationDate", dataset.getCitationDateFormattedYYYYMMDD());
+        License license = DatasetUtil.getLicense(dsv);
         if (license != null) {
-            // Standard license
-            bld.add("license", jsonObjectBuilder()
-                    .add("name", DatasetUtil.getLicenseName(dsv))
-                    .add("uri", DatasetUtil.getLicenseURI(dsv)));
+            bld.add("license", jsonLicense(dsv));
         } else {
             // Custom terms
             bld.add("termsOfUse", dsv.getTermsOfUseAndAccess().getTermsOfUse())
@@ -405,14 +414,15 @@ public static JsonObjectBuilder json(DatasetVersion dsv) {
                 .add("studyCompletion", dsv.getTermsOfUseAndAccess().getStudyCompletion())
                 .add("fileAccessRequest", dsv.getTermsOfUseAndAccess().isFileAccessRequest());
 
-        bld.add("metadataBlocks", jsonByBlocks(dsv.getDatasetFields()));
-
+        bld.add("metadataBlocks", (anonymizedFieldTypeNamesList != null) ?
+                jsonByBlocks(dsv.getDatasetFields(), anonymizedFieldTypeNamesList)
+                : jsonByBlocks(dsv.getDatasetFields())
+        );
         bld.add("files", jsonFileMetadatas(dsv.getFileMetadatas()));
 
         return bld;
     }
-
-
+
     public static JsonObjectBuilder jsonDataFileList(List<DataFile> dataFiles){
 
         if (dataFiles==null){
@@ -485,11 +495,15 @@ public static JsonObjectBuilder json(DatasetDistributor dist) {
     }
 
     public static JsonObjectBuilder jsonByBlocks(List<DatasetField> fields) {
+        return jsonByBlocks(fields, null);
+    }
+
+    public static JsonObjectBuilder jsonByBlocks(List<DatasetField> fields, List<String> anonymizedFieldTypeNamesList) {
         JsonObjectBuilder blocksBld = jsonObjectBuilder();
 
         for (Map.Entry<MetadataBlock, List<DatasetField>> blockAndFields : DatasetField.groupByBlock(fields).entrySet()) {
             MetadataBlock block = blockAndFields.getKey();
-            blocksBld.add(block.getName(), JsonPrinter.json(block, blockAndFields.getValue()));
+            blocksBld.add(block.getName(), JsonPrinter.json(block, blockAndFields.getValue(), anonymizedFieldTypeNamesList));
         }
         return blocksBld;
     }
@@ -503,6 +517,10 @@ public static JsonObjectBuilder jsonByBlocks(List<DatasetField> fields) {
      * @return JSON Object builder with the block and fields information.
      */
     public static JsonObjectBuilder json(MetadataBlock block, List<DatasetField> fields) {
+        return json(block, fields, null);
+    }
+
+    public static JsonObjectBuilder json(MetadataBlock block, List<DatasetField> fields, List<String> anonymizedFieldTypeNamesList) {
         JsonObjectBuilder blockBld = jsonObjectBuilder();
 
         blockBld.add("displayName", block.getDisplayName());
@@ -510,7 +528,7 @@ public static JsonObjectBuilder json(MetadataBlock block, List<DatasetField> fie
         final JsonArrayBuilder fieldsArray = Json.createArrayBuilder();
         Map<Long, JsonObject> cvocMap = (datasetFieldService==null) ? new HashMap<Long, JsonObject>() :datasetFieldService.getCVocConf(true);
-        DatasetFieldWalker.walk(fields, settingsService, cvocMap, new DatasetFieldsToJson(fieldsArray));
+        DatasetFieldWalker.walk(fields, settingsService, cvocMap, new DatasetFieldsToJson(fieldsArray, anonymizedFieldTypeNamesList));
         blockBld.add("fields", fieldsArray);
 
         return blockBld;
@@ -891,12 +909,16 @@ private static class DatasetFieldsToJson implements DatasetFieldWalker.Listener
         Deque<JsonObjectBuilder> objectStack = new LinkedList<>();
         Deque<JsonArrayBuilder> valueArrStack = new LinkedList<>();
-        JsonObjectBuilder result = null;
-
+        List<String> anonymizedFieldTypeNamesList = null;
         DatasetFieldsToJson(JsonArrayBuilder result) {
             valueArrStack.push(result);
         }
 
+        DatasetFieldsToJson(JsonArrayBuilder result, List<String> anonymizedFieldTypeNamesList) {
+            this(result);
+            this.anonymizedFieldTypeNamesList = anonymizedFieldTypeNamesList;
+        }
+
         @Override
         public void startField(DatasetField f) {
             objectStack.push(jsonObjectBuilder());
@@ -921,15 +943,19 @@ public void endField(DatasetField f) {
             JsonArray expandedValues = valueArrStack.pop().build();
             JsonArray jsonValues = valueArrStack.pop().build();
             if (!jsonValues.isEmpty()) {
-                jsonField.add("value",
-                        f.getDatasetFieldType().isAllowMultiples() ? jsonValues
-                                : jsonValues.get(0));
-                if (!expandedValues.isEmpty()) {
-                    jsonField.add("expandedvalue",
-                            f.getDatasetFieldType().isAllowMultiples() ? expandedValues
-                                    : expandedValues.get(0));
+                String datasetFieldName = f.getDatasetFieldType().getName();
+                if (anonymizedFieldTypeNamesList != null && anonymizedFieldTypeNamesList.contains(datasetFieldName)) {
+                    anonymizeField(jsonField);
+                } else {
+                    jsonField.add("value",
+                            f.getDatasetFieldType().isAllowMultiples() ? jsonValues
+                                    : jsonValues.get(0));
+                    if (!expandedValues.isEmpty()) {
+                        jsonField.add("expandedvalue",
+                                f.getDatasetFieldType().isAllowMultiples() ? expandedValues
+                                        : expandedValues.get(0));
+                    }
                 }
-                valueArrStack.peek().add(jsonField);
             }
         }
@@ -974,6 +1000,12 @@ public void endCompoundValue(DatasetFieldCompoundValue dsfcv) {
                 valueArrStack.peek().add(jsonField);
             }
         }
+
+        private void anonymizeField(JsonObjectBuilder jsonField) {
+            jsonField.add("typeClass", "primitive");
+            jsonField.add("value", BundleUtil.getStringFromBundle("dataset.anonymized.withheld"));
+            jsonField.add("multiple", false);
+        }
     }
 
     public static JsonObjectBuilder json(AuthenticationProviderRow aRow) {
@@ -1153,4 +1185,15 @@ public static JsonObjectBuilder jsonLinkset(Dataset ds) {
                 .add("publicationDate", ds.getPublicationDateFormattedYYYYMMDD())
                 .add("storageIdentifier", ds.getStorageIdentifier());
     }
+
+    private static JsonObjectBuilder jsonLicense(DatasetVersion dsv) {
+        JsonObjectBuilder licenseJsonObjectBuilder = jsonObjectBuilder()
+                .add("name", DatasetUtil.getLicenseName(dsv))
+                .add("uri", DatasetUtil.getLicenseURI(dsv));
+        String licenseIconUri = DatasetUtil.getLicenseIcon(dsv);
+        if (licenseIconUri != null) {
+            licenseJsonObjectBuilder.add("iconUri", licenseIconUri);
+        }
+        return licenseJsonObjectBuilder;
+    }
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java
index d57b7072be7..cf78c4f8cdf 100644
--- a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java
@@ -394,7 +394,7 @@ private void workflowCompleted(Workflow wf, WorkflowContext ctxt) {
             String dataFilePIDFormat = settings.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT");
             boolean registerGlobalIdsForFiles =
                 (currentGlobalIdProtocol.equals(ctxt.getDataset().getProtocol()) || dataFilePIDFormat.equals("INDEPENDENT"))
-                && systemConfig.isFilePIDsEnabled();
+                && systemConfig.isFilePIDsEnabledForCollection(ctxt.getDataset().getOwner());
             if ( registerGlobalIdsForFiles ){
                 registerGlobalIdsForFiles = currentGlobalAuthority.equals( ctxt.getDataset().getAuthority() );
             }
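
The JsonPrinter changes above add an anonymized variant of the DatasetVersion JSON export: field types named in the list are emitted with the bundled "withheld" text instead of their values. A minimal usage sketch, assuming a hypothetical caller inside the running application (the class name and the hard-coded field names are illustrative only; a real caller would read the names from configuration):

    import java.util.List;

    import javax.json.JsonObject;

    import edu.harvard.iq.dataverse.DatasetVersion;
    import edu.harvard.iq.dataverse.util.json.JsonPrinter;

    public class AnonymizedVersionJsonSketch {

        // Builds the version JSON with the listed field types replaced by the
        // bundled "dataset.anonymized.withheld" text. The names below are
        // placeholders, not values taken from this diff.
        public static JsonObject export(DatasetVersion dsv) {
            List<String> anonymizedFieldTypeNames = List.of("author", "datasetContact");
            return JsonPrinter.json(dsv, anonymizedFieldTypeNames).build();
        }
    }
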
diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties
index 768c6bc5e2c..c16a1f23bd1 100644
--- a/src/main/java/propertyFiles/Bundle.properties
+++ b/src/main/java/propertyFiles/Bundle.properties
@@ -1406,17 +1406,22 @@ metrics.title=Metrics
 metrics.title.tip=View more metrics information
 metrics.dataset.title=Dataset Metrics
 metrics.dataset.tip.default=Aggregated metrics for this dataset.
+metrics.dataset.makedatacount.title=Make Data Count (MDC) Metrics
+metrics.dataset.makedatacount.since=since
 metrics.dataset.tip.makedatacount=Metrics collected using Make Data Count standards.
-metrics.dataset.views.tip=Dataset views are combined with both aggregated file views and file downloads.
+metrics.dataset.views.tip=Aggregate of views of the dataset landing page, file views, and file downloads.
 metrics.dataset.downloads.default.tip=Total aggregated downloads of files in this dataset.
 metrics.dataset.downloads.makedatacount.tip=Each file downloaded is counted as 1, and added to the total download count.
+metrics.dataset.downloads.premakedatacount.tip=Downloads prior to enabling MDC. Counts do not have the same filtering and detail as MDC metrics.
 metrics.dataset.citations.tip=Click for a list of citation URLs.
 metrics.file.title=File Metrics
 metrics.file.tip.default=Metrics for this individual file.
 metrics.file.tip.makedatacount=Individual file downloads are tracked in Dataverse but are not reported as part of the Make Data Count standard.
 metrics.file.downloads.tip=Total downloads of this file.
+metrics.file.downloads.nonmdc.tip=Total downloads. Due to differences between MDC and Dataverse's internal tracking, the sum of these for all files in a dataset may be larger than total downloads reported for a dataset.
 metrics.views={0, choice, 0#Views|1#View|2#Views}
 metrics.downloads={0, choice, 0#Downloads|1#Download|2#Downloads}
+metrics.downloads.nonMDC={0, choice, 0#|1# (+ 1 pre-MDC |2< (+ {0} pre-MDC }
 metrics.citations={0, choice, 0#Citations|1#Citation|2#Citations}
 metrics.citations.dialog.header=Dataset Citations
 metrics.citations.dialog.help=Citations for this dataset are retrieved from Crossref via DataCite using Make Data Count standards. For more information about dataset metrics, please refer to the User Guide.
@@ -1595,6 +1600,12 @@ dataset.metadata.persistentId=Persistent Identifier
 dataset.metadata.persistentId.tip=The Dataset's unique persistent identifier, either a DOI or Handle
 dataset.metadata.alternativePersistentId=Previous Dataset Persistent ID
 dataset.metadata.alternativePersistentId.tip=A previously used persistent identifier for the Dataset, either a DOI or Handle
+dataset.metadata.invalidEntry=is not a valid entry.
+dataset.metadata.invalidDate=is not a valid date. "yyyy" is a supported format.
+dataset.metadata.invalidNumber=is not a valid number.
+dataset.metadata.invalidInteger=is not a valid integer.
+dataset.metadata.invalidURL=is not a valid URL.
+dataset.metadata.invalidEmail=is not a valid email address.
 file.metadata.preview=Preview
 file.metadata.filetags=File Tags
 file.metadata.persistentId=File Persistent ID
@@ -1696,7 +1707,9 @@ file.editFile=Edit
 file.actionsBlock=File Actions
 file.accessBtn=Access File
+file.accessBtn.header.access=File Access
 file.accessBtn.header.download=Download Options
+file.accessBtn.header.metadata=Download Metadata
 file.optionsBtn=File Options
 file.optionsBtn.header.edit=Edit Options
 file.optionsBtn.header.configure=Configure Options
@@ -1733,6 +1746,7 @@ file.download.subset.header=Download Data Subset
 file.preview=Preview:
 file.fileName=File Name
 file.sizeNotAvailable=Size not available
+file.ingestFailed=Ingest failed. No further information is available.
 file.type.tabularData=Tabular Data
 file.originalChecksumType=Original File {0}
 file.checksum.exists.tip=A file with this checksum already exists in the dataset.
@@ -1813,6 +1827,8 @@ file.spss-savEncoding.current=Current Selection:
 file.spss-porExtraLabels=Variable Labels
 file.spss-porExtraLabels.title=Upload an additional text file with extra variable labels.
 file.spss-porExtraLabels.selectToAddBtn=Select File to Add
+file.ingest.saveFailed.message=Ingest succeeded, but failed to save the ingested tabular data in the database; no further information is available
+file.ingest.saveFailed.detail.message=Ingest succeeded, but failed to save the ingested tabular data in the database:
 file.ingestFailed.header=File available in original format only
 file.ingestFailed.message=Tabular ingest was unsuccessful.
 file.downloadBtn.format.all=All File Formats + Information
@@ -2823,6 +2839,8 @@ Restricted=Restricted
 EmbargoedThenPublic=Embargoed then Public
 EmbargoedThenRestricted=Embargoed then Restricted
+#metadata source - Facet Label
+Harvested=Harvested
 #Shibboleth login
 idp.fatal.divMissing= specified as "insertAtDiv" could not be located in the HTML
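
The new metrics.downloads.nonMDC key is a java.text.MessageFormat choice pattern: it renders nothing when there are no pre-MDC downloads and an opening "(+ N pre-MDC" fragment otherwise; the closing parenthesis appears to be supplied by the dataset.xhtml markup later in this diff. A small standalone sketch of how the pattern behaves (the surrounding brackets are only there to make the output boundaries visible):

    import java.text.MessageFormat;

    public class NonMdcDownloadLabelSketch {
        public static void main(String[] args) {
            // Pattern copied from the Bundle.properties line above.
            String pattern = "{0, choice, 0#|1# (+ 1 pre-MDC |2< (+ {0} pre-MDC }";
            System.out.println("[" + MessageFormat.format(pattern, 0) + "]");  // []  (nothing shown)
            System.out.println("[" + MessageFormat.format(pattern, 1) + "]");  // [ (+ 1 pre-MDC ]
            System.out.println("[" + MessageFormat.format(pattern, 42) + "]"); // [ (+ 42 pre-MDC ]
        }
    }
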
diff --git a/src/main/resources/db/migration/V5.13.0.2__8889-filepids-in-collections.sql b/src/main/resources/db/migration/V5.13.0.2__8889-filepids-in-collections.sql
new file mode 100644
index 00000000000..5e6ce945fe2
--- /dev/null
+++ b/src/main/resources/db/migration/V5.13.0.2__8889-filepids-in-collections.sql
@@ -0,0 +1 @@
+ALTER TABLE dataverse ADD COLUMN IF NOT EXISTS filePIDsEnabled bool;
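
The migration above adds a nullable boolean, so a collection can either set the file PID behavior explicitly or inherit it. A rough sketch, under the assumption that resolution walks the owner hierarchy and falls back to the instance-wide default; this is not the actual SystemConfig.isFilePIDsEnabledForCollection implementation, only an illustration of the tri-state idea:

    import edu.harvard.iq.dataverse.Dataverse;

    public class FilePidPolicySketch {

        // Sketch only; the real resolution logic may differ. The nearest collection
        // that sets the flag explicitly wins, otherwise the instance default applies.
        public static boolean isFilePIDsEnabledForCollection(Dataverse collection, boolean instanceDefault) {
            for (Dataverse dv = collection; dv != null; dv = dv.getOwner()) {
                Boolean explicit = dv.getFilePIDsEnabled(); // null means "not set on this collection"
                if (explicit != null) {
                    return explicit;
                }
            }
            return instanceDefault; // e.g. the instance-wide file PIDs setting
        }
    }
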
diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml
index f3bcd6007fa..fe0758d74f0 100644
--- a/src/main/webapp/dataset-versions.xhtml
+++ b/src/main/webapp/dataset-versions.xhtml
@@ -92,7 +92,7 @@
diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml
index 88fe06b1b25..4d0c1485335 100644
--- a/src/main/webapp/dataset.xhtml
+++ b/src/main/webapp/dataset.xhtml
@@ -522,7 +522,7 @@
-                            #{bundle['metrics.dataset.title']}
+                            #{settingsWrapper.makeDataCountDisplayEnabled ? bundle['metrics.dataset.makedatacount.title'] : bundle['metrics.dataset.title']}
@@ -530,7 +530,9 @@
+                                #{bundle['metrics.dataset.makedatacount.since']} #{settingsWrapper.getMDCStartDate().toString()}
@@ -556,6 +558,13 @@
+                                        )
diff --git a/src/main/webapp/dataverseuser.xhtml b/src/main/webapp/dataverseuser.xhtml
index e3579c6812f..51f5bfa9f8a 100644
--- a/src/main/webapp/dataverseuser.xhtml
+++ b/src/main/webapp/dataverseuser.xhtml
@@ -422,12 +422,13 @@
                             #{item.theObject.getDataset().getDisplayName()}
diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml
index a4e635b8c14..1a049331ae4 100644
--- a/src/main/webapp/editFilesFragment.xhtml
+++ b/src/main/webapp/editFilesFragment.xhtml
@@ -11,9 +11,20 @@ xmlns:o="http://omnifaces.org/ui"
         xmlns:iqbs="http://xmlns.jcp.org/jsf/composite/iqbs">
@@ -109,7 +120,7 @@
             $(document).ready(function () {
                 uploadWidgetDropMsg();
-                setupDirectUpload(#{systemConfig.directUploadEnabled(EditDatafilesPage.dataset)});
+                #{useDirectUpload ? 'setupDirectUpload(true);':''}
             });
             //]]>
@@ -584,7 +595,7 @@
         #{EditDatafilesPage.warningMessageForFileTypeDifferentPopUp}
+                        #{bundle['metrics.file.title']}
diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml
index 6122b86b274..99ab30fed68 100644
--- a/src/main/webapp/filesFragment.xhtml
+++ b/src/main/webapp/filesFragment.xhtml
@@ -144,7 +144,7 @@