Skip to content

Commit

Permalink
Delete and purge individual submissions (#1150)
Browse files Browse the repository at this point in the history
* added soft-delete submission endpoint

* added submission purge mechanism

* More tests and redacting audit log details

* Tests of deleted submissions not appearing in export

* Use submission.delete verb

* odata test and comment test refinement

* deleted submissions with odata skip token

* failing/skipped test for client audit purge bug

* small changes

* properly purge client audits of deleted forms and submissions

* fix parameters

* refactoring purge task and cli

* Fixing how the cli script runs

* restore soft-deleted submissions

* updating migration dates and names

* manually call blob purge in tests and add purge audit log tests

* test for not deleting draft submissions

* Small fixes

* shared client audit blob test

* more tests of reusing instance id

* test of migration

* purge task transaction

* filter for non deleted subs/forms in an entity source query
  • Loading branch information
ktuite committed Sep 18, 2024
1 parent 3ae8a69 commit b348912
Show file tree
Hide file tree
Showing 17 changed files with 1,430 additions and 228 deletions.
20 changes: 14 additions & 6 deletions lib/bin/purge-forms.js → lib/bin/purge.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,28 @@
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.
//
// This script checks for (soft-)deleted forms and purges any that were deleted
// over 30 days ago.
// This script checks for (soft-)deleted forms and submissions and purges
// any that were deleted over 30 days ago.
//
// It also accepts command line arguments that can force the purging of
// forms and submissions that were deleted less than 30 days ago.
//
// It can also be used to purge a specific form or submission
// (that has already been marked deleted).

const { run } = require('../task/task');
const { purgeForms } = require('../task/purge');
const { purgeTask } = require('../task/purge');

const { program } = require('commander');
program.option('-f, --force', 'Force any soft-deleted form to be purged right away.');
program.option('-m, --mode <value>', 'Mode of purging. Can be "forms", "submissions", or "all". Default is "all".', 'all');
program.option('-i, --formId <integer>', 'Purge a specific form based on its id.', parseInt);
program.option('-p, --projectId <integer>', 'Restrict purging to a specific project.', parseInt);
program.option('-x, --xmlFormId <value>', 'Restrict purging to specific form based on xmlFormId (must be used with project id).');
program.option('-x, --xmlFormId <value>', 'Restrict purging to specific form based on xmlFormId (must be used with projectId).');
program.option('-s, --instanceId <value>', 'Restrict purging to a specific submission based on instanceId (use with projectId and xmlFormId).');

program.parse();

const options = program.opts();

run(purgeForms(options.force, options.formId, options.projectId, options.xmlFormId)
.then((count) => `Forms purged: ${count}`));
run(purgeTask(options));
23 changes: 23 additions & 0 deletions lib/model/migrations/20240914-01-add-submission-delete-verb.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright 2024 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const up = (db) => db.raw(`
UPDATE roles
SET verbs = verbs || '["submission.delete", "submission.restore"]'::jsonb
WHERE system in ('admin', 'manager')
`);

const down = (db) => db.raw(`
UPDATE roles
SET verbs = (verbs - 'submission.delete' - 'submission.restore')
WHERE system in ('admin', 'manager')
`);

module.exports = { up, down };

27 changes: 27 additions & 0 deletions lib/model/migrations/20240914-02-remove-orphaned-client-audits.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright 2024 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

// From earlier form-purging, there could have been leftover client audits
// that were not properly purged because of how they joined to submissions.
// This migration deletes those client audit rows, allowing the blobs to finally
// be de-referenced and also eventually purged (by the puring cron job).

const up = (db) => db.raw(`
DELETE FROM client_audits
WHERE "blobId" NOT IN (
SELECT "blobId" FROM submission_attachments
WHERE "blobId" IS NOT NULL
AND "isClientAudit" IS true
);
`);

const down = () => {};

module.exports = { up, down };

4 changes: 2 additions & 2 deletions lib/model/query/audits.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ const actionCondition = (action) => {
// The backup action was logged by a backup script that has been removed.
// Even though the script has been removed, the audit log entries it logged
// have not, so we should continue to exclude those.
return sql`action not in ('entity.create', 'entity.bulk.create', 'entity.error', 'entity.update.version', 'entity.update.resolve', 'entity.delete', 'submission.create', 'submission.update', 'submission.update.version', 'submission.attachment.update', 'submission.reprocess', 'backup', 'analytics')`;
return sql`action not in ('entity.create', 'entity.bulk.create', 'entity.error', 'entity.update.version', 'entity.update.resolve', 'entity.delete', 'submission.create', 'submission.update', 'submission.update.version', 'submission.attachment.update', 'submission.reprocess', 'submission.delete', 'submission.restore', 'backup', 'analytics')`;
else if (action === 'user')
return sql`action in ('user.create', 'user.update', 'user.delete', 'user.assignment.create', 'user.assignment.delete', 'user.session.create')`;
else if (action === 'field_key')
Expand All @@ -48,7 +48,7 @@ const actionCondition = (action) => {
else if (action === 'form')
return sql`action in ('form.create', 'form.update', 'form.delete', 'form.restore', 'form.purge', 'form.attachment.update', 'form.submission.export', 'form.update.draft.set', 'form.update.draft.delete', 'form.update.publish')`;
else if (action === 'submission')
return sql`action in ('submission.create', 'submission.update', 'submission.update.version', 'submission.attachment.update', 'submission.reprocess')`;
return sql`action in ('submission.create', 'submission.update', 'submission.update.version', 'submission.attachment.update', 'submission.reprocess', 'submission.delete', 'submission.restore', 'submission.purge')`;
else if (action === 'dataset')
return sql`action in ('dataset.create', 'dataset.update')`;
else if (action === 'entity')
Expand Down
7 changes: 6 additions & 1 deletion lib/model/query/entities.js
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,12 @@ const _get = (includeSource) => {
`}
${!includeSource ? sql`` : sql`
LEFT JOIN entity_def_sources ON entity_defs."sourceId" = entity_def_sources."id"
LEFT JOIN submission_defs ON submission_defs.id = entity_def_sources."submissionDefId"
LEFT JOIN (
SELECT submission_defs.* FROM submission_defs
JOIN submissions ON submission_defs."submissionId" = submissions.id
JOIN forms ON submissions."formId" = forms.id
WHERE submissions."deletedAt" IS NULL AND forms."deletedAt" IS NULL
) as submission_defs on submission_defs.id = entity_def_sources."submissionDefId"
LEFT JOIN (
SELECT submissions.*, submission_defs."userAgent" FROM submissions
JOIN submission_defs ON submissions.id = submission_defs."submissionId" AND root
Expand Down
15 changes: 11 additions & 4 deletions lib/model/query/forms.js
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ const _trashedFilter = (force, id, projectId, xmlFormId) => {
// 3. Update actees table for the specific form to leave some useful information behind
// 4. Delete the forms and their resources from the database
// 5. Purge unattached blobs
const purge = (force = false, id = null, projectId = null, xmlFormId = null) => ({ oneFirst, Blobs }) => {
const purge = (force = false, id = null, projectId = null, xmlFormId = null) => ({ oneFirst }) => {
if (xmlFormId != null && projectId == null)
throw Problem.internal.unknown({ error: 'Must also specify projectId when using xmlFormId' });
return oneFirst(sql`
Expand All @@ -415,6 +415,15 @@ with redacted_audits as (
from forms
where audits."acteeId" = forms."acteeId"
and ${_trashedFilter(force, id, projectId, xmlFormId)}
), deleted_client_audits as (
delete from client_audits
using submission_attachments, submission_defs, submissions, forms
where client_audits."blobId" = submission_attachments."blobId"
and submission_attachments."submissionDefId" = submission_defs.id
and submission_attachments."isClientAudit" = true
and submission_defs."submissionId" = submissions.id
and submissions."formId" = forms.id
and ${_trashedFilter(force, id, projectId, xmlFormId)}
), purge_audits as (
insert into audits ("action", "acteeId", "loggedAt", "processed")
select 'form.purge', "acteeId", clock_timestamp(), clock_timestamp()
Expand All @@ -437,9 +446,7 @@ with redacted_audits as (
where ${_trashedFilter(force, id, projectId, xmlFormId)}
returning 1
)
select count(*) from deleted_forms`)
.then((count) => Blobs.purgeUnattached()
.then(() => Promise.resolve(count)));
select count(*) from deleted_forms`);
};

////////////////////////////////////////////////////////////////////////////////
Expand Down
88 changes: 86 additions & 2 deletions lib/model/query/submissions.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const config = require('config');
const { map } = require('ramda');
const { sql } = require('slonik');
const { Frame, table } = require('../frame');
const { Actor, Form, Submission } = require('../frames');
const { odataFilter, odataOrderBy } = require('../../data/odata-filter');
const { odataToColumnMap, odataSubTableToColumnMap } = require('../../data/submission');
const { unjoiner, extender, equals, page, updater, QueryOptions, insertMany } = require('../../util/db');
const { unjoiner, extender, equals, page, updater, QueryOptions, insertMany, markDeleted, markUndeleted } = require('../../util/db');
const { blankStringToNull, construct } = require('../../util/util');
const Problem = require('../../util/problem');
const { streamEncBlobs } = require('../../util/blob');
Expand Down Expand Up @@ -200,6 +201,17 @@ const getById = (submissionId) => ({ maybeOne }) =>
maybeOne(sql`select * from submissions where id=${submissionId} and "deletedAt" is null`)
.then(map(construct(Submission)));

const getDeleted = (projectId, formId, instanceId) => ({ maybeOne }) =>
maybeOne(sql` select submissions.*
from submissions
join forms on submissions."formId" = forms.id
where forms."projectId" = ${projectId}
and submissions."formId" = ${formId}
and submissions."instanceId" = ${instanceId}
and submissions."deletedAt" IS NOT NULL
`)
.then(map(construct(Submission)));

const joinOnSkiptoken = (skiptoken, formId, draft) => {
if (skiptoken == null) return sql``;
// In the case of a subtable, we fetch all submissions without pagination: we
Expand Down Expand Up @@ -386,14 +398,86 @@ const getForExport = (formId, instanceId, draft, options = QueryOptions.none) =>
maybeOne(_export(formId, draft, [], options.withCondition({ 'submissions.instanceId': instanceId })))
.then(map(_exportUnjoiner));

////////////////////////////////////////////////////////////////////////////////
// DELETE SUBMISSION

const del = (submission) => ({ run }) =>
run(markDeleted(submission));
del.audit = (submission, form) => (log) => log('submission.delete', { acteeId: form.acteeId }, { instanceId: submission.instanceId });

const restore = (submission) => ({ run }) =>
run(markUndeleted(submission));
restore.audit = (submission, form) => (log) => log('submission.restore', { acteeId: form.acteeId }, { instanceId: submission.instanceId });

////////////////////////////////////////////////////////////////////////////////
// PURGING SOFT-DELETED SUBMISSIONS

const DAY_RANGE = config.has('default.taskSchedule.purge')
? config.get('default.taskSchedule.purge')
: 30; // Default is 30 days


// Submission purging and the trash filter can target a specific submission
// or all deleted submissions, but can't be used to filter-purge by project or form.
const _trashedFilter = (force, projectId, xmlFormId, instanceId) => {
const idFilter = ((instanceId != null) && (projectId != null) && (xmlFormId != null)
? sql`and submissions."instanceId" = ${instanceId}
and forms."projectId" = ${projectId}
and forms."xmlFormId" = ${xmlFormId}`
: sql``);
return (force
? sql`submissions."deletedAt" is not null ${idFilter}`
: sql`submissions."deletedAt" < current_date - cast(${DAY_RANGE} as int) ${idFilter}`);
};

const purge = (force = false, projectId = null, xmlFormId = null, instanceId = null) => ({ oneFirst }) => {
if ((instanceId != null || projectId != null || xmlFormId != null) && !(instanceId != null && projectId != null && xmlFormId != null)) {
throw Problem.internal.unknown({ error: 'Must specify either all or none of projectId, xmlFormId, and instanceId' });
}
return oneFirst(sql`
with redacted_audits as (
update audits set notes = ''
from submissions
join forms on submissions."formId" = forms.id
where (audits.details->>'submissionId')::int = submissions.id
and ${_trashedFilter(force, projectId, xmlFormId, instanceId)}
), deleted_client_audits as (
delete from client_audits
using submission_attachments, submission_defs, submissions, forms
where client_audits."blobId" = submission_attachments."blobId"
and submission_attachments."submissionDefId" = submission_defs.id
and submission_attachments."isClientAudit" = true
and submission_defs."submissionId" = submissions.id
and submissions."formId" = forms.id
and ${_trashedFilter(force, projectId, xmlFormId, instanceId)}
), purge_audits as (
insert into audits ("action", "loggedAt", "processed", "details")
select 'submission.purge', clock_timestamp(), clock_timestamp(), jsonb_build_object('submissions_deleted', "count")
from (
select count(*) as count
from submissions
join forms on forms.id = submissions."formId"
where ${_trashedFilter(force, projectId, xmlFormId, instanceId)}
) as del_sub_count
where del_sub_count.count > 0
), deleted_submissions as (
delete from submissions
using forms
where submissions."formId" = forms.id
and ${_trashedFilter(force, projectId, xmlFormId, instanceId)}
returning 1
)
select count(*) from deleted_submissions`);
};

module.exports = {
createNew, createVersion,
update, clearDraftSubmissions, clearDraftSubmissionsForProject,
update, del, restore, purge, clearDraftSubmissions, clearDraftSubmissionsForProject,
setSelectMultipleValues, getSelectMultipleValuesForExport,
getByIdsWithDef, getSubAndDefById,
getByIds, getAllForFormByIds, getById, countByFormId, verifyVersion,
getDefById, getCurrentDefByIds, getAnyDefByFormAndInstanceId, getDefsByFormAndLogicalId, getDefBySubmissionAndInstanceId, getRootForInstanceId,
getDeleted,
streamForExport, getForExport
};

4 changes: 2 additions & 2 deletions lib/resources/entities.js
Original file line number Diff line number Diff line change
Expand Up @@ -156,13 +156,13 @@ module.exports = (service, endpoint) => {

}));

service.delete('/projects/:projectId/datasets/:name/entities/:uuid', endpoint(async ({ Datasets, Entities }, { auth, params, queryOptions }) => {
service.delete('/projects/:projectId/datasets/:name/entities/:uuid', endpoint(async ({ Datasets, Entities }, { auth, params }) => {

const dataset = await Datasets.get(params.projectId, params.name, true).then(getOrNotFound);

await auth.canOrReject('entity.delete', dataset);

const entity = await Entities.getById(dataset.id, params.uuid, queryOptions).then(getOrNotFound);
const entity = await Entities.getById(dataset.id, params.uuid).then(getOrNotFound);

await Entities.del(entity, dataset);

Expand Down
16 changes: 16 additions & 0 deletions lib/resources/submissions.js
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,22 @@ module.exports = (service, endpoint) => {
.then(getOrNotFound)
.then((submission) => Submissions.update(form, submission, Submission.fromApi(body))))));

service.delete('/projects/:projectId/forms/:formId/submissions/:instanceId', endpoint(async ({ Forms, Submissions }, { params, auth }) => {
const form = await Forms.getByProjectAndXmlFormId(params.projectId, params.formId, false).then(getOrNotFound);
await auth.canOrReject('submission.delete', form);
const submission = await Submissions.getByIdsWithDef(params.projectId, params.formId, params.instanceId, false).then(getOrNotFound);
await Submissions.del(submission, form);
return success();
}));

service.post('/projects/:projectId/forms/:formId/submissions/:instanceId/restore', endpoint(async ({ Forms, Submissions }, { params, auth }) => {
const form = await Forms.getByProjectAndXmlFormId(params.projectId, params.formId, false).then(getOrNotFound);
await auth.canOrReject('submission.restore', form);
const submission = await Submissions.getDeleted(params.projectId, form.id, params.instanceId).then(getOrNotFound);
await Submissions.restore(submission, form);
return success();
}));


const dataOutputs = (base, draft, getForm) => {

Expand Down
30 changes: 27 additions & 3 deletions lib/task/purge.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,31 @@

const { task } = require('./task');

const purgeForms = task.withContainer(({ Forms }) => (force = false, formId = null, projectId = null, xmlFormId = null) =>
Forms.purge(force, formId, projectId, xmlFormId));
const purgeTask = task.withContainer((container) => async (options = {}) => {
// Form/submission purging will happen within its own transaction
const message = await container.db.transaction(async trxn => {
const { Forms, Submissions } = container.with({ db: trxn });
try {
if (options.mode === 'submissions' || options.instanceId) {
const count = await Submissions.purge(options.force, options.projectId, options.xmlFormId, options.instanceId);
return `Submissions purged: ${count}`;
} else if (options.mode === 'forms' || (options.formId || options.xmlFormId)) {
const count = await Forms.purge(options.force, options.formId, options.projectId, options.xmlFormId);
return `Forms purged: ${count}`;
} else {
const formCount = await Forms.purge(options.force, options.formId, options.projectId, options.xmlFormId);
const submissionCount = await Submissions.purge(options.force, options.projectId, options.xmlFormId, options.instanceId);
return `Forms purged: ${formCount}, Submissions purged: ${submissionCount}`;
}
} catch (error) {
return error?.problemDetails?.error;
}
});

module.exports = { purgeForms };
// Purging unattached blobs is outside of the above transaction because it
// may interact with an external blob store.
await container.Blobs.purgeUnattached();
return message;
});

module.exports = { purgeTask };
Loading

0 comments on commit b348912

Please sign in to comment.