Merge pull request #124 from darwin-eu-dev/release-v3
Release candidate v3.0.0
MaximMoinat committed Dec 21, 2023
2 parents: 1e0c9b7 + ec214db. Commit: 5813dd4
Showing 74 changed files with 2,055 additions and 2,606 deletions. Only the first seven changed files are rendered below; the remaining file diffs are not included here.
3 changes: 2 additions & 1 deletion .Rbuildignore
@@ -8,4 +8,5 @@
^errorReport.+\.txt$
^.+session\.sql$
^\.vscode$
^\.lintr$
^\.lintr$
^\.github$
1 change: 0 additions & 1 deletion .Rprofile

This file was deleted.

1 change: 1 addition & 0 deletions .github/.gitignore
@@ -0,0 +1 @@
*.html
47 changes: 47 additions & 0 deletions .github/workflows/R-CMD-check.yaml
@@ -0,0 +1,47 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]

name: R-CMD-check

jobs:
  R-CMD-check:
    runs-on: ${{ matrix.config.os }}

    name: ${{ matrix.config.os }} (${{ matrix.config.r }})

    strategy:
      fail-fast: false
      matrix:
        config:
          - {os: macos-latest, r: 'release'}
          - {os: windows-latest, r: 'release'}
          - {os: ubuntu-latest, r: 'release'}

    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
      R_KEEP_PKG_SOURCE: yes

    steps:
      - uses: actions/checkout@v3

      - uses: r-lib/actions/setup-pandoc@v2

      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ matrix.config.r }}
          http-user-agent: ${{ matrix.config.http-user-agent }}
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::rcmdcheck
          needs: check

      - uses: r-lib/actions/check-r-package@v2
        with:
          upload-snapshots: true
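
This workflow, added in this release, runs R CMD check for the package on macOS, Windows, and Ubuntu via the r-lib actions. Roughly the same check can be reproduced locally with the rcmdcheck package; the snippet below is a sketch and not part of this commit, and its arguments are illustrative:

# Run R CMD check from the package root, similar to the CI job above.
# Assumes the rcmdcheck package is installed.
rcmdcheck::rcmdcheck(
  path = ".",                           # package source directory
  args = c("--no-manual", "--as-cran"),
  error_on = "warning"                  # treat warnings as failures, like a strict CI gate
)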
30 changes: 18 additions & 12 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: CdmOnboarding
Title: Generate report on an OMOP CDM instance
Version: 2.2.0
Version: 3.0.0
Authors@R:
c(person(given = "Peter",
family = "Rijnbeek",
@@ -21,14 +21,16 @@ BugReports: https://github.com/darwin-eu/CdmOnboarding/issue
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.1
RoxygenNote: 7.2.3
Depends:
R (>= 3.5.0),
DatabaseConnector (>= 3.0.0),
R (>= 4.0.0),
DatabaseConnector (>= 5.0.2)
Imports:
ROhdsiWebApi (>= 1.1.2),
ParallelLogger,
SqlRender
Imports:
SqlRender,
DrugExposureDiagnostics (>= 1.0.0),
CDMConnector (>= 1.0.0),
benchmarkme,
prettyunits,
tools,
@@ -37,16 +39,20 @@ Imports:
data.table,
lubridate,
ggplot2,
tidyr,
jsonlite,
stats,
DrugExposureDiagnostics (>= 0.4.2),
CDMConnector
cowplot,
scales,
stringr
Suggests:
flextable,
devtools,
knitr
knitr,
testthat (>= 3.0.0),
Eunomia,
Achilles
Remotes:
OHDSI/ROhdsiWebApi,
DARWIN-EU/DrugExposureDiagnostics,
DARWIN-EU/CDMConnector
OHDSI/Eunomia,
OHDSI/Achilles
Config/testthat/edition: 3
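
The dependency changes above raise the minimum requirements to R >= 4.0.0 and DatabaseConnector >= 5.0.2, move DrugExposureDiagnostics (>= 1.0.0) and CDMConnector (>= 1.0.0) from GitHub Remotes to regular Imports, and add testthat, Eunomia and Achilles for testing. A hedged installation sketch follows; the repository path is inferred from the BugReports URL and the exact ref may differ:

# Install the release from GitHub together with its declared dependencies.
# "darwin-eu/CdmOnboarding" is an assumption based on the BugReports URL in DESCRIPTION.
install.packages("remotes")
remotes::install_github("darwin-eu/CdmOnboarding")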
5 changes: 5 additions & 0 deletions NAMESPACE
@@ -2,8 +2,10 @@

export(bundleResults)
export(cdmOnboarding)
export(compat)
export(dataTablesChecks)
export(generateResultsDocument)
export(getDARWINpackages)
export(getDedIngredients)
export(getHADESpackages)
export(performanceChecks)
@@ -15,6 +17,9 @@ import(SqlRender)
import(dplyr)
import(ggplot2)
import(officer)
importFrom(stringr,str_replace)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_to_title)
importFrom(utils,compareVersion)
importFrom(utils,installed.packages)
importFrom(utils,packageVersion)
125 changes: 54 additions & 71 deletions R/CdmOnboarding.R
@@ -37,8 +37,6 @@
#' On SQL Server, this should specifiy both the database and the schema, so for example, on SQL Server, 'cdm_results.dbo'.
#' @param scratchDatabaseSchema Fully qualified name of database schema that we can write temporary tables to. Default is resultsDatabaseSchema.
#' On SQL Server, this should specifiy both the database and the schema, so for example, on SQL Server, 'cdm_scratch.dbo'.
#' @param vocabDatabaseSchema String name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema.
#' On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.
#' @param oracleTempSchema For Oracle only: the name of the database schema where you want all temporary tables to be managed. Requires create/insert permissions to this database.
#' @param databaseId ID of your database, this will be used as subfolder for the results and naming of the report
#' @param databaseName String name of the database name. If blank, CDM_SOURCE table will be queried to try to obtain this.
@@ -63,7 +61,6 @@ cdmOnboarding <- function(connectionDetails,
cdmDatabaseSchema,
resultsDatabaseSchema,
scratchDatabaseSchema = resultsDatabaseSchema,
vocabDatabaseSchema = cdmDatabaseSchema,
oracleTempSchema = resultsDatabaseSchema,
databaseId,
databaseName,
@@ -91,7 +88,6 @@ cdmOnboarding <- function(connectionDetails,
cdmDatabaseSchema = cdmDatabaseSchema,
resultsDatabaseSchema = resultsDatabaseSchema,
scratchDatabaseSchema = scratchDatabaseSchema,
vocabDatabaseSchema = vocabDatabaseSchema,
oracleTempSchema = oracleTempSchema,
databaseId = databaseId,
databaseName = databaseName,
@@ -157,7 +153,6 @@ cdmOnboarding <- function(connectionDetails,
cdmDatabaseSchema,
resultsDatabaseSchema,
scratchDatabaseSchema,
vocabDatabaseSchema,
oracleTempSchema,
databaseId,
databaseName,
@@ -242,9 +237,9 @@
# Check whether Achilles output is available and get Achilles run info ---------------------------------------
achillesMetadata <- NULL
if (!sqlOnly) {
achillesTablesExists <- .checkAchillesTablesExist(connectionDetails, resultsDatabaseSchema)
achillesMetadata <- .getAchillesMetadata(connectionDetails, resultsDatabaseSchema, outputFolder)
achillesTableExists <- .checkAchillesTablesExist(connectionDetails, resultsDatabaseSchema, outputFolder)
if (is.null(achillesMetadata) || !achillesTableExists) {
if (is.null(achillesMetadata) || !achillesTablesExists) {
ParallelLogger::logError("The output from the Achilles analyses is required.")
ParallelLogger::logError(sprintf(
"Please run Achilles first and make sure the resulting Achilles tables are in the given results schema ('%s').",
@@ -306,40 +301,42 @@
vocabularyResults <- vocabularyChecks(
connectionDetails = connectionDetails,
cdmDatabaseSchema = cdmDatabaseSchema,
vocabDatabaseSchema = vocabDatabaseSchema,
smallCellCount = smallCellCount,
sqlOnly = sqlOnly,
cdmVersion = cdmVersion,
outputFolder = outputFolder,
sqlOnly = sqlOnly,
optimize = optimize
)
}

# performance checks --------------------------------------------------------------------------------------------
missingPackages <- NULL
packinfo <- NULL
hadesPackageVersions <- NULL
sys_details <- NULL
dmsVersion <- NULL
performanceResults <- NULL
if (runPerformanceChecks) {
ParallelLogger::logInfo("Check installed R Packages")
hadesPackages <- getHADESpackages()
diffPackages <- setdiff(hadesPackages, rownames(installed.packages()))
missingPackages <- paste(diffPackages, collapse = ', ')

if (length(diffPackages) > 0) {
ParallelLogger::logInfo("Not all the HADES packages are installed, see https://ohdsi.github.io/Hades/installingHades.html for more information") # nolint
ParallelLogger::logInfo(sprintf("Missing: %s", missingPackages))
packinfo <- as.data.frame(installed.packages(fields = c("URL")))
packinfo <- packinfo[, c("Package", "Version", "LibPath", "URL")]

hadesPackages <- getHADESpackages()
diffHADESPackages <- setdiff(hadesPackages, packinfo$Package)
if (length(diffHADESPackages) > 0) {
ParallelLogger::logInfo("> Not all the HADES packages are installed, see https://ohdsi.github.io/Hades/installingHades.html for more information") # nolint
ParallelLogger::logInfo(sprintf("> Missing: %s", paste(diffHADESPackages, collapse = ', ')))
} else {
ParallelLogger::logInfo("> All HADES packages are installed")
ParallelLogger::logInfo("> All HADES packages are installed.")
}

# Note: can have multiple versions of the same package due to renvs
# Sorting on LibPath to get packages in same environment together
packinfo <- as.data.frame(installed.packages())
packinfo <- packinfo[order(packinfo$LibPath, packinfo$Package), c("Package", "Version")]
hadesPackageVersions <- packinfo[packinfo$Package %in% hadesPackages, ]

darwinPackages <- getDARWINpackages()
diffDARWINPackages <- setdiff(darwinPackages, packinfo$Package)
if (length(diffDARWINPackages) > 0) {
ParallelLogger::logInfo("> Not all the DARWIN EU\u00AE packages are installed.")
ParallelLogger::logInfo(sprintf("> Missing: %s", paste(diffDARWINPackages, collapse = ', ')))
} else {
ParallelLogger::logInfo("> All DARWIN EU\u00AE packages are installed.")
}
darwinPackageVersions <- packinfo[packinfo$Package %in% darwinPackages, ]

sys_details <- benchmarkme::get_sys_details(sys_info = FALSE)
ParallelLogger::logInfo(
sprintf(
@@ -356,11 +353,16 @@
ParallelLogger::logInfo("Running Performance Checks SQL")
performanceResults <- performanceChecks(
connectionDetails = connectionDetails,
vocabDatabaseSchema = vocabDatabaseSchema,
cdmDatabaseSchema = cdmDatabaseSchema,
resultsDatabaseSchema = resultsDatabaseSchema,
sqlOnly = sqlOnly,
outputFolder = outputFolder
)
performanceResults$sys_details <- sys_details
performanceResults$dmsVersion <- dmsVersion
performanceResults$packinfo <- packinfo
performanceResults$hadesPackageVersions <- hadesPackageVersions
performanceResults$darwinPackageVersions <- darwinPackageVersions
}

webApiVersion <- "unknown"
@@ -402,16 +404,11 @@
databaseDescription = databaseDescription,
vocabularyResults = vocabularyResults,
dataTablesResults = dataTablesResults,
packinfo = packinfo,
hadesPackageVersions = hadesPackageVersions,
missingPackages = missingPackages,
performanceResults = performanceResults,
sys_details = sys_details,
webAPIversion = webApiVersion,
dms = connectionDetails$dbms,
cdmSource = cdmSource,
achillesMetadata = achillesMetadata,
dms = connectionDetails$dbms,
dmsVersion = dmsVersion,
smallCellCount = smallCellCount,
runWithOptimizedQueries = optimize,
dqdResults = dqdResults,
@@ -475,49 +472,35 @@
return(cdmSource)
}

.checkAchillesTablesExist <- function(connectionDetails, resultsDatabaseSchema, outputFolder) {
.checkAchillesTablesExist <- function(connectionDetails, resultsDatabaseSchema) {
required_achilles_tables <- c("achilles_analysis", "achilles_results", "achilles_results_dist")
errorReportFile <- file.path(outputFolder, "errorAchillesExistsSql.txt")
achilles_tables_exist <- tryCatch({
connection <- DatabaseConnector::connect(connectionDetails = connectionDetails)
for (x in required_achilles_tables) {
sql <- SqlRender::translate(
SqlRender::render(
"SELECT COUNT(*) FROM @resultsDatabaseSchema.@table",
resultsDatabaseSchema = resultsDatabaseSchema,
table = x
),
targetDialect = 'postgresql'
)
DatabaseConnector::executeSql(
connection = connection,
sql = sql,
progressBar = FALSE,
reportOverallTime = FALSE,
errorReportFile = errorReportFile

connection <- DatabaseConnector::connect(connectionDetails = connectionDetails)
on.exit(DatabaseConnector::disconnect(connection = connection))

achilles_tables_exist <- TRUE
for (table in required_achilles_tables) {
table_exists <- DatabaseConnector::existsTable(connection, resultsDatabaseSchema, table)
if (!table_exists) {
ParallelLogger::logWarn(
sprintf("Achilles table '%s.%s' has not been found", resultsDatabaseSchema, table)
)
}
TRUE
},
error = function(e) {
ParallelLogger::logWarn(sprintf("> The Achilles tables have not been found (%s). Please see error report in %s",
paste(required_achilles_tables, collapse = ', '),
errorReportFile))
FALSE
},
finally = {
DatabaseConnector::disconnect(connection = connection)
rm(connection)
})
achilles_tables_exist <- achilles_tables_exist && table_exists
}

return(achilles_tables_exist)
}

.getAchillesMetadata <- function(connectionDetails, resultsDatabaseSchema, outputFolder) {
sql <- SqlRender::loadRenderTranslateSql(sqlFilename = file.path("checks", "get_achilles_metadata.sql"),
packageName = "CdmOnboarding",
dbms = connectionDetails$dbms,
warnOnMissingParameters = FALSE,
resultsDatabaseSchema = resultsDatabaseSchema)
sql <- SqlRender::loadRenderTranslateSql(
sqlFilename = file.path("checks", "get_achilles_metadata.sql"),
packageName = "CdmOnboarding",
dbms = connectionDetails$dbms,
warnOnMissingParameters = FALSE,
resultsDatabaseSchema = resultsDatabaseSchema
)

errorReportFile <- file.path(outputFolder, "getAchillesMetadataError.txt")
achillesMetadata <- tryCatch({
connection <- DatabaseConnector::connect(connectionDetails = connectionDetails)
@@ -560,7 +543,7 @@ cdmOnboarding <- function(connectionDetails,
startTimestamp = df$startTimestamp,
executionTime = df$executionTime
)
ParallelLogger::logInfo(sprintf("> Succesfully extracted DQD results overview from '%s'", dqdJsonPath))
ParallelLogger::logInfo(sprintf("> Successfully extracted DQD results overview from '%s'", dqdJsonPath))
}, error = function(e) {
ParallelLogger::logError(sprintf("Could not process dqdJsonPath '%s'", dqdJsonPath))
}
@@ -571,7 +554,7 @@
.getAvailableAchillesAnalysisIds <- function(connectionDetails, resultsDatabaseSchema) {
sql <- SqlRender::loadRenderTranslateSql(
sqlFilename = "getAchillesAnalyses.sql",
packageName = "DashboardExport",
packageName = "CdmOnboarding",
dbms = connectionDetails$dbms,
results_database_schema = resultsDatabaseSchema
)
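
Taken together, the visible changes to R/CdmOnboarding.R remove the vocabDatabaseSchema argument (the vocabulary tables are now read from cdmDatabaseSchema), attach the system details and HADES/DARWIN package versions to performanceResults instead of passing them separately to the report generator, and rewrite the Achilles table check around DatabaseConnector::existsTable. A sketch of calling the updated entry point follows; it is not part of this commit, the connection settings and schema names are illustrative, and any arguments not shown are assumed to keep their defaults:

library(CdmOnboarding)

# Illustrative connection details; replace with those of your own CDM database.
connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms     = "postgresql",
  server   = "localhost/cdm",
  user     = "user",
  password = "secret"
)

# Note: as of v3.0.0 there is no vocabDatabaseSchema argument; the vocabulary
# tables are expected in the CDM schema.
cdmOnboarding(
  connectionDetails     = connectionDetails,
  cdmDatabaseSchema     = "public",
  resultsDatabaseSchema = "results",
  databaseId            = "MY_DB",
  databaseName          = "My Database",
  databaseDescription   = "Example CDM instance used for onboarding"
)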
