virtual panel 0 update

sigven · Mar 10, 2024 · dfd3ad6 · dfd3ad6
1 parent 8db47b6
commit dfd3ad6
Show file tree

Hide file tree

Showing 9 changed files with 130 additions and 26 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: cpsr
 Type: Package
 Title: Cancer Predisposition Sequencing Reporter (CPSR)
 Version: 1.0.1.9003
-Date: 2024-03-07
+Date: 2024-03-10
 Authors@R:
     c(person(given = "Sigve",
              family = "Nakken",

diff --git a/R/utils.R b/R/utils.R
@@ -147,6 +147,7 @@ retrieve_secondary_calls <- function(calls) {
     colnames = c(
       "CPG_SOURCE",
       "FINAL_CLASSIFICATION",
+      "CPSR_CLASSIFICATION_SOURCE",
       "PRIMARY_TARGET",
       "GENOTYPE",
       "SYMBOL",
@@ -164,7 +165,9 @@ retrieve_secondary_calls <- function(calls) {
         !is.na(.data$SYMBOL) &
         !is.na(.data$CPG_SOURCE) &
         stringr::str_detect(.data$CPG_SOURCE,"ACMG_SF") &
-        .data$PRIMARY_TARGET == FALSE,
+        .data$PRIMARY_TARGET == FALSE &
+        !is.na(.data$CPSR_CLASSIFICATION_SOURCE) &
+        .data$CPSR_CLASSIFICATION_SOURCE == "ClinVar" &
         !is.na(.data$FINAL_CLASSIFICATION) &
         stringr::str_detect(
           .data$FINAL_CLASSIFICATION,"Pathogenic")) |>

diff --git a/data-raw/data-raw.R b/data-raw/data-raw.R
@@ -365,3 +365,107 @@ acmg[["score2tier"]] <-
 
 usethis::use_data(acmg, overwrite = T)
 usethis::use_data(col_format_output, overwrite = T)
+
+my_log4r_layout <- function(level, ...) {  # layout function handed to the log4r console appender: builds one log line from a level and message parts
+  paste0(format(Sys.time()), " - cpsr-report-generation - ",  # current timestamp followed by a fixed tag
+         level, " - ", ..., "\n", collapse = "")  # then level, the message parts from `...`, and a newline; collapse = "" joins the result into a single string
+}
+
+log4r_logger <-  # logger that writes to the console, formatted by my_log4r_layout
+  log4r::logger(
+    threshold = "INFO", appenders = log4r::console_appender(my_log4r_layout))  # threshold is set to "INFO"
+
+# this gets passed on to all the log4r_* functions inside the pkg
+options("PCGRR_LOG4R_LOGGER" = log4r_logger)  # stored as a session-wide option under the name PCGRR_LOG4R_LOGGER
+
+panel_zero <- list()  # collects one superpanel table per genome build, keyed by build name
+for(build in c('grch37','grch38')){  # same pipeline is run for both assemblies
+  ref_data <- pcgrr::load_reference_data(  # load the reference data bundle for this build
+    pcgr_db_assembly_dir =
+      file.path(
+        "/Users/sigven/project_data/data/data__pcgrdb/dev/pcgrdb",  # NOTE(review): absolute, machine-specific path - script only runs where this bundle exists
+        "20240309/data",
+        build)
+  )
+  panel_zero[[build]] <- ref_data$gene$cpg |>  # start from the cancer predisposition gene table in the reference data
+    dplyr::filter(CPG_SOURCE != "ACMG_SF") |>  # drop rows whose CPG_SOURCE is exactly "ACMG_SF"
+    dplyr::mutate(  # tag every row with the panel name and version
+      PANEL_NAME = "CPSR superpanel of cancer predisposition genes",
+      PANEL_VERSION = "v2024_03") |>
+    dplyr::inner_join(  # attach gene cross-reference columns; inner join keeps only genes found in gene_xref
+      dplyr::select(ref_data$gene$gene_xref,
+                    ENTREZGENE,
+                    ENSEMBL_GENE_ID,
+                    GENE_BIOTYPE,
+                    GENENAME,
+                    TSG,
+                    TSG_SUPPORT,
+                    ONCOGENE,
+                    ONCOGENE_SUPPORT),
+      by = c("ENTREZGENE","ENSEMBL_GENE_ID")  # both identifiers must match
+    ) |>
+    dplyr::rename(  # spell out the TSG column names
+      TUMOR_SUPPRESSOR = TSG,
+      TUMOR_SUPPRESSOR_SUPPORT = TSG_SUPPORT
+    ) |>
+    dplyr::left_join(  # add per-gene ClinVar statistics; left join keeps genes without a match
+      dplyr::select(
+        dplyr::filter(
+          ref_data$variant$clinvar_gene_stats,
+          .data$CONFIDENCE == "min2goldstars"),  # only rows with CONFIDENCE equal to "min2goldstars"
+        c("ENTREZGENE",
+          "N_TRUNC_PATH",
+          "N_NONTRUNC_PATH",
+          "N_MISSENSE_PATH",
+          "N_MISSENSE_BENIGN",
+          "BENIGN_MISSENSE_FRAC",
+          "PATH_TRUNC_FRAC")
+      ), by = "ENTREZGENE"
+    ) |>
+    dplyr::select(  # column ordering: the listed columns first, then everything else
+      dplyr::any_of(  # any_of() tolerates listed columns that are absent
+        c("ENTREZGENE",
+          "SYMBOL",
+          "GENENAME",
+          "GENE_BIOTYPE",
+          "ENSEMBL_GENE_ID",
+          "TUMOR_SUPPRESSOR",
+          "TUMOR_SUPPRESSOR_SUPPORT",
+          "ONCOGENE",
+          "ONCOGENE_SUPPORT",
+          "CPG_SOURCE",
+          "CPG_MOD",
+          "CPG_MOI",
+          "CPG_PHENOTYPES",
+          "CPG_CANCER_CUI",
+          "CPG_SYNDROME_CUI")
+      ),
+      dplyr::everything()  # remaining columns keep their existing order
+    ) |>
+    dplyr::distinct()  # remove duplicate rows
+}
+
+workbook <- openxlsx2::wb_workbook() |>  # Excel workbook with one sheet per genome build
+  openxlsx2::wb_add_worksheet(sheet = "CPSR_SUPERPANEL.GRCH37") |>
+  openxlsx2::wb_add_worksheet(sheet = "CPSR_SUPERPANEL.GRCH38") |>
+  openxlsx2::wb_add_data_table(  # write the grch37 panel as a table with a header row
+    sheet = "CPSR_SUPERPANEL.GRCH37",
+    x = panel_zero[['grch37']],
+    start_row = 1,  # table starts at row 1, column 1
+    start_col = 1,
+    col_names = TRUE,
+    na.strings = "NA",  # NOTE(review): presumably writes missing values as the text "NA" - confirm against openxlsx2 docs
+    table_style = "TableStyleMedium15") |>
+  openxlsx2::wb_add_data_table(  # same for the grch38 panel
+    sheet = "CPSR_SUPERPANEL.GRCH38",
+    x = panel_zero[['grch38']],
+    start_row = 1,
+    start_col = 1,
+    col_names = TRUE,
+    na.strings = "NA",
+    table_style = "TableStyleMedium16")  # note: a different table style than the grch37 sheet (Medium16 vs Medium15)
+
+openxlsx2::wb_save(  # write the workbook to disk
+  wb = workbook,
+  "pkgdown/assets/cpsr_superpanel_2024_03.xlsx",  # relative path, resolved against the current working directory
+  overwrite = TRUE)  # replace an existing file of the same name
diff --git a/inst/templates/quarto/cpsr_documentation.qmd b/inst/templates/quarto/cpsr_documentation.qmd
@@ -37,14 +37,13 @@ for(i in 1:NROW(ref_datasets)){
   description <- ref_datasets[i,"source_description"]
   url <- ref_datasets[i,"source_url"]
   version <- ref_datasets[i,"source_version"]
-  #if(version == "."){
-  #  version = NA
-  #}
   license <- ref_datasets[i, "source_license"]
   license_url <- ref_datasets[i, "source_license_url"]
-  #if(license_url == "."){
-  #  license_url <- NA
-  #}
+  
+  ## temporary fix for outdated ACMG URL (geneOncoX needs update):
+  if(source == "acmg_sf"){
+    url <- "https://pubmed.ncbi.nlm.nih.gov/37347242/"
+  }
   wflow <- ref_datasets[i, "wflow"]
   if(!(stringr::str_detect(
     wflow,"cpsr"))){

diff --git a/pkgdown/assets/cpsr_superpanel_2022_01.xlsx b/pkgdown/assets/cpsr_superpanel_2022_01.xlsx
diff --git a/pkgdown/assets/cpsr_superpanel_2024_03.xlsx b/pkgdown/assets/cpsr_superpanel_2024_03.xlsx
diff --git a/vignettes/output.Rmd b/vignettes/output.Rmd
@@ -7,10 +7,10 @@ output: rmarkdown::html_document
 
 ### Interactive HTML report
 
-An interactive and structured HTML report, generated through [quarto](https://quarto.org/) technology, that lists variants in known cancer predisposition genes, is provided with the following naming convention:
+An interactive and structured [quarto](https://quarto.org/)-generated HTML report lists variants in known cancer predisposition genes, and is provided with the following naming convention:
 
 - `<sample_id>.cpsr.<genome_assembly>.html`
-  - The __sample_id__ is provided as input by the user, and reflects a unique identifier of the tumor-normal sample pair to be analyzed.
+  - The __sample_id__ is provided as input by the user, and reflects a unique identifier of the sample to be analyzed.
 
 The report is structured in multiple sections, described briefly below:
 
@@ -19,9 +19,8 @@ The report is structured in multiple sections, described briefly below:
 	   that constitute the virtual gene panel in the report
 	   
   2. __Summary of findings__
-     * Summarizes the findings through donut charts
-     * Number of variants in each of the five variant classification levels
-
+     * Summarizes the main findings in the sample through value boxes
+
   3. __Variant classification__
      * For all coding variants in the selected cancer predisposition geneset, interactive variant tables are shown for each level (__ClinVar__ and __non-ClinVar (Other)__ variants combined):
 	      * Pathogenic
@@ -31,7 +30,7 @@ The report is structured in multiple sections, described briefly below:
 	      * Benign
 
   4. __Genomic biomarkers__
-     * Reported clinical evidence items from [CIViC](https://civicdb.org) that overlap with variants in the query set are reported in four distinct tabs (Predictive / Prognostic / Diagnostic / Predisposing)
+     * Clinical evidence items from [CIViC](https://civicdb.org) that match variants in the query set are reported in four distinct tabs (Predictive / Prognostic / Diagnostic / Predisposing)
         - See section below for [details of biomarker annotations]()
 
   5. __Secondary findings__
@@ -42,9 +41,9 @@ The report is structured in multiple sections, described briefly below:
 
   7. __Documentation__
 	    * Introduction
-	       * Short overview of the predisposition report - aims and contents
+	       * Short overview of the CPSR variant report - aims and contents
 	    * Annotation resources
-	       * Underlying tools, databases and annotation sources (with versions)
+	       * Information on annotation sources utilized by CPSR, including versions and licensing requirements
 	    * Variant classification
 	       * Overview of how CPSR performs variant classification of variants not recorded in ClinVar, listing ACMG criteria and associated scores
 
@@ -56,7 +55,7 @@ The report is structured in multiple sections, described briefly below:
 A VCF file containing annotated, germline calls (single nucleotide variants and insertions/deletions) is generated with the following naming convention:
 
 - `<sample_id>.cpsr.<genome_assembly>.vcf.gz (.tbi)`
-  - The __sample_id__ is provided as input by the user, and reflects a unique identifier of the tumor-normal sample pair to be analyzed. Following common standards, the annotated VCF file is compressed with [bgzip](http://www.htslib.org/doc/bgzip.html) and indexed with [tabix](http://www.htslib.org/doc/tabix.html). Below follows a description of all annotations/tags present in the VCF INFO column after processing with the CPSR annotation pipeline:
+  - The __sample_id__ is provided as input by the user, and reflects a unique identifier of the sample to be analyzed. Following common standards, the annotated VCF file is compressed with [bgzip](http://www.htslib.org/doc/bgzip.html) and indexed with [tabix](http://www.htslib.org/doc/tabix.html). Below follows a description of all annotations/tags present in the VCF INFO column after processing with the CPSR annotation pipeline:
 
 <br>
 

diff --git a/vignettes/running.Rmd b/vignettes/running.Rmd
@@ -229,11 +229,10 @@ This command will produce the following output files in the _output_ folder:
   2. __example.cpsr.grch37.pass.vcf.gz (.tbi)__ - Bgzipped VCF file with relevant annotations appended by CPSR (PASS variants only)
   3. __example.cpsr.grch37.yaml__ - CPSR configuration file - output from pre-reporting (Python) workflow
   4. __example.cpsr.grch37.pass.tsv.gz__ - Compressed TSV file (generated with [vcf2tsvpy](https://github.com/sigven/vcf2tsvpy)) of VCF content with relevant annotations appended by CPSR
-  5. __example.cpsr.grch37.xlsx__ - A multisheet Excel workbook that contain
+  5. __example.cpsr.grch37.xlsx__ - A four-sheet Excel workbook that contains
       * _i)_ information on virtual gene panel interrogated for variants
       * _ii)_ classification of variants found in input VCF
-      * _iii)_ overlap of variants with existing biomarkers
+      * _iii)_ match of variants with existing biomarkers
       * _iv)_ secondary findings
-      * _v)_ GWAS hits
   6. __example.cpsr.grch37.html__ - Interactive HTML report with clinically relevant variants in cancer predisposition genes
  7. __example.cpsr.grch37.snvs_indels.classification.tsv.gz__ - TSV file with key annotations of SNVs/InDels classified according to clinical significance
diff --git a/vignettes/virtual_panels.Rmd b/vignettes/virtual_panels.Rmd
@@ -5,16 +5,16 @@ output: rmarkdown::html_document
 
 <br>
 
-The cancer predisposition report can show variants found in a number of well-known cancer predisposition genes, and the specific set of genes can be customized by the user by choosing any of the following __virtual gene panels (0 - 42)__:
+The cancer predisposition report can show variants found in a number of well-known cancer predisposition genes, and the specific set of genes can be customized by the user by choosing any of the following __virtual gene panels (0 - 44)__:
 
-  * **Panel 0** is a comprehensive, research-based _superpanel_ assembled through known sources on cancer predisposition:
+  * **Panel 0** is a non-conservative, research-based _superpanel_ assembled through multiple sources on cancer predisposition genes:
 	* A list of 152 genes that were curated and established within TCGA’s pan-cancer study ([Huang et al., *Cell*, 2018](https://www.ncbi.nlm.nih.gov/pubmed/29625052))
-	* A list of 107 protein-coding genes that has been manually curated in COSMIC’s [Cancer Gene Census v91](https://cancer.sanger.ac.uk/census),
+	* A list of 114 protein-coding genes that has been manually curated in COSMIC’s [Cancer Gene Census v91](https://cancer.sanger.ac.uk/census),
 	* Genes from all [Genomics England PanelApp](https://panelapp.genomicsengland.co.uk/) panels for inherited cancers and tumor syndromes (detailed below)
 	* Additional genes deemed relevant for cancer predisposition (contributed by the CPSR user community)
 
 
-	The combination of the above sources resulted in a [non-redundant set of n = 433
+	The combination of the above sources resulted in a [non-redundant set of **n = 563**
 	genes](https://cpsr.readthedocs.io/en/latest/superpanel.html) of relevance for cancer predisposition (see complete details [below](#panel-0))
 
 	Data with respect to mechanisms of inheritance (<i>MoI</i> - autosomal recessive (AR) vs. autosomal
@@ -66,14 +66,14 @@ The cancer predisposition report can show variants found in a number of well-kno
 | 39 | [Sarcoma cancer susceptibility](https://panelapp.genomicsengland.co.uk/panels/217/) |
 | 40 | [Sarcoma susceptibility](https://panelapp.genomicsengland.co.uk/panels/734/) |
 | 41 | [Thyroid cancer pertinent cancer susceptibility](https://panelapp.genomicsengland.co.uk/panels/421/) |
-| 42 | [Childhood solid tumours](https://panelapp.genomicsengland.co.uk/panels/243/) |
+| 42 | [Tumor predisposition - childhood onset](https://panelapp.genomicsengland.co.uk/panels/243/) |
 | 43 | [Upper gastrointestinal cancer pertinent cancer susceptibility](https://panelapp.genomicsengland.co.uk/panels/273/) |
 | 44 | [DNA repair genes pertinent cancer susceptibility](https://panelapp.genomicsengland.co.uk/panels/256/) |
 
 
 ## Panel 0
 
-[Download the complete set of CPSR superpanel genes, grch37/grch38 versions (xlsx)](https://sigven.github.io/cpsr/cpsr_superpanel_2022_01.xlsx)
+[Download the complete set of CPSR superpanel genes, grch37/grch38 versions (xlsx)](https://sigven.github.io/cpsr/cpsr_superpanel_2024_03.xlsx)
 
 
 |  no | gene_link                                                                          | entrezgene | ensembl_gene_id | moi       | mod | gene_name                                                                                         | source                                           | phenotype_syndrome_term                                                                                                                      |