From 6f1f2e2393b2b3b56c654f7ccea7e4f59ca651b8 Mon Sep 17 00:00:00 2001 From: Trevor Bedford Date: Fri, 12 Apr 2024 11:23:07 -0700 Subject: [PATCH 1/4] Include strain and date annotations for sequenced vaccine strains This commit adds strain and date annotations for 5 vaccine strains that all descend from the Edmonston isolate collected in 1954. The Parks et al. paper describes these well. I purposely chose not to include location for these as I wanted the gray dot in the Auspice tree to make these look a bit different than wild-type isolates. This also includes strain, date and location for the Edmonston WT strain. --- ingest/defaults/annotations.tsv | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ingest/defaults/annotations.tsv b/ingest/defaults/annotations.tsv index 89c0059..7463a31 100644 --- a/ingest/defaults/annotations.tsv +++ b/ingest/defaults/annotations.tsv @@ -4,3 +4,23 @@ # If there are multiple annotations for the same id and field, then the last value is used # Lines starting with '#' are treated as comments # Any '#' after the field value are treated as comments. +# +# Vaccine strain information from Parks et al. 
Comparison of predicted amino acid +# sequences of measles virus strains in the Edmonston vaccine lineage +# https://doi.org/10.1128/jvi.75.2.910-920.2001 +AF266288.2 strain Measles strain Edmonston WT +AF266288.2 date 1954 +AF266288.2 region North America +AF266288.2 country USA +AF266288.2 division Massachusetts +AF266288.2 location Boston +AF266287.1 strain Measles vaccine strain Moraten +AF266287.1 date 1954 +AF266290.1 strain Measles vaccine strain Zagreb +AF266290.1 date 1954 +AF266289.1 strain Measles vaccine strain Rubeovax +AF266289.1 date 1954 +AF266291.1 strain Measles vaccine strain Schwarz +AF266291.1 date 1954 +AF266286.1 strain Measles vaccine strain AIK-C +AF266286.1 date 1954 From 4770a7d9d60020dd017198bfb3893b3f805a23f8 Mon Sep 17 00:00:00 2001 From: Trevor Bedford Date: Fri, 12 Apr 2024 11:26:16 -0700 Subject: [PATCH 2/4] Switch to country / year subsampling for genome build There's not enough genome data to warrant inclusion of month in the subsampling grouping. Also, by including month the subsampling was dropping a number of older samples that were only annotated by year. I noticed this in wanting to include the 1954 Edmonston related vaccine strains and they were getting filtered out with the previous "country year month" group-by. 
--- phylogenetic/defaults/config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phylogenetic/defaults/config.yaml b/phylogenetic/defaults/config.yaml index 035e5b3..1ac9172 100644 --- a/phylogenetic/defaults/config.yaml +++ b/phylogenetic/defaults/config.yaml @@ -7,8 +7,8 @@ files: colors: "defaults/colors.tsv" auspice_config: "defaults/auspice_config.json" auspice_config_N450: "defaults/auspice_config_N450.json" -filter: - group_by: "country year month" +filter: + group_by: "country year" sequences_per_group: 20 min_date: 1950 min_length: 5000 From 796f74750b2b3e04e81a54f9b1128afe9afc926b Mon Sep 17 00:00:00 2001 From: Trevor Bedford Date: Fri, 12 Apr 2024 11:30:04 -0700 Subject: [PATCH 3/4] Include "strain name" as coloring Strain name is often not included in GenBank or is not very helpful. But still good to surface as metadata for modal. I particularly wanted this for the 1954 Edmonston-related vaccine strains. People know these by their strain names, certainly not their GenBank accessions. 
--- phylogenetic/defaults/auspice_config.json | 5 +++++ phylogenetic/defaults/auspice_config_N450.json | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/phylogenetic/defaults/auspice_config.json b/phylogenetic/defaults/auspice_config.json index 27f2484..445323c 100644 --- a/phylogenetic/defaults/auspice_config.json +++ b/phylogenetic/defaults/auspice_config.json @@ -25,6 +25,11 @@ "key": "region", "title": "Region", "type": "categorical" + }, + { + "key": "strain", + "title": "Strain name", + "type": "categorical" } ], "geo_resolutions": [ diff --git a/phylogenetic/defaults/auspice_config_N450.json b/phylogenetic/defaults/auspice_config_N450.json index 5193bc0..a644eb2 100644 --- a/phylogenetic/defaults/auspice_config_N450.json +++ b/phylogenetic/defaults/auspice_config_N450.json @@ -25,7 +25,12 @@ "key": "region", "title": "Region", "type": "categorical" - } + }, + { + "key": "strain", + "title": "Strain name", + "type": "categorical" + } ], "geo_resolutions": [ "country", From b51ea6073f865f704f112e83c17d9eee9f5e863d Mon Sep 17 00:00:00 2001 From: Trevor Bedford Date: Fri, 12 Apr 2024 15:37:20 -0700 Subject: [PATCH 4/4] Export strain, division and location as additional metadata This swaps to using --metadata-columns in augur export to surface strain, division and location. 
--- phylogenetic/defaults/auspice_config.json | 5 ----- phylogenetic/defaults/auspice_config_N450.json | 7 +------ phylogenetic/defaults/config.yaml | 4 +++- phylogenetic/rules/export.smk | 6 +++--- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/phylogenetic/defaults/auspice_config.json b/phylogenetic/defaults/auspice_config.json index 445323c..27f2484 100644 --- a/phylogenetic/defaults/auspice_config.json +++ b/phylogenetic/defaults/auspice_config.json @@ -25,11 +25,6 @@ "key": "region", "title": "Region", "type": "categorical" - }, - { - "key": "strain", - "title": "Strain name", - "type": "categorical" } ], "geo_resolutions": [ diff --git a/phylogenetic/defaults/auspice_config_N450.json b/phylogenetic/defaults/auspice_config_N450.json index a644eb2..5193bc0 100644 --- a/phylogenetic/defaults/auspice_config_N450.json +++ b/phylogenetic/defaults/auspice_config_N450.json @@ -25,12 +25,7 @@ "key": "region", "title": "Region", "type": "categorical" - }, - { - "key": "strain", - "title": "Strain name", - "type": "categorical" - } + } ], "geo_resolutions": [ "country", diff --git a/phylogenetic/defaults/config.yaml b/phylogenetic/defaults/config.yaml index 1ac9172..ce84005 100644 --- a/phylogenetic/defaults/config.yaml +++ b/phylogenetic/defaults/config.yaml @@ -20,6 +20,8 @@ filter_N450: refine: coalescent: "opt" date_inference: "marginal" - clock_filter_iqd: 4 + clock_filter_iqd: 4 ancestral: inference: "joint" +export: + metadata_columns: "strain division location" diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index dcd1893..d2c190a 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -15,12 +15,12 @@ rule export: aa_muts = "results/{gene}/aa_muts.json", colors = config["files"]["colors"], auspice_config = lambda wildcard: "defaults/auspice_config.json" if wildcard.gene in ["genome"] else "defaults/auspice_config_N450.json" - output: auspice_json = "auspice/measles_{gene}.json", root_sequence 
= "auspice/measles_{gene}_root-sequence.json" params: - strain_id = config["strain_id_field"] + strain_id = config["strain_id_field"], + metadata_columns = config["export"]["metadata_columns"] shell: """ augur export v2 \ @@ -29,8 +29,8 @@ rule export: --metadata-id-columns {params.strain_id} \ --node-data {input.branch_lengths} {input.nt_muts} {input.aa_muts} \ --colors {input.colors} \ + --metadata-columns {params.metadata_columns} \ --auspice-config {input.auspice_config} \ --include-root-sequence \ --output {output.auspice_json} """ - \ No newline at end of file