Merge pull request #41 from sanjaynagi/check-my-oligo-18-08-23

check my oligo and add tests
sanjaynagi · Aug 21, 2023 · b5c3d42 · b5c3d42
2 parents 60d4593 + ccffb4c
commit b5c3d42
Show file tree

Hide file tree

Showing 10 changed files with 243 additions and 12 deletions.
diff --git a/.github/workflows/github-action-AgamPrimer-funestus.yml b/.github/workflows/github-action-AgamPrimer-funestus.yml
@@ -0,0 +1,44 @@
+name: notebooks-funestus
+
+on:
+  push:
+      branches:
+          - main
+  pull_request:
+      branches:
+          - main
+
+jobs:
+
+  run:
+
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: true
+      matrix:
+          python-version: ['3.10']
+
+    steps:
+
+    - name: Checkout source
+      uses: actions/checkout@v3
+
+    - name: Install poetry
+      run: pipx install poetry==1.4.2
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ matrix.python-version }}
+        cache: 'poetry'
+
+    - name: Install dependencies
+      run: |
+        poetry install
+        poetry run python -m ipykernel install --user --name AgamPrimer
+
+    - name: Run notebooks
+      run: |
+        poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR_run.ipynb -k AgamPrimer -f tests/cDNA_Params_fun.json
+        poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR2_run.ipynb -k AgamPrimer -f tests/cDNA_Params_2_fun.json
+        poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params_fun.json
diff --git a/...ub/workflows/github-action-AgamPrimer.yml → ...lows/github-action-AgamPrimer-gambiae.yml b/...ub/workflows/github-action-AgamPrimer.yml → ...lows/github-action-AgamPrimer-gambiae.yml
@@ -1,4 +1,4 @@
-name: Execute notebooks
+name: notebooks-gambiae
 
 on:
   push:
@@ -16,7 +16,7 @@ jobs:
     strategy:
       fail-fast: true
       matrix:
-          python-version: ['3.8', '3.10', '3.11']
+          python-version: ['3.8', '3.11']
 
     steps:
 
@@ -39,12 +39,8 @@ jobs:
 
     - name: Run notebooks
       run: |
-        poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR_run.ipynb -k AgamPrimer -f tests/cDNA_Params_fun.json
         poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR_run.ipynb -k AgamPrimer -f tests/cDNA_Params.json
-        poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR2_run.ipynb -k AgamPrimer -f tests/cDNA_Params_2_fun.json
         poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR2_run.ipynb -k AgamPrimer -f tests/cDNA_Params_2.json
-        poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params_fun.json
         poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params.json
         poetry run papermill notebooks/AgamPrimer-long.ipynb probe_run.ipynb -k AgamPrimer -f tests/probe_Params.json
-        poetry run papermill notebooks/AgamPrimer-long.ipynb probe_run.ipynb -k AgamPrimer -f tests/probe_Params_fun.json
         poetry run papermill notebooks/AgamPrimer-short.ipynb short_run.ipynb -k AgamPrimer
diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
@@ -0,0 +1,36 @@
+name: pytest
+on:
+  push:
+    branches:
+    - main
+  pull_request:
+    branches:
+    - main
+jobs:
+  tests:
+    strategy:
+      fail-fast: true
+      matrix:
+        python-version: ["3.9"]
+        poetry-version: ["1.3.1"]
+        os: [ubuntu-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+
+      - name: Checkout source
+        uses: actions/checkout@v3
+
+      - name: Install poetry
+        run: pipx install poetry==${{ matrix.poetry-version }}
+
+      - name: Setup python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'poetry'
+
+      - name: Install dependencies
+        run: poetry install
+
+      - name: Run tests
+        run: poetry run pytest -v
diff --git a/AgamPrimer/AgamPrimer.py b/AgamPrimer/AgamPrimer.py
@@ -1227,3 +1227,137 @@ def _get_base_freqs(freqs, ref_alt_array):
             base = ref_alt_array[i, i_base]
             freq_df.loc[i, f"{base}_freq"] = freqs[i, i_base]
     return freq_df
+
+
+### check my oligos ###
+
+
+def check_my_oligo(
+    sequence, sample_sets="3.0", sample_query=None, width=700, height=400
+):
+    """
+    Align a sequence to AgamP3, retrieve ag3 frequencies in this region and plot.
+    Only works with An.gambiae_sl for now.
+    """
+
+    print("Aligning sequence to AgamP3 genome with BLAT")
+    blat_df = gget.blat(sequence=sequence, seqtype="DNA", assembly="anoGam3")
+    if blat_df is None:
+        print(f"No hit for {sequence}")
+        return
+
+    contig, start, end = blat_df.loc[0, ["chromosome", "start", "end"]]
+    contig = contig.replace("chr", "")
+    region_span = f"{contig}:{start}-{end}"
+    print("plotting frequencies in ag3 data")
+    fig = plot_sequence_frequencies(
+        region=region_span,
+        sample_sets=sample_sets,
+        sample_query=sample_query,
+        width=width,
+        height=height,
+    )
+
+
+def plot_sequence_frequencies(
+    region, sample_sets=None, sample_query=None, width=700, height=400
+):
+    """Retrieve frequencies"""
+
+    snps = ag3.snp_calls(
+        region=region, sample_sets=sample_sets, sample_query=sample_query
+    )
+    ref_alt_arr = snps["variant_allele"].compute().values.astype(str)
+    freq_arr = (
+        allel.GenotypeArray(snps["call_genotype"]).count_alleles().to_frequencies()
+    )
+    pos = snps["variant_position"].compute().values
+    df = pd.DataFrame(
+        {
+            "position": pos,
+            "base": ref_alt_arr[:, 0],
+            "alt_frequency": freq_arr[:, 1:].sum(axis=1),
+        }
+    )  # Make dataframe for plotting
+    df["base_pos"] = df["base"] + "_" + df["position"].astype(str)
+    # Get the frequency of each base and store as data frame
+    freq_df = _get_base_freqs(_addZeroCols(freq_arr), ref_alt_arr).filter(like="freq")
+
+    data = pd.concat([df, freq_df], axis=1)
+
+    fig = _plotly_frequencies(
+        data=data,
+        region=region,
+        sample_sets=sample_sets,
+        sample_query=sample_query,
+        width=width,
+        height=height,
+    )
+    return fig
+
+
+def _plotly_frequencies(
+    data, region, sample_sets, sample_query=None, width=700, height=400, save=False
+):
+    import plotly.graph_objects as go
+    from plotly.subplots import make_subplots
+
+    hover_template = "<br>".join(
+        [
+            "Base / Position: %{customdata[4]}",
+            "Total Alternate freq: %{y}",
+            "A_freq: %{customdata[0]}",
+            "C_freq: %{customdata[1]}",
+            "G_freq: %{customdata[2]}",
+            "T_freq: %{customdata[3]}",
+        ]
+    )
+    # Color scatterpoints blue if segregating SNP
+    color = [-1 if v == 0 else 1 if v > 0 else 0 for v in data["alt_frequency"]]
+    colorscale = [[0, "lightgray"], [0.5, "lightgray"], [1, "dodgerblue"]]
+
+    fig = go.Figure(
+        go.Scatter(
+            x=data["position"],
+            y=data["alt_frequency"],
+            customdata=data[["A_freq", "C_freq", "G_freq", "T_freq", "base_pos"]],
+            hovertemplate=hover_template,
+            mode="markers",
+            marker=dict(
+                size=14,
+                color=color,
+                colorscale=colorscale,
+                line=dict(width=2, color="black"),
+            ),
+            marker_symbol="circle",
+        )
+    )
+    # Set xticks to be the REF allele
+    fig.update_xaxes(
+        tickmode="array",
+        tickangle=0,
+        tickvals=data["position"].to_list(),
+        ticktext=data["base"].to_list(),
+    )
+    fig.update_yaxes(
+        tickmode="array",
+        tickvals=np.arange(0, 1, 0.2),
+        range=[0, 1],
+        title="Alternate allele frequency",
+    )
+    # Set plot title
+    if sample_query is not None:
+        title_text = f"{region} | {sample_sets} | {sample_query} | allele frequencies"
+    else:
+        title_text = f"{region} | {sample_sets} | allele frequencies"
+
+    fig.update_layout(
+        height=height,
+        width=width,
+        title_text=title_text,
+        title_x=0.5,
+        template="simple_white",
+        showlegend=False,
+    )
+    fig.show()
+    return fig
diff --git a/notebooks/AgamPrimer-long.ipynb b/notebooks/AgamPrimer-long.ipynb
@@ -101,7 +101,8 @@
         "#@markdown primer_target should be a region string ('2L:28545767') for gDNA primers and probes, and an AGAP transcript identifier for cDNA primers.\n",
         "\n",
         "primer_target = 'AGAP006228-RA'      #@param {type:\"string\"} \n",
-        "sample_sets = 'AG1000G-GH'           #@param {type:\"string\"}"
+        "sample_sets = 'AG1000G-GH'           #@param {type:\"string\"}\n",
+        "sample_query = None #'taxon == \"coluzzii\"' "
       ]
     },
     {
@@ -429,7 +430,7 @@
       "outputs": [],
       "source": [
         "#sample_sets = None  #'AG1000G-GH'          # sample_set = '3.0' .you can also supply lists with multiple sample sets e.g ['AG1000G-GH', 'AG1000G-CI', 'AG1000G-BF-A']\n",
-        "sample_query = None  #\"taxon == 'coluzzii'\"                # here we can subset to specific values in the metadata e.g :     \"taxon == 'gambiae'\" , or \"taxon == 'arabiensis'\"          "
+        "#sample_query = None  #\"taxon == 'coluzzii'\"                # here we can subset to specific values in the metadata e.g :     \"taxon == 'gambiae'\" , or \"taxon == 'arabiensis'\"          "
       ]
     },
     {

diff --git a/tests/cDNA_Params.json b/tests/cDNA_Params.json
@@ -6,5 +6,6 @@
     "max_amplicon_size": 120,
     "n_primer_pairs":6,
     "primer_target": "AGAP006228-RA",
-    "sample_sets": "AG1000G-GH"
+    "sample_sets": "AG1000G-GH",
+    "sample_query":"taxon == 'coluzzii'"
 }
diff --git a/tests/cDNA_Params_2.json b/tests/cDNA_Params_2.json
@@ -7,5 +7,6 @@
     "n_primer_pairs":6,
     "primer_target": "AGAP028081-RA",
     "cDNA_exon_junction" : false,
-    "sample_sets": "AG1000G-GH"
+    "sample_sets": "AG1000G-GH",
+    "sample_query":"taxon == 'coluzzii'"
 }
diff --git a/tests/gDNA_probe_Params.json b/tests/gDNA_probe_Params.json
@@ -6,5 +6,6 @@
     "max_amplicon_size": 100,
     "n_primer_pairs":4,
     "primer_target": "2L:2422652",
-    "sample_sets": "AG1000G-GH"
+    "sample_sets": "AG1000G-GH",
+    "sample_query":"taxon == 'coluzzii'"
 }
diff --git a/tests/probe_Params.json b/tests/probe_Params.json
@@ -6,5 +6,6 @@
     "max_amplicon_size": 240,
     "n_primer_pairs":6,
     "primer_target": "X:9500000",
-    "sample_sets": "AG1000G-GH"
+    "sample_sets": "AG1000G-GH",
+    "sample_query":"taxon == 'coluzzii'"
 }
diff --git a/tests/test_agamprimer.py b/tests/test_agamprimer.py
@@ -0,0 +1,16 @@
+import numpy as np
+import pandas as pd
+import pytest
+
+import AgamPrimer
+
+ace1_seq = "GCGGCGGCTTCTACTCCGG"
+kdr_seq = "AGTGATAGGAAATTTAGTCGT"
+
+
+@pytest.mark.parametrize(
+    "sequence",
+    [ace1_seq, kdr_seq],
+)
+def test_check_my_oligo(sequence):
+    AgamPrimer.check_my_oligo(sequence=sequence, sample_sets="AG1000G-GH")