Skip to content

Commit

Permalink
Merge pull request #41 from sanjaynagi/check-my-oligo-18-08-23
Browse files Browse the repository at this point in the history
check my oligo and add tests
  • Loading branch information
sanjaynagi committed Aug 21, 2023
2 parents 60d4593 + ccffb4c commit b5c3d42
Show file tree
Hide file tree
Showing 10 changed files with 243 additions and 12 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/github-action-AgamPrimer-funestus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# CI workflow that executes the AgamPrimer-long notebook with papermill using
# the "*_fun.json" parameter files (presumably the An. funestus test
# configurations -- confirm against tests/).
name: notebooks-funestus

# Run on pushes to main and on pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:

  run:

    runs-on: ubuntu-latest
    strategy:
      # Cancel the remaining matrix jobs as soon as one fails.
      fail-fast: true
      matrix:
        python-version: ['3.10']

    steps:

      - name: Checkout source
        uses: actions/checkout@v3

      # Poetry is installed before setup-python so that the 'poetry' cache
      # option below can locate it.
      - name: Install poetry
        run: pipx install poetry==1.4.2

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'poetry'

      # Install the project, then register a Jupyter kernel named
      # "AgamPrimer"; papermill selects that kernel with "-k AgamPrimer".
      - name: Install dependencies
        run: |
          poetry install
          poetry run python -m ipykernel install --user --name AgamPrimer
      # Each command executes the same input notebook with a different
      # parameters file (-f) and writes the executed copy to the second path.
      - name: Run notebooks
        run: |
          poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR_run.ipynb -k AgamPrimer -f tests/cDNA_Params_fun.json
          poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR2_run.ipynb -k AgamPrimer -f tests/cDNA_Params_2_fun.json
          poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params_fun.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Execute notebooks
name: notebooks-gambiae

on:
push:
Expand All @@ -16,7 +16,7 @@ jobs:
strategy:
fail-fast: true
matrix:
python-version: ['3.8', '3.10', '3.11']
python-version: ['3.8', '3.11']

steps:

Expand All @@ -39,12 +39,8 @@ jobs:
- name: Run notebooks
run: |
poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR_run.ipynb -k AgamPrimer -f tests/cDNA_Params_fun.json
poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR_run.ipynb -k AgamPrimer -f tests/cDNA_Params.json
poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR2_run.ipynb -k AgamPrimer -f tests/cDNA_Params_2_fun.json
poetry run papermill notebooks/AgamPrimer-long.ipynb qPCR2_run.ipynb -k AgamPrimer -f tests/cDNA_Params_2.json
poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params_fun.json
poetry run papermill notebooks/AgamPrimer-long.ipynb gDNA_run.ipynb -k AgamPrimer -f tests/gDNA_probe_Params.json
poetry run papermill notebooks/AgamPrimer-long.ipynb probe_run.ipynb -k AgamPrimer -f tests/probe_Params.json
poetry run papermill notebooks/AgamPrimer-long.ipynb probe_run.ipynb -k AgamPrimer -f tests/probe_Params_fun.json
poetry run papermill notebooks/AgamPrimer-short.ipynb short_run.ipynb -k AgamPrimer
36 changes: 36 additions & 0 deletions .github/workflows/testing.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# CI workflow that installs the project with Poetry and runs the pytest suite.
name: pytest
# Run on pushes to main and on pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
jobs:
  tests:
    strategy:
      # Cancel the remaining matrix jobs as soon as one fails.
      fail-fast: true
      matrix:
        python-version: ["3.9"]
        poetry-version: ["1.3.1"]
        os: [ubuntu-latest]
    runs-on: ${{ matrix.os }}
    steps:

      - name: Checkout source
        uses: actions/checkout@v3

      # Poetry is installed before setup-python so that the 'poetry' cache
      # option below can locate it.
      - name: Install poetry
        run: pipx install poetry==${{ matrix.poetry-version }}

      - name: Setup python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'poetry'

      - name: Install dependencies
        run: poetry install

      # -v prints one line per test so failures are easy to find in the log.
      - name: Run tests
        run: poetry run pytest -v
134 changes: 134 additions & 0 deletions AgamPrimer/AgamPrimer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1227,3 +1227,137 @@ def _get_base_freqs(freqs, ref_alt_array):
base = ref_alt_array[i, i_base]
freq_df.loc[i, f"{base}_freq"] = freqs[i, i_base]
return freq_df


### check my oligos ###


def check_my_oligo(
    sequence, sample_sets="3.0", sample_query=None, width=700, height=400
):
    """
    Align a sequence to AgamP3, retrieve ag3 frequencies in this region and plot.
    Only works with An.gambiae_sl for now.

    The sequence is aligned with BLAT (through ``gget.blat``) against the
    "anoGam3" assembly. The row labelled 0 of the returned table supplies the
    contig, start and end that are turned into a ``contig:start-end`` region
    string and handed to ``plot_sequence_frequencies``.

    Parameters
    ----------
    sequence : str
        Oligo nucleotide sequence to align.
    sample_sets, sample_query
        Forwarded unchanged to ``plot_sequence_frequencies``.
    width, height : int
        Figure size, forwarded unchanged to ``plot_sequence_frequencies``.

    Returns
    -------
    The figure returned by ``plot_sequence_frequencies``, or ``None`` when
    BLAT reports no hit for the sequence.
    """

    print("Aligning sequence to AgamP3 genome with BLAT")
    blat_df = gget.blat(sequence=sequence, seqtype="DNA", assembly="anoGam3")
    # Treat an empty result table like a missing one, so that the row lookup
    # below cannot fail with a KeyError on a table without hits.
    if blat_df is None or len(blat_df) == 0:
        print(f"No hit for {sequence}")
        return None

    contig, start, end = blat_df.loc[0, ["chromosome", "start", "end"]]
    # BLAT names contigs like "chr2L"; the region string uses the bare name.
    contig = contig.replace("chr", "")
    region_span = f"{contig}:{start}-{end}"
    print("plotting frequencies in ag3 data")
    # Return the figure so callers can inspect or save it; previously it was
    # bound to a local name and discarded.
    return plot_sequence_frequencies(
        region=region_span,
        sample_sets=sample_sets,
        sample_query=sample_query,
        width=width,
        height=height,
    )


def plot_sequence_frequencies(
    region, sample_sets=None, sample_query=None, width=700, height=400
):
    """
    Plot allele frequencies for every site returned by ``ag3.snp_calls``.

    SNP calls for ``region`` are loaded (restricted by ``sample_sets`` and
    ``sample_query``), allele counts are converted to frequencies, and a
    per-site table is assembled and passed to ``_plotly_frequencies``.

    The table holds the position, the first listed allele ("base"), the summed
    frequency of all remaining alleles ("alt_frequency"), a "base_pos" label
    and the per-base frequency columns produced by ``_get_base_freqs``.

    Returns the figure produced by ``_plotly_frequencies``.
    """

    calls = ag3.snp_calls(
        region=region, sample_sets=sample_sets, sample_query=sample_query
    )
    alleles = calls["variant_allele"].compute().values.astype(str)
    genotypes = allel.GenotypeArray(calls["call_genotype"])
    frequencies = genotypes.count_alleles().to_frequencies()
    positions = calls["variant_position"].compute().values

    # One row per site: column 0 of the allele array is the plotted base and
    # every other allele column contributes to the alternate frequency.
    site_table = pd.DataFrame(
        {
            "position": positions,
            "base": alleles[:, 0],
            "alt_frequency": frequencies[:, 1:].sum(axis=1),
        }
    )
    position_labels = site_table["position"].astype(str)
    site_table["base_pos"] = site_table["base"] + "_" + position_labels

    # Per-base frequencies; keep only the columns whose name contains "freq".
    padded_frequencies = _addZeroCols(frequencies)
    per_base = _get_base_freqs(padded_frequencies, alleles)
    per_base = per_base.filter(like="freq")

    plot_table = pd.concat([site_table, per_base], axis=1)
    return _plotly_frequencies(
        data=plot_table,
        region=region,
        sample_sets=sample_sets,
        sample_query=sample_query,
        width=width,
        height=height,
    )


def _plotly_frequencies(
    data, region, sample_sets, sample_query=None, width=700, height=400, save=False
):
    """
    Draw, show and return a scatter plot of alternate allele frequencies.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns "position", "base", "alt_frequency",
        "A_freq", "C_freq", "G_freq", "T_freq" and "base_pos".
    region, sample_sets, sample_query
        Only used to build the plot title; ``sample_query`` is omitted from
        the title when it is None.
    width, height : int
        Figure size passed to the layout.
    save : bool
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    The plotly figure, after ``fig.show()`` has been called on it.
    """
    import plotly.graph_objects as go

    hover_template = "<br>".join(
        [
            "Base / Position: %{customdata[4]}",
            "Total Alternate freq: %{y}",
            "A_freq: %{customdata[0]}",
            "C_freq: %{customdata[1]}",
            "G_freq: %{customdata[2]}",
            "T_freq: %{customdata[3]}",
        ]
    )
    # Color scatterpoints blue if segregating SNP.
    # Codes: -1 = frequency exactly 0, 1 = frequency above 0, 0 = anything
    # else (e.g. NaN). Only code 1 maps to blue on the colorscale below.
    color = [-1 if v == 0 else 1 if v > 0 else 0 for v in data["alt_frequency"]]
    colorscale = [[0, "lightgray"], [0.5, "lightgray"], [1, "dodgerblue"]]

    fig = go.Figure(
        go.Scatter(
            x=data["position"],
            y=data["alt_frequency"],
            customdata=data[["A_freq", "C_freq", "G_freq", "T_freq", "base_pos"]],
            hovertemplate=hover_template,
            mode="markers",
            marker=dict(
                size=14,
                color=color,
                colorscale=colorscale,
                # Pin the colour domain to the code range. Otherwise the
                # automatic range collapses when every site carries the same
                # code, and the colours stop following the mapping above.
                cmin=-1,
                cmax=1,
                line=dict(width=2, color="black"),
            ),
            marker_symbol="circle",
        )
    )
    # Set xticks to be the REF allele
    fig.update_xaxes(
        tickmode="array",
        tickangle=0,
        tickvals=data["position"].to_list(),
        ticktext=data["base"].to_list(),
    )
    fig.update_yaxes(
        tickmode="array",
        # Include the 1.0 tick so the labels span the whole [0, 1] range.
        tickvals=[0, 0.2, 0.4, 0.6, 0.8, 1.0],
        range=[0, 1],
        title="Alternate allele frequency",
    )
    # Set plot title
    if sample_query is not None:
        title_text = f"{region} | {sample_sets} | {sample_query} | allele frequencies"
    else:
        title_text = f"{region} | {sample_sets} | allele frequencies"

    fig.update_layout(
        height=height,
        width=width,
        title_text=title_text,
        title_x=0.5,
        template="simple_white",
        showlegend=False,
    )
    fig.show()
    return fig
5 changes: 3 additions & 2 deletions notebooks/AgamPrimer-long.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@
"#@markdown primer_target should be a region string ('2L:28545767') for gDNA primers and probes, and an AGAP transcript identifier for cDNA primers.\n",
"\n",
"primer_target = 'AGAP006228-RA' #@param {type:\"string\"} \n",
"sample_sets = 'AG1000G-GH' #@param {type:\"string\"}"
"sample_sets = 'AG1000G-GH' #@param {type:\"string\"}\n",
"sample_query = None #'taxon == \"coluzzii\"' "
]
},
{
Expand Down Expand Up @@ -429,7 +430,7 @@
"outputs": [],
"source": [
"#sample_sets = None #'AG1000G-GH' # sample_set = '3.0' .you can also supply lists with multiple sample sets e.g ['AG1000G-GH', 'AG1000G-CI', 'AG1000G-BF-A']\n",
"sample_query = None #\"taxon == 'coluzzii'\" # here we can subset to specific values in the metadata e.g : \"taxon == 'gambiae'\" , or \"taxon == 'arabiensis'\" "
"#sample_query = None #\"taxon == 'coluzzii'\" # here we can subset to specific values in the metadata e.g : \"taxon == 'gambiae'\" , or \"taxon == 'arabiensis'\" "
]
},
{
Expand Down
3 changes: 2 additions & 1 deletion tests/cDNA_Params.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
"max_amplicon_size": 120,
"n_primer_pairs":6,
"primer_target": "AGAP006228-RA",
"sample_sets": "AG1000G-GH"
"sample_sets": "AG1000G-GH",
"sample_query":"taxon == 'coluzzii'"
}
3 changes: 2 additions & 1 deletion tests/cDNA_Params_2.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@
"n_primer_pairs":6,
"primer_target": "AGAP028081-RA",
"cDNA_exon_junction" : false,
"sample_sets": "AG1000G-GH"
"sample_sets": "AG1000G-GH",
"sample_query":"taxon == 'coluzzii'"
}
3 changes: 2 additions & 1 deletion tests/gDNA_probe_Params.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
"max_amplicon_size": 100,
"n_primer_pairs":4,
"primer_target": "2L:2422652",
"sample_sets": "AG1000G-GH"
"sample_sets": "AG1000G-GH",
"sample_query":"taxon == 'coluzzii'"
}
3 changes: 2 additions & 1 deletion tests/probe_Params.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
"max_amplicon_size": 240,
"n_primer_pairs":6,
"primer_target": "X:9500000",
"sample_sets": "AG1000G-GH"
"sample_sets": "AG1000G-GH",
"sample_query":"taxon == 'coluzzii'"
}
16 changes: 16 additions & 0 deletions tests/test_agamprimer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import numpy as np
import pandas as pd
import pytest

import AgamPrimer

ace1_seq = "GCGGCGGCTTCTACTCCGG"
kdr_seq = "AGTGATAGGAAATTTAGTCGT"


@pytest.mark.parametrize("sequence", (ace1_seq, kdr_seq))
def test_check_my_oligo(sequence):
    """Smoke test: check_my_oligo runs without raising for each oligo."""
    AgamPrimer.check_my_oligo(sample_sets="AG1000G-GH", sequence=sequence)

0 comments on commit b5c3d42

Please sign in to comment.