diff --git a/google_colab/ont_dorado.ipynb b/google_colab/ont_dorado.ipynb new file mode 100644 index 0000000..4b564b8 --- /dev/null +++ b/google_colab/ont_dorado.ipynb @@ -0,0 +1,110 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gxcweSl4fmRz" + }, + "outputs": [], + "source": [ + "# Initiate GPU runtime\n", + "\n", + "import tensorflow as tf\n", + "tf.test.gpu_device_name()\n", + "\n", + "import torch\n", + "torch.cuda.get_device_name(0)\n", + "\n", + "!rm -rf sample_data" + ] + }, + { + "cell_type": "code", + "source": [ + "# Install Dorado (Oxford Nanopore Technologies)\n", + "\n", + "%%shell\n", + "\n", + "DORADO_VERSION=0.6.0 # Updated 02 Apr 2024\n", + "\n", + "pip install pod5\n", + "\n", + "wget https://cdn.oxfordnanoportal.com/software/analysis/dorado-\"$DORADO_VERSION\"-linux-x64.tar.gz -O dorado.tar.gz\n", + "tar -vzxf dorado.tar.gz; rm -rf dorado.tar.gz; mv dorado-\"$DORADO_VERSION\"-linux-x64 ont-dorado\n", + "./ont-dorado/bin/dorado download --directory ont-dorado/models\n", + "./ont-dorado/bin/dorado --version" + ], + "metadata": { + "id": "dSavIZSzhTlP" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Import Google Drive\n", + "\n", + "from google.colab import drive\n", + "drive.mount('/content/gdrive', force_remount=True)\n", + "\n", + "!ls gdrive/MyDrive" + ], + "metadata": { + "id": "i6QNwjcWlX7L" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Basecall and Demultiplex process\n", + "\n", + "%%shell\n", + "\n", + "# Specify the name of the folder (i.e., 20240411_1608_MN22990_FAT84733_f175dd69/fast5)\n", + "DIR=\"20240411_1608_MN22990_FAT84733_f175dd69/fast5\"\n", + "\n", + "# Specify the barcode kit used for library preparation\n", + "BARKIT=\"EXP-NBD114\"\n", + "\n", + "PREFIX=\"$(echo \"$IDIR\" | awk -F/ '{print $1}')\"\n", + "\n", + "pod5 convert fast5 $IDIR --output \"/content/gdrive/MyDrive/\"$PREFIX\".pod5\"\n", + "\n", + "# Specify the accurate basecalling model - fast (fast), high (hac) or super (sup) accuracy\n", + "# dna_r9.4.1_e8_fast@v3.4\n", + "# dna_r9.4.1_e8_hac@v3.3\n", + "# dna_r9.4.1_e8_sup@v3.3\n", + "./ont-dorado/bin/dorado basecaller \"ont-dorado/models/dna_r9.4.1_e8_sup@v3.3\" \\\n", + "\"/content/gdrive/MyDrive/\"$PREFIX\".pod5\" -r --min-qscore 8 --emit-fastq >> \"/content/gdrive/MyDrive/\"$PREFIX\"_DORADO-BASECALL.fastq\"\n", + "\n", + "./ont-dorado/bin/dorado demux \"/content/gdrive/MyDrive/\"$PREFIX\"_DORADO-SUP.fastq\" --emit-fastq \\\n", + "--output-dir \"/content/gdrive/MyDrive/\"$PREFIX\"_DORADO-DEMUX\" --kit-name $BARKIT -t $(nproc)" + ], + "metadata": { + "id": "3h3DJ72AH1I9" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file