diff --git a/extensions/labgraph_diarization/Ego4D_AudioVisual.ipynb b/extensions/labgraph_diarization/Ego4D_AudioVisual.ipynb
new file mode 100644
index 00000000..a5e1c82d
--- /dev/null
+++ b/extensions/labgraph_diarization/Ego4D_AudioVisual.ipynb
@@ -0,0 +1,530 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ad3Kas-mn3Ke"
+      },
+      "source": [
+        "**EGO4D Audio-Visual Diarization Benchmark**\n",
+        "- This notebook provides a quickstart for the [EGO4D Audio Visual Diarization](https://github.com/EGO4D/audio-visual/blob/main/diarization/audio-visual/README.md) and [Transcription](https://github.com/EGO4D/audio-visual/blob/main/transcription/README.md) tasks from the [EGO4D Audio Visual Diarization Benchmark](https://github.com/EGO4D/audio-visual)\n",
+        "- It runs the benchmark on a subset of video clips from the EGO4D dataset, using the code in EGO4D's Audio-Visual repo\n",
+        "- The hardware accelerator should be a T4 GPU\n",
+        "- Some changes have been made in the forked repo so that the code is compatible with Google Colab"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hIBjA7mUpK4F"
+      },
+      "source": [
+        "##Install Dependencies"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "lhUO_Ov678bm"
+      },
+      "outputs": [],
+      "source": [
+        "!apt install ffmpeg python3-pip git\n",
+        "# torch, not libtorch: the C++ libtorch distribution is downloaded separately below\n",
+        "!pip install ego4d awscli numpy opencv-python pyqt5 opencv-contrib-python torch torchvision torchaudio\n",
+        "!sudo apt-get install libavcodec-dev libavformat-dev libswscale-dev libv4l-dev\n",
+        "!sudo apt-get install libxvidcore-dev libx264-dev\n",
+        "!sudo apt install libgtk2.0-dev liblcm-dev\n",
+        "!pip install pydub audiosegment"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "fbt09eafpoET"
+      },
+      "source": [
+        "##Clone Repository & Download the Videos"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "o77Hf1uJ8Fei"
+      },
+      "outputs": [],
+      "source": [
+        "# Create the new egocentric directory\n",
+        "!mkdir egocentric\n",
+        "%cd /content/egocentric\n",
+        "\n",
+        "# Clone the audio-visual repo\n",
+        "!git clone https://github.com/ashneet1/audio-visual.git\n",
+        "%cd /content/egocentric/audio-visual\n",
+        "!mkdir data\n",
+        "\n",
+        "# List the video uids to download\n",
+        "!touch video_uids.txt\n",
+        "!echo \"0b4cacb1-970f-4ef0-85da-371d81f899e0\" >> video_uids.txt\n",
+        "!echo \"c2413391-7c1b-4fd6-8b1d-98ee7888b9f8\" >> video_uids.txt\n",
+        "!echo \"fe69a78e-7773-45d1-9e0f-bacee52dac83\" >> video_uids.txt\n",
+        "!echo \"3b79017c-4d42-40fc-a1bb-4a20bc8ebca7\" >> video_uids.txt\n",
+        "!echo \"6dbfc053-7899-40d8-9827-0ccd21f3ee0a\" >> video_uids.txt\n",
+        "!echo \"7e6dfd31-8544-4fad-9e49-0f05516cf8cf\" >> video_uids.txt\n",
+        "!echo \"56c5af79-f9d4-478d-96ef-6d71e0bbbdfe\" >> video_uids.txt\n",
+        "!echo \"d97bedc8-72df-43be-a55b-4da1ae42dfd1\" >> video_uids.txt\n",
+        "!echo \"f0cb79ef-c081-4049-85ef-2623e02c9589\" >> video_uids.txt\n",
+        "!echo \"08b0935e-6260-4bd6-86ca-f6fc54e388be\" >> video_uids.txt\n",
+        "!echo \"6b34c327-000c-42b6-b242-d3dca63a7508\" >> video_uids.txt\n",
+        "!echo \"076bdb81-5c75-4282-9f3a-a387624575f3\" >> video_uids.txt\n",
+        "\n",
+        "# Configure the AWS CLI with the credentials issued for EGO4D dataset access\n",
+        "!aws configure\n",
+        "\n",
+        "# Download the ego4d models, annotations, and videos\n",
+        "!ego4d -y --output_directory ./data --datasets av_models clips annotations --benchmarks av --video_uid_file video_uids.txt\n",
+        "!tar xf data/v2/av_models/pretrained_av_models.tar.gz\n",
+        "!mv data/v2/annotations/* utils/ground_truth"
+      ]
+    },
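+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Optional sanity check (an addition for this notebook, not part of the benchmark repo): verify that every clip listed in video_uids.txt was downloaded. It assumes the ego4d CLI's default layout, where clips land in data/v2/clips/<uid>.mp4."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Check that each requested clip was downloaded (assumes <uid>.mp4 naming)\n",
+        "import pathlib\n",
+        "\n",
+        "root = pathlib.Path(\"/content/egocentric/audio-visual\")\n",
+        "uids = (root / \"video_uids.txt\").read_text().split()\n",
+        "missing = [u for u in uids if not (root / \"data/v2/clips\" / f\"{u}.mp4\").exists()]\n",
+        "print(f\"{len(uids) - len(missing)}/{len(uids)} clips present\")\n",
+        "for u in missing:\n",
+        "    print(\"missing:\", u)"
+      ]
+    },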
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Install libtorch (the C++ distribution of PyTorch, used by the C++ builds below)\n",
+        "%cd /content/egocentric\n",
+        "!wget \"https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.1.0%2Bcu118.zip\"\n",
+        "!unzip \"libtorch-cxx11-abi-shared-with-deps-2.1.0+cu118.zip\"\n",
+        "!rm -rf \"libtorch-cxx11-abi-shared-with-deps-2.1.0+cu118.zip\""
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Mlbjm9kPvqLr"
+      },
+      "source": [
+        "#EGO4D Audio-Visual Diarization Baseline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "d4opZblNpyHC"
+      },
+      "source": [
+        "##Preprocess Ground Truth Data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "IM6020Xg8aSF"
+      },
+      "outputs": [],
+      "source": [
+        "# Preprocess ground truth data\n",
+        "%cd /content/egocentric/audio-visual/utils/ground_truth\n",
+        "!bash init_dirs.sh\n",
+        "!python3 extract_clipnames_and_split_indices.py\n",
+        "!python3 extract_boxes_and_speakers.py\n",
+        "!python3 make_mot_ground_truth.py ../../data/v2/clips val\n",
+        "!mv tracking_evaluation/mot_challenge ../../tracking/tracking_evaluation/data/gt"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "jt7yzV648gOr",
+        "outputId": "acf51bc2-fb54-4769-c20a-5e7d1b53d25f"
+      },
+      "outputs": [],
+      "source": [
+        "# Run visualize_ground_truth.py (it writes the output video to the current directory)\n",
+        "%cd /content/egocentric/audio-visual/utils/ground_truth/\n",
+        "!python3 visualize_ground_truth.py /content/egocentric/audio-visual/data/v2/clips 0b4cacb1-970f-4ef0-85da-371d81f899e0 # clip index 389"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "c2fNByhuqTHn"
+      },
+      "source": [
+        "##Localization & Tracking"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ZPQT5dMUqZjo"
+      },
+      "source": [
+        "###People Detection Setup"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "ZqOp5PWA8nNR"
+      },
+      "outputs": [],
+      "source": [
+        "# People detection setup\n",
+        "# https://github.com/EGO4D/audio-visual/blob/main/tracking/README.md#people-detection\n",
+        "%cd /content/egocentric/audio-visual/tracking/people_detection\n",
+        "\n",
+        "# Replace the lines in the Makefile to use opencv4 instead of opencv3\n",
+        "!sed -i '44s/.*/LDFLAGS+= `pkg-config --libs opencv4` -lstdc++/' Makefile\n",
+        "!sed -i '45s/.*/COMMON+= `pkg-config --cflags opencv4`/' Makefile\n",
+        "\n",
+        "# Specify the GPU architecture (sm_75 = T4; see the check cell below if Colab assigned a different GPU)\n",
+        "!sed -i '14s/.*/ARCH= -gencode arch=compute_75,code=sm_75/' Makefile\n",
+        "\n",
+        "# Add missing headers required to build using opencv4\n",
+        "# https://stackoverflow.com/questions/64885148/error-iplimage-does-not-name-a-type-when-trying-to-build-darknet-with-opencv\n",
+        "!sed -i '3 i #include \"opencv2/core/core_c.h\"' src/image_opencv.cpp\n",
+        "!sed -i '3 i #include \"opencv2/videoio/legacy/constants_c.h\"' src/image_opencv.cpp\n",
+        "!sed -i '3 i #include \"opencv2/highgui/highgui_c.h\"' src/image_opencv.cpp\n",
+        "\n",
+        "!sed -i '3 i #include \"opencv2/core/core_c.h\"' src/image_opencv.hpp\n",
+        "!sed -i '3 i #include \"opencv2/videoio/legacy/constants_c.h\"' src/image_opencv.hpp\n",
+        "!sed -i '3 i #include \"opencv2/highgui/highgui_c.h\"' src/image_opencv.hpp\n",
+        "\n",
+        "!make -j"
+      ]
+    },
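+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "The ARCH line above is hardcoded for a T4 (compute capability 7.5). This added check (not part of the original instructions) reports the capability of the GPU Colab actually assigned, so the Makefile can be adjusted if it differs."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Report the runtime GPU and its compute capability, e.g. (7, 5) for a T4\n",
+        "import torch\n",
+        "\n",
+        "major, minor = torch.cuda.get_device_capability(0)\n",
+        "print(f\"{torch.cuda.get_device_name(0)}: use ARCH sm_{major}{minor}\")"
+      ]
+    },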
"cell_type": "markdown", + "metadata": { + "id": "vqidCROwqfJA" + }, + "source": [ + "###Short Term Tracking" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2gpLUYd68vE2" + }, + "outputs": [], + "source": [ + "# Short term tracking setup\n", + "# https://github.com/EGO4D/audio-visual/blob/main/tracking/README.md#short_term_tracking\n", + "%cd /content/egocentric/audio-visual/tracking/short_term_tracking\n", + "\n", + "# Modify line 13 in the CMake file to include the opencv4 directory\n", + "# https://stackoverflow.com/questions/58478074/how-to-fix-fatal-error-opencv2-core-hpp-no-such-file-or-directory-for-opencv\n", + "!sed -i '13s,.*,include_directories( /usr/local/include /usr/local/cuda/include /usr/include/opencv4/ ),' CMakeLists.txt\n", + "\n", + "# Modify line 17 in the CMake file to fix a compilation error\n", + "# https://github.com/pytorch/pytorch/issues/103371\n", + "!sed -i '17s,.*,set_property(TARGET short_term_tracker PROPERTY CXX_STANDARD 17),' CMakeLists.txt\n", + "!mkdir build\n", + "%cd build\n", + "!cmake -DCMAKE_PREFIX_PATH=/content/egocentric/libtorch ..\n", + "!make" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jIfxzYDgqlkf" + }, + "source": [ + "###Run Global People Tracking" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D-a-ez1A85EB" + }, + "outputs": [], + "source": [ + "#Global People Tracking\n", + "%cd /content/egocentric/audio-visual/tracking\n", + "!python3 single_run.py /content/egocentric/audio-visual/data/v2/clips 438" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uG8NV_oUrDZY" + }, + "source": [ + "##Voice Activity Detection (VAD)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SxlxvS0Y81BU" + }, + "outputs": [], + "source": [ + "#Voice Activity Audio Detection\n", + "%cd /content/egocentric/audio-visual/active-speaker-detection/vad\n", + "!python3 extract_all_audio.py /content/egocentric/audio-visual/data/v2/clips\n", + "!python3 vad.py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rVr2OzKIrRBY" + }, + "source": [ + "##Active Speaker Detection (ASD)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2BN1eTm_rePW" + }, + "source": [ + "####Mouth Region Classification (MRC)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ipOe-4y1883Q" + }, + "outputs": [], + "source": [ + "# Active Speaker Detection (ASD)\n", + "# Mouth region classification (MRC)\n", + "%cd /content/egocentric/audio-visual/active-speaker-detection/active_speaker/mrc_active_speaker_detection/prediction\n", + "\n", + "# Modify line 15 and 17 in the CMake file to include the opencv4 directory\n", + "# https://stackoverflow.com/questions/58478074/how-to-fix-fatal-error-opencv2-core-hpp-no-such-file-or-directory-for-opencv\n", + "!sed -i '15s,.*,link_directories( /usr/local/lib /usr/local/cuda/lib64 /usr/include/opencv4/ ),' CMakeLists.txt\n", + "!sed -i '17s,.*,include_directories( /usr/local/include /usr/local/cuda/include /usr/local/cuda/targets/x86_64-linux/include /usr/include/opencv4/ ),' CMakeLists.txt\n", + "\n", + "# Modify line 21 in the CMake file to fix a compilation error\n", + "# https://github.com/pytorch/pytorch/issues/103371\n", + "!sed -i '21s,.*,set_property(TARGET mrc PROPERTY CXX_STANDARD 17),' CMakeLists.txt\n", + "# Build MRC tracking code\n", + "!mkdir build\n", + "%cd build\n", + "!cmake -DCMAKE_PREFIX_PATH=/content/egocentric/libtorch 
..\n", + "!make" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FcFzpREn8_1P" + }, + "outputs": [], + "source": [ + "#Running the MRC\n", + "%cd /content/egocentric/audio-visual/active-speaker-detection/active_speaker/mrc_active_speaker_detection/prediction\n", + "!python3 run_once.py /content/egocentric/audio-visual/data/v2/clips ego4d 389" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PY4Mha_QrjV-" + }, + "source": [ + "##Audio Embedding" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_OzROruu9CVO" + }, + "outputs": [], + "source": [ + "#Voice Embedding\n", + "%cd /content/egocentric/audio-visual/active-speaker-detection/audio_embedding/make_audio_embeddings\n", + "!python3 batch_audio_embedding.py /content/egocentric/audio-visual/data/v2/clips val" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XXUYRiQ1rueJ" + }, + "source": [ + "##Device wearer voice activity detection" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kssnc4VNr4ra" + }, + "source": [ + "####Energy Based Method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LMZ3jbsM9E5c" + }, + "outputs": [], + "source": [ + "#Wearer: energy based method\n", + "%cd /content/egocentric/audio-visual/active-speaker-detection/wearer/energy_based\n", + "!python3 short_time_energy.py /content/egocentric/audio-visual/data/v2/clips val\n", + "!python3 match_wearer_audio.py val" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pQnNzUoasJjx" + }, + "source": [ + "##Surrounding people voice matching (MRC)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wOTCIPGi9Mpn" + }, + "outputs": [], + "source": [ + "#Surrounding People Audio Matching: MRC\n", + "%cd /content/egocentric/audio-visual/active-speaker-detection/surrounding_people_audio_matching/mrc\n", + "!python3 match_audio.py /content/egocentric/audio-visual/active-speaker-detection/active_speaker/mrc_active_speaker_detection/prediction/results val" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7rVXK-W5b_Nu" + }, + "source": [ + "#Transcription\n", + "- Need to move \"av_test_unannotated.json\",\"av_train.json\", and \"av_val.json\" from /content/egocentric/audio-visual/utils/ground_truth to /content/egocentric/audio-visual/data\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gWtnMbJrbybQ" + }, + "outputs": [], + "source": [ + "#Install Miniconda\n", + "%cd /content/\n", + "#https://www.kaggle.com/code/alaajah/creating-virtual-environment-on-google-colab\n", + "!wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh\n", + "!chmod +x Miniconda3-latest-Linux-x86_64.sh\n", + "!./Miniconda3-latest-Linux-x86_64.sh -b -f -p /usr/local\n", + "!conda install -q -y --prefix /usr/local python=3.8.10 ujson" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IvLBi0e_cEq8" + }, + "outputs": [], + "source": [ + "#Install Sclite\n", + "%cd /content/\n", + "!git clone https://github.com/usnistgov/SCTK.git\n", + "%cd SCTK\n", + "! make config\n", + "! make all\n", + "! make check\n", + "! make install\n", + "! 
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "To36j1F_cLIC"
+      },
+      "outputs": [],
+      "source": [
+        "# Create the transcription conda environment\n",
+        "%cd /content/egocentric/audio-visual/transcription\n",
+        "!conda create --name transcription_env --file requirements_38_10.txt\n",
+        "!pip install soundfile\n",
+        "!pip install torch\n",
+        "!pip install espnet_model_zoo"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "d7do0eXVcQRr"
+      },
+      "outputs": [],
+      "source": [
+        "# Extract 16kHz single channel audio files in wav format from the videos\n",
+        "!mkdir -p /content/egocentric/audio-visual/data/v2/wavs_16000\n",
+        "%cd /content/egocentric/audio-visual/transcription\n",
+        "!chmod +x extract_wav.sh\n",
+        "!./extract_wav.sh /content/egocentric/audio-visual/data/v2/clips /content/egocentric/audio-visual/data/v2/wavs_16000"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "okmVTCt4cadk"
+      },
+      "outputs": [],
+      "source": [
+        "# Extract transcriptions from the annotation files, decode the audio, and score the decoding output\n",
+        "%cd /content/egocentric/audio-visual/transcription\n",
+        "!pip install torchaudio\n",
+        "!chmod +x score_asr.sh\n",
+        "!./score_asr.sh /content/egocentric/audio-visual/transcription/output 1"
+      ]
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "gpuType": "T4",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
diff --git a/extensions/labgraph_diarization/README.md b/extensions/labgraph_diarization/README.md
new file mode 100644
index 00000000..58c7d7f8
--- /dev/null
+++ b/extensions/labgraph_diarization/README.md
@@ -0,0 +1,6 @@
+# EGO4D Audio-Visual Diarization Benchmark Colab
+
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1UBGYBNrjJckCb0KoyQ6xXea79kuoLgu6#scrollTo=lhUO_Ov678bm)
+
+This Colab notebook downloads a subset of video clips from the EGO4D dataset and performs Audio-Visual Diarization and Transcription with the [EGO4D Audio Visual Diarization Benchmark](https://github.com/EGO4D/audio-visual/blob/main/diarization/audio-visual/README.md). To access the dataset you will need to review the license agreement and accept the terms for [EGO4D](https://ego4d-data.org/docs/start-here/).
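+
+The notebook's `aws configure` step expects the AWS access key and secret that are issued once the EGO4D license agreement is approved. A sketch of the prompts (all values are placeholders; region and output format can be left blank):
+
+```
+$ aws configure
+AWS Access Key ID [None]: <access key issued with your EGO4D license>
+AWS Secret Access Key [None]: <secret key issued with your EGO4D license>
+Default region name [None]:
+Default output format [None]:
+```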