diff --git a/.github/workflows/labgraph_audiogen.yml b/.github/workflows/labgraph_audiogen.yml
new file mode 100644
index 00000000..57bc4372
--- /dev/null
+++ b/.github/workflows/labgraph_audiogen.yml
@@ -0,0 +1,34 @@
+name: AudioGen Tests
+
+on: [push]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.8'
+
+      - name: Install dependencies
+        run: |
+          cd extensions/lg_audiogen
+          python -m pip install --upgrade pip
+          # Refresh the package index and pass -y so apt never waits for confirmation.
+          sudo apt-get update
+          sudo apt-get install -y ffmpeg
+          # Install the CPU-only PyTorch wheels.
+          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          pip install --pre xformers
+          pip install -e .
+          pip install pytest
+
+      - name: Run tests
+        run: |
+          cd extensions/lg_audiogen
+          pytest -vvv
\ No newline at end of file
diff --git a/extensions/lg_audiogen/README.md b/extensions/lg_audiogen/README.md
new file mode 100644
index 00000000..dad65c03
--- /dev/null
+++ b/extensions/lg_audiogen/README.md
@@ -0,0 +1,68 @@
+# Audiogen
+
+Audiogen is a Python command-line tool that uses models from Audiocraft's AudioGen to generate audio from specified descriptions. This tool can generate a single piece of audio based on a specific description or multiple pieces of audio based on a batch file containing multiple descriptions.
+
+## Features
+
+* Ability to specify duration of the generated audio.
+* Ability to generate audio based on a batch file.
+* Ability to specify the model to be used for the audio generation.
+* Ability to set the output file name.
+
+## Setup
+
+Audiocraft needs Python 3.8 or higher to run. If you have a suitable version of Python installed, you can install Audiogen with pip:
+
+```shell
+pip install -e .
+```
+
+## Usage
+
+### Command-line interface
+
+The CLI usage for Audiogen is `lg_audiogen [OPTIONS] [DESCRIPTION]...`.
+
+### Options
+
+* `description`: the description based on which the audio is to be generated.
+* `duration, -d`: duration of the generated audio, default is 5.
+* `model, -m`: name of the Audiocraft AudioGen model to use, default is 'facebook/audiogen-medium'.
+* `output, -o`: name of the output file.
+* `batch, -b`: name of a batch file containing one audio description per line.
+
+### Example
+
+To generate audio, use one of the following commands:
+
+```shell
+lg_audiogen -d 5 -m 'facebook/audiogen-medium' -o 'my_output' 'dog barking'
+
+lg_audiogen 'dog barking'
+
+lg_audiogen -b 'batch.txt'
+```
+
+### Batch File Format
+
+The batch file should contain one description per line. The descriptions should be in the same format as the descriptions used in the command-line interface.
+
+Example:
+
+*batch.txt*
+```txt
+Natural sounds of a rainforest
+Bird Chirping in the background
+```
+
+### Samples
+
+[Google Drive Folder](https://drive.google.com/drive/folders/1kdWB1CBog4NGVJ7jWddKLtBAuPm3gwDq?usp=drive_link)
+
+## OS Support
+
+```Tested on Ubuntu 22.04 (Jammy) LTS```
+
+## Error Handling
+
+If the batch file is not found, an error message is displayed. If no description is provided when not using a batch file, a usage error is raised.
diff --git a/extensions/lg_audiogen/lg_audiogen/__init__.py b/extensions/lg_audiogen/lg_audiogen/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/extensions/lg_audiogen/lg_audiogen/main.py b/extensions/lg_audiogen/lg_audiogen/main.py
new file mode 100644
index 00000000..6e738e23
--- /dev/null
+++ b/extensions/lg_audiogen/lg_audiogen/main.py
@@ -0,0 +1,64 @@
+import click
+import torch
+from audiocraft.models import AudioGen
+from audiocraft.data.audio import audio_write
+
+DEFAULT_AUDIOGEN_MODEL = 'facebook/audiogen-medium'
+DEFAULT_AUDIO_DURATION = 5
+
+@click.command()
+@click.argument('description', nargs=-1, required=False)
+@click.option('--duration', '-d', default=DEFAULT_AUDIO_DURATION, help='Duration of the generated audio.')
+@click.option('--model', '-m', default=DEFAULT_AUDIOGEN_MODEL, help='Name of the Audiocraft AudioGen model to use.')
+@click.option('--output', '-o', help='Name of the output file.')
+@click.option('--batch', '-b', type=click.Path(), help='File name for batch audio description.')
+def parse_arguments(description, duration, model, output, batch):
+    """
+    Generates audio from description using Audiocraft's AudioGen.
+    """
+    # NOTE: click uses this docstring as the --help text, so details stay in comments.
+    if batch:
+        try:
+            with open(batch, mode='r', encoding='utf-8') as f:
+                # Skip blank lines so they are not passed on as empty descriptions.
+                descriptions = [line.strip() for line in f if line.strip()]
+        except FileNotFoundError:
+            # Abort instead of falling through with `descriptions` unassigned.
+            raise click.ClickException(
+                f"File {batch} not found. Please check the file path and try again."
+            ) from None
+        if not descriptions:
+            raise click.ClickException(f"File {batch} does not contain any description.")
+    else:
+        if not description:
+            raise click.BadParameter("Description argument is required when not using --batch.")
+        # With nargs=-1 the words arrive as a tuple; join them into one description.
+        descriptions = [' '.join(description)]
+    run_audio_generation(descriptions, duration, model, output)
+
+def run_audio_generation(descriptions, duration, model_name, output):
+    """
+    Load Audiocraft's AudioGen model and generate audio from the descriptions.
+
+    @param descriptions: List of text descriptions to generate audio for.
+    @param duration: Duration of the generated audio.
+    @param model_name: Name of the Audiocraft AudioGen model to use.
+    @param output: Name of the output file. When empty, each file is named after its description.
+    """
+    print(f"Running lg_audiogen with descriptions: {descriptions}")
+
+    # Load Audiocraft's AudioGen model and set generation params.
+    model = AudioGen.get_pretrained(model_name)
+    model.set_generation_params(duration=duration)
+
+    # Generate audio from the descriptions
+    wav = model.generate(descriptions)
+    batch_output = output
+    # Save the generated audios.
+    for idx, one_wav in enumerate(wav):
+        # Will save under {output}{idx}.wav, with loudness normalization at -14 db LUFS.
+        if not output:
+            # No --output given: derive the file name from the description itself.
+            batch_output = descriptions[idx].replace(' ', '_')
+        audio_write(f'{batch_output}{idx}', one_wav.cpu(),
+                    model.sample_rate, strategy="loudness", loudness_compressor=True)
diff --git a/extensions/lg_audiogen/setup.py b/extensions/lg_audiogen/setup.py
new file mode 100644
index 00000000..220198d5
--- /dev/null
+++ b/extensions/lg_audiogen/setup.py
@@ -0,0 +1,22 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='lg_audiogen',
+    version='0.1',
+    description="A Command-line interface to use Audiocraft for labgraph",
+    long_description="""
+    A Command-line interface to facilitate the usage of Audiocraft's models
+    to generate and process audio on labgraph
+    """,
+    packages=find_packages(),
+    install_requires=[
+        "Click>=8.1.7",
+        "torch>=2.1.0",
+        "torchaudio>=2.1.0",
+        "audiocraft==1.1.0",
+    ],
+    entry_points='''
+        [console_scripts]
+        lg_audiogen=lg_audiogen.main:parse_arguments
+    ''',
+)
\ No newline at end of file
diff --git a/extensions/lg_audiogen/tests/test_main.py b/extensions/lg_audiogen/tests/test_main.py
new file mode 100644
index 00000000..c398fcaf
--- /dev/null
+++ b/extensions/lg_audiogen/tests/test_main.py
@@ -0,0 +1,15 @@
+import os
+import subprocess
+
+def test_single_description():
+    '''
+    Tests output with a single description
+    '''
+    # Run the script with an example description
+    result = subprocess.run(["lg_audiogen", "dog barking"],
+                            capture_output=True, text=True, check=False)
+    # Report the CLI's own error output if it failed
+    assert result.returncode == 0, f"lg_audiogen failed: {result.stderr}"
+    # Assert that the output file was created
+    assert os.path.exists("dog_barking0.wav"), "Output file dog_barking0.wav was not created"
+    os.remove("dog_barking0.wav")