diff --git a/.gitattributes b/.gitattributes
index 5462cde72..78248ee02 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,9 +1,7 @@
 # https://git-scm.com/docs/gitattributes
-
 # Set the default behavior, in case people don't have core.autocrlf set.
 # https://git-scm.com/docs/gitattributes#_end_of_line_conversion
 * text=auto
-
 # common python attributes, taken from https://github.com/alexkaratarakis/gitattributes/blob/710900479a2bedeec7003d381719521ffbb18bf8/Python.gitattributes
 # Source files
 # ============
@@ -14,7 +12,6 @@
 *.pyx text diff=python
 *.pyz text diff=python
 *.pyi text diff=python
-
 # Binary files
 # ============
 *.db binary
@@ -24,6 +21,7 @@
 *.pyc binary export-ignore
 *.pyo binary export-ignore
 *.pyd binary
-
 # Jupyter notebook
 *.ipynb text eol=lf
+llava_data_r1.zip filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
index 6ff6a3dc8..d3bb920a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,3 +33,4 @@ ckpts*
 
 # Demo
 serve_images/
+llava/
diff --git a/LICENSE b/LICENSE
old mode 100644
new mode 100755
diff --git a/README.md b/README.md
old mode 100644
new mode 100755
index 794ce1b27..98db9cb9f
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # 🌋 LLaVA: Large Language and Vision Assistant
 
-*Visual instruction tuning towards large language and vision models with GPT-4 level capabilities.*
+*Visual instruction tuning towards large language and vision models with GPT-4 level capabilities*
 
 [📢 [LLaVA-NeXT Blog](https://llava-vl.github.io/blog/2024-01-30-llava-next/)] [[Project Page](https://llava-vl.github.io/)] [[Demo](https://llava.hliu.cc/)] [[Data](https://github.com/haotian-liu/LLaVA/blob/main/docs/Data.md)] [[Model Zoo](https://github.com/haotian-liu/LLaVA/blob/main/docs/MODEL_ZOO.md)]
 
diff --git a/cog.yaml b/cog.yaml
old mode 100644
new mode 100755
diff --git a/data_prep/LLaVA_InitialJson.py b/data_prep/LLaVA_InitialJson.py
new file mode 100644
index 000000000..cd5398bc7
--- /dev/null
+++ b/data_prep/LLaVA_InitialJson.py
@@ -0,0 +1,40 @@
+import os
+import json
+
+# Function to extract conversation data from .gui file
+def extract_conversations_from_gui(gui_file):
+    with open(gui_file, 'r') as file:
+        gui_text = file.read().strip()
+    return [{'from': 'human', 'value': '<image>\nWrite a code for the given UI'}, {'from': 'gpt', 'value': gui_text}]
+
+
+# Function to convert data to JSON format
+def convert_data_to_json(input_folder, output_folder):
+    data = []
+    for filename in os.listdir(input_folder):
+        if filename.endswith('.gui'):
+            sample_id = filename.split('.')[0]
+            image_path = "./Sketch2Code_og/" + os.path.relpath(os.path.join(input_folder, f"{sample_id}.png"), output_folder)
+            gui_path = os.path.join(input_folder, filename)
+            conversations = extract_conversations_from_gui(gui_path)
+            sample = {
+                'id': sample_id,
+                'image': image_path,
+                'conversations': conversations
+            }
+            data.append(sample)
+
+    output_path = os.path.join(output_folder, 'samples.json')
+    with open(output_path, 'w') as json_file:
+        json.dump(data, json_file, indent=2)
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Convert data to JSON format")
+    parser.add_argument("input_folder", help="Input folder containing .gui files and corresponding images")
+    parser.add_argument("output_folder", help="Output folder to store the JSON file")
+
+    args = parser.parse_args()
+
+    convert_data_to_json(args.input_folder, args.output_folder)
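For each `.gui`/`.png` pair, the script above appends one record to `samples.json`. The sketch below shows roughly what such a record looks like; the id, image path, and DSL text are made-up placeholders, and the human turn follows LLaVA's `<image>` placeholder convention:

```python
# Illustrative only: approximate shape of one samples.json record produced by
# data_prep/LLaVA_InitialJson.py. The id, path, and DSL text are placeholders.
import json

example_sample = {
    "id": "sample_001",
    "image": "./Sketch2Code_og/data/sample_001.png",
    "conversations": [
        {"from": "human", "value": "<image>\nWrite a code for the given UI"},
        {"from": "gpt", "value": "header { btn-active, btn-inactive }"},  # text read from sample_001.gui
    ],
}

print(json.dumps([example_sample], indent=2))  # samples.json is a list of such records
```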
diff --git a/data_prep/LLaVA_dataSplit.py b/data_prep/LLaVA_dataSplit.py
new file mode 100644
index 000000000..7813d0560
--- /dev/null
+++ b/data_prep/LLaVA_dataSplit.py
@@ -0,0 +1,87 @@
+import os
+import json
+import random
+import shutil
+import argparse
+
+def split_dataset(input_folder, output_folder, test_split=10, validation_split=10, seed=42):
+    # Create output folders for train, test, and validation sets
+    train_folder = os.path.join(output_folder, 'train')
+    test_folder = os.path.join(output_folder, 'test')
+    validation_folder = os.path.join(output_folder, 'validation')
+    split_json_folder = os.path.join(output_folder, 'split_json_files')
+
+    for folder in [train_folder, test_folder, validation_folder, split_json_folder]:
+        os.makedirs(folder, exist_ok=True)
+
+    # Load the JSON file containing the data
+    json_file_path = os.path.join(input_folder, 'samples.json')
+    with open(json_file_path, 'r') as json_file:
+        data = json.load(json_file)
+
+    # Calculate the number of samples for test and validation sets
+    num_samples = len(data)
+    num_test_samples = num_samples * test_split // 100
+    num_validation_samples = num_samples * validation_split // 100
+
+    # Set random seed for reproducibility
+    random.seed(seed)
+
+    # Randomly select indices for test set
+    test_indices = random.sample(range(num_samples), num_test_samples)
+
+    # Remove test indices from the list of all indices
+    remaining_indices = [i for i in range(num_samples) if i not in test_indices]
+
+    # Randomly select indices for validation set from remaining indices
+    validation_indices = random.sample(remaining_indices, num_validation_samples)
+
+    # The remaining indices are for the training set
+    train_indices = [i for i in remaining_indices if i not in validation_indices]
+
+    # Copy files to corresponding folders and update JSON files
+    for idx, sample in enumerate(data):
+        source_image = sample['image']
+        source_gui = os.path.join(input_folder, 'data', f"{sample['id']}.gui")
+        destination_folder = None
+        if idx in test_indices:
+            destination_folder = test_folder
+        elif idx in validation_indices:
+            destination_folder = validation_folder
+        else:
+            destination_folder = train_folder
+
+        # Copy files to destination folder
+        image_filename = os.path.basename(source_image)
+        gui_filename = f"{sample['id']}.gui"
+        destination_image = os.path.join(destination_folder, image_filename)
+        destination_gui = os.path.join(destination_folder, gui_filename)
+        shutil.copy(source_image, destination_image)
+        shutil.copy(source_gui, destination_gui)
+
+        # Update JSON data with relative paths
+        relative_image_path = os.path.relpath(destination_image, output_folder)
+        relative_gui_path = os.path.relpath(destination_gui, output_folder)
+        sample['image'] = "./" + relative_image_path
+        sample['gui'] = "./" + relative_gui_path
+
+    # Create updated JSON files for each split
+    splits = {'train': train_indices, 'test': test_indices, 'validation': validation_indices}
+    for split, indices in splits.items():
+        split_data = [data[i] for i in indices]
+        split_json_path = os.path.join(split_json_folder, f"{split}_json.json")
+        with open(split_json_path, 'w') as json_file:
+            json.dump(split_data, json_file, indent=2)
+
+    print("Dataset splitting and JSON file creation completed.")
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Split dataset and generate JSON files")
+    parser.add_argument("input_folder", help="Path to the input folder containing the dataset")
+    parser.add_argument("output_folder", help="Path to the output folder to save the split dataset")
+    parser.add_argument("--test_split", type=int, default=10, help="Percentage of data to use for test (default: 10)")
+    parser.add_argument("--validation_split", type=int, default=10, help="Percentage of data to use for validation (default: 10)")
+    parser.add_argument("--seed", type=int, default=42, help="Random seed for reproducibility (default: 42)")
+    args = parser.parse_args()
+
+    split_dataset(args.input_folder, args.output_folder, args.test_split, args.validation_split, args.seed)
diff --git a/data_prep/Readme.md b/data_prep/Readme.md
new file mode 100644
index 000000000..656f25f26
--- /dev/null
+++ b/data_prep/Readme.md
@@ -0,0 +1,15 @@
+The Python file LLaVA_InitialJson.py will build the JSON data in the customised format required by the LLaVA model.
+The Python file LLaVA_dataSplit.py will perform the data splitting.
+
+Use the following commands to run the .py files and generate the data splits:
+
+python LLaVA_InitialJson.py ~/LLaVA/data_prep/Sketch2Code_og/data ~/LLaVA/data_prep/Sketch2Code_og
+
+python LLaVA_dataSplit.py ~/LLaVA/data_prep/Sketch2Code_og/ ~/splitted_data_verify/
diff --git a/llava/model/builder.py b/llava/model/builder.py
index e3d50829f..009a21103 100644
--- a/llava/model/builder.py
+++ b/llava/model/builder.py
@@ -24,7 +24,7 @@
 
 
 def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, load_4bit=False, device_map="auto", device="cuda", use_flash_attn=False, **kwargs):
-    kwargs = {"device_map": device_map, **kwargs}
+    kwargs = {"device_map": device_map, "offload_folder": "offload", **kwargs}
 
     if device != "cuda":
         kwargs['device_map'] = {"": device}
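The `offload_folder` entry added above is forwarded with the rest of `kwargs` to Hugging Face `from_pretrained`, so weights that do not fit in GPU or CPU memory can spill into an `./offload` directory. A minimal sketch of how the patched loader might be called, assuming the `liuhaotian/llava-v1.5-7b` checkpoint referenced elsewhere in this diff:

```python
# Sketch only: loading a checkpoint through the patched builder.
# The model path/name are assumptions taken from the finetuning scripts in this diff.
from llava.model.builder import load_pretrained_model

tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path="liuhaotian/llava-v1.5-7b",
    model_base=None,
    model_name="llava-v1.5-7b",
    device_map="auto",  # merged with "offload_folder": "offload" before reaching from_pretrained
)
```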
diff --git a/predict.py b/predict.py
old mode 100644
new mode 100755
diff --git a/pyproject.toml b/pyproject.toml
old mode 100644
new mode 100755
diff --git a/scripts/convert_gqa_for_eval.py b/scripts/convert_gqa_for_eval.py
old mode 100644
new mode 100755
diff --git a/scripts/convert_mmbench_for_submission.py b/scripts/convert_mmbench_for_submission.py
old mode 100644
new mode 100755
diff --git a/scripts/convert_mmvet_for_eval.py b/scripts/convert_mmvet_for_eval.py
old mode 100644
new mode 100755
diff --git a/scripts/convert_seed_for_submission.py b/scripts/convert_seed_for_submission.py
old mode 100644
new mode 100755
diff --git a/scripts/convert_sqa_to_llava.py b/scripts/convert_sqa_to_llava.py
old mode 100644
new mode 100755
diff --git a/scripts/convert_sqa_to_llava_base_prompt.py b/scripts/convert_sqa_to_llava_base_prompt.py
old mode 100644
new mode 100755
diff --git a/scripts/convert_vizwiz_for_submission.py b/scripts/convert_vizwiz_for_submission.py
old mode 100644
new mode 100755
diff --git a/scripts/convert_vqav2_for_submission.py b/scripts/convert_vqav2_for_submission.py
old mode 100644
new mode 100755
diff --git a/scripts/extract_mm_projector.py b/scripts/extract_mm_projector.py
old mode 100644
new mode 100755
diff --git a/scripts/finetune.sh b/scripts/finetune.sh
old mode 100644
new mode 100755
diff --git a/scripts/finetune_full_schedule.sh b/scripts/finetune_full_schedule.sh
old mode 100644
new mode 100755
diff --git a/scripts/finetune_lora.sh b/scripts/finetune_lora.sh
old mode 100644
new mode 100755
diff --git a/scripts/finetune_qlora.sh b/scripts/finetune_qlora.sh
old mode 100644
new mode 100755
diff --git a/scripts/finetune_sqa.sh b/scripts/finetune_sqa.sh
old mode 100644
new mode 100755
diff --git a/scripts/merge_lora_weights.py b/scripts/merge_lora_weights.py
old mode 100644
new mode 100755
diff --git a/scripts/pretrain.sh b/scripts/pretrain.sh
old mode 100644
new mode 100755
diff --git a/scripts/pretrain_xformers.sh b/scripts/pretrain_xformers.sh
old mode 100644
new mode 100755
diff --git a/scripts/sqa_eval_batch.sh b/scripts/sqa_eval_batch.sh
old mode 100644
new mode 100755
diff --git a/scripts/sqa_eval_gather.sh b/scripts/sqa_eval_gather.sh
old mode 100644
new mode 100755
diff --git a/scripts/v1_5/finetune.sh b/scripts/v1_5/finetune.sh
old mode 100644
new mode 100755
diff --git a/scripts/v1_5/finetune_lora.sh b/scripts/v1_5/finetune_lora.sh
old mode 100644
new mode 100755
index 90f00707c..3f3955b60
--- a/scripts/v1_5/finetune_lora.sh
+++ b/scripts/v1_5/finetune_lora.sh
@@ -1,14 +1,14 @@
 #!/bin/bash
+
 deepspeed llava/train/train_mem.py \
     --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
     --deepspeed ./scripts/zero3.json \
-    --model_name_or_path lmsys/vicuna-13b-v1.5 \
+    --model_name_or_path liuhaotian/llava-v1.5-7b \
     --version v1 \
-    --data_path ./playground/data/llava_v1_5_mix665k.json \
-    --image_folder ./playground/data \
+    --data_path /home/akash/data/LLAVA_data.json \
+    --image_folder /home/akash/data/ \
     --vision_tower openai/clip-vit-large-patch14-336 \
-    --pretrain_mm_mlp_adapter ./checkpoints/llava-v1.5-13b-pretrain/mm_projector.bin \
     --mm_projector_type mlp2x_gelu \
     --mm_vision_select_layer -2 \
     --mm_use_im_start_end False \
@@ -16,9 +16,9 @@ deepspeed llava/train/train_mem.py \
     --image_aspect_ratio pad \
     --group_by_modality_length True \
     --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-13b-lora \
+    --output_dir ./checkpoints/llava-v1.6-mistral-7b \
     --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
+    --per_device_train_batch_size 4 \
     --per_device_eval_batch_size 4 \
     --gradient_accumulation_steps 1 \
     --evaluation_strategy "no" \
@@ -34,5 +34,4 @@ deepspeed llava/train/train_mem.py \
     --model_max_length 2048 \
     --gradient_checkpointing True \
     --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb
+    --lazy_preprocess True
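Both LoRA scripts in this change cut `--per_device_train_batch_size` from 16 to 4 while leaving `--gradient_accumulation_steps` at 1, so the effective batch size shrinks unless accumulation or the GPU count compensates. A quick sanity check, with the GPU count as an assumption:

```python
# Effective batch size = per-device batch size * gradient accumulation steps * number of GPUs.
def effective_batch(per_device: int, grad_accum: int, num_gpus: int) -> int:
    return per_device * grad_accum * num_gpus

num_gpus = 4  # assumption: however many GPUs deepspeed launches on
print(effective_batch(16, 1, num_gpus))  # original scripts: 64
print(effective_batch(4, 1, num_gpus))   # this change: 16; set --gradient_accumulation_steps 4 to match the old value
```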
diff --git a/scripts/v1_5/finetune_task.sh b/scripts/v1_5/finetune_task.sh
old mode 100644
new mode 100755
diff --git a/scripts/v1_5/finetune_task_lora.sh b/scripts/v1_5/finetune_task_lora.sh
old mode 100644
new mode 100755
index f11303f29..ec9dfb593
--- a/scripts/v1_5/finetune_task_lora.sh
+++ b/scripts/v1_5/finetune_task_lora.sh
@@ -1,12 +1,12 @@
 #!/bin/bash
 
-deepspeed llava/train/train_mem.py \
+deepspeed "${PWD}/../../llava/train/train_mem.py" \
     --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
-    --deepspeed ./scripts/zero3.json \
-    --model_name_or_path liuhaotian/llava-v1.5-13b \
+    --deepspeed "${PWD}/../zero3.json" \
+    --model_name_or_path liuhaotian/llava-v1.5-7b \
     --version v1 \
-    --data_path ./playground/data/llava_v1_5_mix665k.json \
-    --image_folder ./playground/data \
+    --data_path "${PWD}/../../data_prep/splitted_data/split_json_files/train_json.json" \
+    --image_folder "${PWD}/../../data_prep/splitted_data/" \
     --vision_tower openai/clip-vit-large-patch14-336 \
     --mm_projector_type mlp2x_gelu \
     --mm_vision_select_layer -2 \
@@ -15,9 +15,9 @@ deepspeed llava/train/train_mem.py \
     --image_aspect_ratio pad \
     --group_by_modality_length True \
     --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-13b-task-lora \
+    --output_dir ./checkpoints/llava-v1.5-7b-task-lora \
     --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
+    --per_device_train_batch_size 4 \
     --per_device_eval_batch_size 4 \
     --gradient_accumulation_steps 1 \
     --evaluation_strategy "no" \
@@ -34,4 +34,4 @@ deepspeed llava/train/train_mem.py \
     --gradient_checkpointing True \
     --dataloader_num_workers 4 \
     --lazy_preprocess True \
-    --report_to wandb
+
diff --git a/scripts/v1_5/pretrain.sh b/scripts/v1_5/pretrain.sh
old mode 100644
new mode 100755
diff --git a/scripts/zero2.json b/scripts/zero2.json
old mode 100644
new mode 100755
diff --git a/scripts/zero3.json b/scripts/zero3.json
old mode 100644
new mode 100755
index 6917317af..f3e1c4408
--- a/scripts/zero3.json
+++ b/scripts/zero3.json
@@ -24,5 +24,8 @@
         "stage3_max_live_parameters": 1e9,
         "stage3_max_reuse_distance": 1e9,
         "stage3_gather_16bit_weights_on_model_save": true
-    }
-}
\ No newline at end of file
+    },
+    "wandb": {
+        "enabled": false
+    }
+}
diff --git a/scripts/zero3_offload.json b/scripts/zero3_offload.json
old mode 100644
new mode 100755
diff --git a/wiki/navigation.md b/wiki/navigation.md
new file mode 100644
index 000000000..04dda3189
--- /dev/null
+++ b/wiki/navigation.md
@@ -0,0 +1,52 @@
+# Welcome to the LLaVA DSL Gen Project Wiki
+
+## Table of Contents
+- [Introduction](#introduction)
+- [Installation](#installation)
+- [Usage](#usage)
+
+## Introduction
+Welcome to the LLaVA DSL Gen Project! This project is designed to demonstrate how to install and navigate through this repository.
+
+## Installation
+
+### Step-by-Step LLaVA Installation Guide from the GitHub Repository
+
+1. **Clone this repository and navigate to the LLaVA folder:**
+   ```shell
+   git clone https://github.com/haotian-liu/LLaVA.git
+   cd LLaVA
+   ```
+
+2. **Install Package:**
+   ```shell
+   conda create -n llava python=3.10 -y
+   conda activate llava
+   pip install --upgrade pip  # enable PEP 660 support
+   pip install -e .
+   ```
+
+3. **Install additional packages for training cases:**
+   ```shell
+   pip install -e ".[train]"
+   pip install flash-attn --no-build-isolation
+   ```
+
+_To run the current code, use the llava_new venv._
+
+## Usage
+### Finetuning Guide
+To start finetuning, run _LLaVA/scripts/v1_5/finetune_task_lora.sh_ with the desired hyperparameter settings.
+
+### Data Preparation Guide
+In addition to the existing steps, this repository also offers a feature to split your data into train/validation/test sets according to your desired split ratio. Follow the steps below.
+
+1. _LLaVA_InitialJson.py_ converts your custom data into the LLaVA dataset format and produces the .json file used in the next step.
+   ```shell
+   Syntax: python LLaVA_InitialJson.py <input_folder_with_gui_and_png_files> <output_folder_to_store_json_file>
+   ```
+
+2. _LLaVA_dataSplit.py_ splits your data into the required train/validation/test sets; the default split is 80/10/10.
+   ```shell
+   Syntax: python LLaVA_dataSplit.py <input_folder_containing_data_and_json_file> <output_folder_to_save_the_split_dataset>
+   ```
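As a follow-up to the data preparation guide above, the resulting split sizes can be checked from the JSON files that `LLaVA_dataSplit.py` writes into `split_json_files/`; the output folder below is the one used in `data_prep/Readme.md` and is otherwise an assumption:

```python
# Count samples per split using the files written by data_prep/LLaVA_dataSplit.py.
import json
import os

output_folder = os.path.expanduser("~/splitted_data_verify")  # folder from data_prep/Readme.md
for split in ("train", "validation", "test"):
    path = os.path.join(output_folder, "split_json_files", f"{split}_json.json")
    with open(path) as f:
        print(f"{split}: {len(json.load(f))} samples")  # defaults give roughly an 80/10/10 split
```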
diff --git a/wiki/navigation.md.save b/wiki/navigation.md.save
new file mode 100644
index 000000000..169526eba
--- /dev/null
+++ b/wiki/navigation.md.save
@@ -0,0 +1,53 @@
+# Welcome to the LLaVA DSL Gen Project Wiki
+
+## Table of Contents
+- [Introduction](#introduction)
+- [Installation](#installation)
+- [Usage](#usage)
+- [Contributing](#contributing)
+- [FAQ](#faq)
+
+## Introduction
+Welcome to the LLaVA DSL Gen Project! This project is designed to demonstrate how to install and navigate through this repository.
+
+## Installation
+
+### Step-by-Step LLaVA Installation Guide from the Github Repository
+
+1. **Clone this repository and navigate to the LLaVA folder:**
+   ```shell
+   git clone https://github.com/haotian-liu/LLaVA.git
+   cd LLaVA
+   ```
+
+2. **Install Package:**
+   ```shell
+   conda create -n llava python=3.10 -y
+   conda activate llava
+   pip install --upgrade pip  # enable PEP 660 support
+   pip install -e .
+   ```
+
+3. **Install additional packages for training cases:**
+   ```shell
+   pip install -e ".[train]"
+   pip install flash-attn --no-build-isolation
+   ```
+
+_For the purpose of running the current codes use the llava_new venev._
+
+### Finetuning Guide
+To start finetuning, run the _LLaVA/scripts/v1_5/finetune_task_lora.sh_ with the desired hyperparameter settings.
+
+### Data Preperation Guide
+In addition to existing steps, the current repository also offers additional feature to split your data into train_eval_test split according to your desired split ratio. Follow the below steps for the same.
+1. _
+
+
+
+## Usage
+After installation, you can start the project with:
+```bash
+npm start
+
+_
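For anyone who prefers driving the data preparation from Python instead of the shell commands in `data_prep/Readme.md`, a minimal sketch; the paths mirror that Readme and are otherwise assumptions, and it assumes the working directory is `data_prep/` so both modules are importable:

```python
# Run both data-prep steps programmatically (assumes the current directory is data_prep/).
import os

from LLaVA_InitialJson import convert_data_to_json
from LLaVA_dataSplit import split_dataset

base = os.path.expanduser("~/LLaVA/data_prep/Sketch2Code_og")
convert_data_to_json(os.path.join(base, "data"), base)  # writes Sketch2Code_og/samples.json
split_dataset(base, os.path.expanduser("~/splitted_data_verify"),
              test_split=10, validation_split=10, seed=42)  # writes train/validation/test folders + split_json_files
```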