Adding an example of using fsspec for large batch inference #1927

Merged
merged 9 commits into from
Oct 28, 2022
Changes from all commits
129 changes: 91 additions & 38 deletions examples/Huggingface_Transformers/Download_Transformer_models.py
@@ -1,38 +1,73 @@
import json
import os
import sys

import torch
import transformers
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoModelForQuestionAnswering,
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
    AutoTokenizer,
    set_seed,
)

print("Transformers version", transformers.__version__)
set_seed(1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def transformers_model_dowloader(
    mode, pretrained_model_name, num_labels, do_lower_case, max_length, torchscript
):
    """This function, save the checkpoint, config file along with tokenizer config and vocab files
    of a transformer model of your choice.
    """
    print("Download model and tokenizer", pretrained_model_name)
    # loading pre-trained model and tokenizer
    if mode == "sequence_classification":
        config = AutoConfig.from_pretrained(
            pretrained_model_name, num_labels=num_labels, torchscript=torchscript
        )
        model = AutoModelForSequenceClassification.from_pretrained(
            pretrained_model_name, config=config
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name, do_lower_case=do_lower_case
        )
    elif mode == "question_answering":
        config = AutoConfig.from_pretrained(
            pretrained_model_name, torchscript=torchscript
        )
        model = AutoModelForQuestionAnswering.from_pretrained(
            pretrained_model_name, config=config
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name, do_lower_case=do_lower_case
        )
    elif mode == "token_classification":
        config = AutoConfig.from_pretrained(
            pretrained_model_name, num_labels=num_labels, torchscript=torchscript
        )
        model = AutoModelForTokenClassification.from_pretrained(
            pretrained_model_name, config=config
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name, do_lower_case=do_lower_case
        )
    elif mode == "text_generation":
        config = AutoConfig.from_pretrained(
            pretrained_model_name, num_labels=num_labels, torchscript=torchscript
        )
        model = AutoModelForCausalLM.from_pretrained(
            pretrained_model_name, config=config
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name, do_lower_case=do_lower_case
        )

    # NOTE : for demonstration purposes, we do not go through the fine-tune processing here.
    # A Fine_tunining process based on your needs can be added.
    # An example of Fine_tuned model has been provided in the README.
@@ -41,26 +76,42 @@ def transformers_model_dowloader(mode, pretrained_model_name, num_labels, do_lower_case, max_length, torchscript):
    try:
        os.mkdir(NEW_DIR)
    except OSError:
        print("Creation of directory %s failed" % NEW_DIR)
    else:
        print("Successfully created directory %s " % NEW_DIR)

    print(
        "Save model and tokenizer/ Torchscript model based on the setting from setup_config",
        pretrained_model_name,
        "in directory",
        NEW_DIR,
    )
    if save_mode == "pretrained":
        model.save_pretrained(NEW_DIR)
        tokenizer.save_pretrained(NEW_DIR)
    elif save_mode == "torchscript":
        dummy_input = "This is a dummy input for torch jit trace"
        inputs = tokenizer.encode_plus(
            dummy_input,
            max_length=int(max_length),
            pad_to_max_length=True,
            add_special_tokens=True,
            return_tensors="pt",
        )
        input_ids = inputs["input_ids"].to(device)
        attention_mask = inputs["attention_mask"].to(device)
        model.to(device).eval()
        traced_model = torch.jit.trace(model, (input_ids, attention_mask))
        torch.jit.save(traced_model, os.path.join(NEW_DIR, "traced_model.pt"))
    return


if __name__ == "__main__":
    dirname = os.path.dirname(__file__)
    if len(sys.argv) > 1:
        filename = os.path.join(dirname, sys.argv[1])
    else:
        filename = os.path.join(dirname, "setup_config.json")
    f = open(filename)
    settings = json.load(f)
    mode = settings["mode"]
@@ -74,4 +125,6 @@ def transformers_model_dowloader(mode, pretrained_model_name, num_labels, do_lower_case, max_length, torchscript):
    else:
        torchscript = False

    transformers_model_dowloader(
        mode, model_name, num_labels, do_lower_case, max_length, torchscript
    )
118 changes: 118 additions & 0 deletions examples/cloud_storage_stream_inference/README.md
@@ -0,0 +1,118 @@
# Using fsspec to stream data from cloud storage providers for batch inference

This example shows how to use fsspec to stream large amounts of data from cloud storage such as Amazon S3, Google Cloud Storage, or Azure Blob Storage and use it to create requests to TorchServe for large-scale batch inference with a large batch size.

The example retrieves data using the S3 API and fsspec, but the same idea can be used to retrieve data from other cloud storage providers such as Google Cloud Storage or Azure.

Filesystem Spec (fsspec) is a project to provide a unified pythonic interface to local, remote and embedded file systems and bytes storage. https://filesystem-spec.readthedocs.io/en/latest/
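
As a minimal illustration of that unified interface (the bucket and object names below are placeholders, not part of this example), the same file-like call works for local paths, S3, GCS, or Azure URLs:

```
import fsspec

# The URL scheme picks the backend; "s3://", "gcs://", "abfs://" and plain local
# paths all go through the same file-like API.
with fsspec.open("s3://some-bucket/some-object.csv", "r") as f:
    first_line = f.readline()
```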

**Requires Python >= 3.8**

## Example overview

- The main objective of this example is to show the process of reading data from a cloud storage provider and sending batch inference requests to a model deployed with TorchServe. **We're not concerned with the accuracy of predictions.**
- In this example we're going to run a pre-trained DistilBERT model with TorchServe and do text classification by reading input data using the S3 API and fsspec and making REST API calls to TorchServe for batch inference.
- We're going to use the Amazon reviews dataset, which contains customer reviews along with a title and a label.
- We'll take the customer review text and predict whether the review is positive or negative using the pre-trained DistilBERT model.
- We'll use MinIO (https://min.io/) as an AWS S3 proxy to store the Amazon reviews dataset tar file. MinIO is a high-performance object store released under the GNU Affero General Public License v3.0. It is API-compatible with the Amazon S3 cloud storage service and can handle unstructured data such as photos, videos, log files, backups, and container images, with a maximum supported object size of 5 TB.

## Steps

1) Install MinIO - https://hub.docker.com/r/minio/minio/.
<!-- markdown-link-check-disable -->
2) Download the Amazon reviews dataset tar file (amazon_review_polarity_csv.tar.gz) by going to the following URL in a browser - https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbaW12WVVZS2drcnM

3) Start MinIO server
```
minio server ~/minio_data
```

4) Log into the MinIO administration web page at http://127.0.0.1:54379/ with user ID minioadmin and password minioadmin

5) Create a bucket with name **pytorch-data**

6) Change the bucket access policy from private to public by clicking the Manage button for the bucket at http://127.0.0.1:54379/buckets/pytorch-data/admin/summary

7) Upload the **amazon_review_polarity_csv.tar.gz** file downloaded in step 2 to the **pytorch-data** bucket from http://127.0.0.1:54379/buckets/pytorch-data/browse

8) Clone the pytorch/serve project from GitHub - https://github.com/pytorch/serve. This is required to run this example.

9) Create a Python virtual environment if you like.

10) Install TorchServe - https://github.com/pytorch/serve. This is required to run the DistilBERT model.

11) Install the transformers package
```
pip install transformers==4.6.0
```
If you get an error about a missing Rust compiler while installing transformers, install the Rust compiler -
```
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
```

12) Download the **distilbert-base-uncased** pretrained model. This will download the model files into the Transformer_model directory.
```
cd serve/examples/cloud_storage_stream_inference
python3 ../Huggingface_Transformers/Download_Transformer_models.py ../cloud_storage_stream_inference/setup_config.json
```

13) Create the model archive in eager mode
```
torch-model-archiver --model-name BERTSeqClassification --version 1.0 --serialized-file Transformer_model/pytorch_model.bin --handler ../Huggingface_Transformers/Transformer_handler_generalized.py --extra-files "Transformer_model/config.json,./setup_config.json,./index_to_name.json"
```

14) Register and start serving the model with TorchServe
```
mkdir model_store
mv BERTSeqClassification.mar model_store/
torchserve --start --model-store model_store --models my_tc=BERTSeqClassification.mar --ts-config=config.properties --ncs
```

15) To check if the model is running
```
curl http://localhost:8081/models/
```
You should see
```
{
"models": [
{
"modelName": "my_tc",
"modelUrl": "BERTSeqClassification.mar"
}
]
}
```

16) Install **fsspec**
```
pip install fsspec
```

17) Install **s3fs**
```
pip install s3fs
```
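
Optionally, you can verify that s3fs can reach the MinIO server before running the example. This is a minimal check, assuming MinIO's S3 API is listening on its default port 9000 with the default minioadmin credentials from the steps above (adjust to your setup):

```
import s3fs

# Point s3fs at the local MinIO server instead of AWS (the endpoint and credentials
# are assumptions based on the MinIO defaults used in this README).
fs = s3fs.S3FileSystem(
    key="minioadmin",
    secret="minioadmin",
    client_kwargs={"endpoint_url": "http://127.0.0.1:9000"},
)
print(fs.ls("pytorch-data"))  # should list amazon_review_polarity_csv.tar.gz
```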
18) Run the example
```
python stream_inference.py
```
This example:
- Reads the **amazon_review_polarity_csv.tar.gz** file from MinIO using the S3 API
- Reads the **test.csv** file from the tar file
- Extracts the customer review strings
- Creates TorchServe batch inference REST API requests with the customer review strings and sends them to TorchServe for inference
- Prints the model predictions (a minimal sketch of this flow is shown after the sample output below)
```
2022-10-28 10:23:48.088857 - Calling model inference with data - {"text":"A waste of time"}
2022-10-28 10:23:48.088916 - Calling model inference with data - {"text":"One of the best films ever made"}
2022-10-28 10:23:48.088977 - Calling model inference with data - {"text":"Gods and Monsters is a superb movie about the last days of gay film director James Whale -- who directed Frankenstein and Bride of Frankenstein"}
2022-10-28 10:23:48.089036 - Calling model inference with data - {"text":"One of the few recent films that I anticipated to be good and it exceeded my expectations"}
2022-10-28 10:23:48.089094 - Calling model inference with data - {"text":"The manufacturer packed this product so poorly that the plastic joints between sections of the tree were smashed"}
2022-10-28 10:23:48.089154 - Calling model inference with data - {"text":"Of the 200+ baseball books I've read"}
2022-10-28 10:23:55.767787 - Model prediction: Negative
2022-10-28 10:23:55.768008 - Model prediction: Negative
2022-10-28 10:23:55.768460 - Model prediction: Negative
2022-10-28 10:23:55.768516 - Model prediction: Negative
2022-10-28 10:23:55.768573 - Model prediction: Positive
```
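
For reference, the sketch below shows roughly how this flow can be put together with fsspec; `stream_inference.py` in this directory is the authoritative implementation. The MinIO endpoint, credentials, tar member path, and CSV column order are assumptions based on the steps above, so adjust them to your setup:

```
import csv
import io
import itertools
import tarfile
from concurrent.futures import ThreadPoolExecutor

import fsspec
import requests

# Assumptions (adjust as needed): MinIO's S3 API on 127.0.0.1:9000 with the default
# minioadmin credentials, and TorchServe's inference API on localhost:8080 serving
# the model registered above as "my_tc".
STORAGE_OPTIONS = {
    "key": "minioadmin",
    "secret": "minioadmin",
    "client_kwargs": {"endpoint_url": "http://127.0.0.1:9000"},
}
INFERENCE_URL = "http://localhost:8080/predictions/my_tc"


def predict(review_text):
    # One REST request per review; TorchServe aggregates concurrent requests into
    # batches according to batchSize/maxBatchDelay in config.properties.
    response = requests.post(INFERENCE_URL, json={"text": review_text})
    return response.text


def main():
    # Stream the tar.gz straight from the bucket; nothing is downloaded to disk first.
    with fsspec.open(
        "s3://pytorch-data/amazon_review_polarity_csv.tar.gz", "rb", **STORAGE_OPTIONS
    ) as remote_file:
        with tarfile.open(fileobj=remote_file, mode="r:gz") as tar:
            # Member path and column order (label, title, review text) are assumed
            # from the dataset layout described above.
            member = tar.extractfile("amazon_review_polarity_csv/test.csv")
            rows = csv.reader(io.TextIOWrapper(member, encoding="utf-8"))
            reviews = (row[2] for row in rows)
            # Send a slice of reviews concurrently so TorchServe can batch them.
            with ThreadPoolExecutor(max_workers=32) as pool:
                for prediction in pool.map(predict, itertools.islice(reviews, 100)):
                    print(prediction)


if __name__ == "__main__":
    main()
```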
14 changes: 14 additions & 0 deletions examples/cloud_storage_stream_inference/config.properties
@@ -0,0 +1,14 @@
load_models=all
models={\
"my_tc": {\
"1.0": {\
"defaultVersion": true,\
"marName": "BERTSeqClassification.mar",\
"minWorkers": 10,\
"maxWorkers": 1000,\
"batchSize": 2000,\
"maxBatchDelay": 5000,\
"responseTimeout": 1200\
}\
}\
}
4 changes: 4 additions & 0 deletions examples/cloud_storage_stream_inference/index_to_name.json
@@ -0,0 +1,4 @@
{
"0": "Negative",
"1": "Positive"
}
12 changes: 12 additions & 0 deletions examples/cloud_storage_stream_inference/setup_config.json
@@ -0,0 +1,12 @@
{
"model_name": "distilbert-base-uncased",
"mode": "sequence_classification",
"do_lower_case": true,
"num_labels": "2",
"save_mode": "pretrained",
"max_length": "150",
"captum_explanation": true,
"embedding_name": "bert",
"FasterTransformer": false,
"model_parallel": false
}