From f56fb9cdc0a45b75df080718903e6bba777c0948 Mon Sep 17 00:00:00 2001
From: Francisco R Castro G
Date: Wed, 14 May 2025 22:56:50 +0000
Subject: [PATCH 1/7] Add accelerate API support for Word Language Model example

---
 run_python_examples.sh          |  2 +-
 word_language_model/generate.py | 24 +++++++-----------------
 word_language_model/main.py     | 27 +++++++++------------------
 3 files changed, 17 insertions(+), 36 deletions(-)

diff --git a/run_python_examples.sh b/run_python_examples.sh
index e3521b0b30..96f259c7aa 100755
--- a/run_python_examples.sh
+++ b/run_python_examples.sh
@@ -153,7 +153,7 @@ function vision_transformer() {
 }
 
 function word_language_model() {
-  uv run main.py --epochs 1 --dry-run $CUDA_FLAG --mps || error "word_language_model failed"
+  uv run main.py --epochs 1 --dry-run $ACCEL_FLAG || error "word_language_model failed"
 }
 
 function gcn() {
diff --git a/word_language_model/generate.py b/word_language_model/generate.py
index 13bd8abfcd..20794b264b 100644
--- a/word_language_model/generate.py
+++ b/word_language_model/generate.py
@@ -21,30 +21,20 @@
                     help='number of words to generate')
 parser.add_argument('--seed', type=int, default=1111,
                     help='random seed')
-parser.add_argument('--cuda', action='store_true',
-                    help='use CUDA')
-parser.add_argument('--mps', action='store_true', default=False,
-                    help='enables macOS GPU training')
 parser.add_argument('--temperature', type=float, default=1.0,
                     help='temperature - higher will increase diversity')
 parser.add_argument('--log-interval', type=int, default=100,
                     help='reporting interval')
+parser.add_argument('--accel', action='store_true', default=False,
+                    help='Enables accelerated inference')
 args = parser.parse_args()
 
 # Set the random seed manually for reproducibility.
 torch.manual_seed(args.seed)
-if torch.cuda.is_available():
-    if not args.cuda:
-        print("WARNING: You have a CUDA device, so you should probably run with --cuda.")
-if torch.backends.mps.is_available():
-    if not args.mps:
-        print("WARNING: You have mps device, to enable macOS GPU run with --mps.")
-
-use_mps = args.mps and torch.backends.mps.is_available()
-if args.cuda:
-    device = torch.device("cuda")
-elif use_mps:
-    device = torch.device("mps")
+
+if args.accel and torch.accelerator.is_available():
+    device = torch.accelerator.current_accelerator()
+
 else:
     device = torch.device("cpu")
 
@@ -52,7 +42,7 @@
     parser.error("--temperature has to be greater or equal 1e-3.")
 
 with open(args.checkpoint, 'rb') as f:
-    model = torch.load(f, map_location=device)
+    model = torch.load(f, map_location=device, weights_only=False)
 model.eval()
 
 corpus = data.Corpus(args.data)
diff --git a/word_language_model/main.py b/word_language_model/main.py
index 23bda03e73..ba2e1dbf2c 100644
--- a/word_language_model/main.py
+++ b/word_language_model/main.py
@@ -37,10 +37,6 @@
                     help='tie the word embedding and softmax weights')
 parser.add_argument('--seed', type=int, default=1111,
                     help='random seed')
-parser.add_argument('--cuda', action='store_true', default=False,
-                    help='use CUDA')
-parser.add_argument('--mps', action='store_true', default=False,
-                    help='enables macOS GPU training')
 parser.add_argument('--log-interval', type=int, default=200, metavar='N',
                     help='report interval')
 parser.add_argument('--save', type=str, default='model.pt',
@@ -51,25 +47,20 @@
                     help='the number of heads in the encoder/decoder of the transformer model')
 parser.add_argument('--dry-run', action='store_true',
                     help='verify the code and the model')
+parser.add_argument('--accel', action='store_true', help='Enables accelerated training')
 args = parser.parse_args()
 
 # Set the random seed manually for reproducibility.
 torch.manual_seed(args.seed)
-if torch.cuda.is_available():
-    if not args.cuda:
-        print("WARNING: You have a CUDA device, so you should probably run with --cuda.")
-if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-    if not args.mps:
-        print("WARNING: You have mps device, to enable macOS GPU run with --mps.")
-
-use_mps = args.mps and torch.backends.mps.is_available()
-if args.cuda:
-    device = torch.device("cuda")
-elif use_mps:
-    device = torch.device("mps")
+
+if args.accel and torch.accelerator.is_available():
+    device = torch.accelerator.current_accelerator()
+
 else:
     device = torch.device("cpu")
 
+print("Using device:", device)
+
 ###############################################################################
 # Load data
 ###############################################################################
@@ -243,11 +234,11 @@ def export_onnx(path, batch_size, seq_len):
 
 # Load the best saved model.
 with open(args.save, 'rb') as f:
-    model = torch.load(f)
+    model = torch.load(f, weights_only=False)
     # after load the rnn params are not a continuous chunk of memory
     # this makes them a continuous chunk, and will speed up forward pass
     # Currently, only rnn model supports flatten_parameters function.
-    if args.model in ['RNN_TANH', 'RNN_RELU', 'LSTM', 'GRU']:
+    if args.model in ['RNN_TANH', 'RNN_RELU', 'LSTM', 'GRU'] and device.type == 'cuda':
         model.rnn.flatten_parameters()
 
 # Run on test data.

From a6d10a56b63ede465ff3239a2eb5eb00136b789e Mon Sep 17 00:00:00 2001
From: Francisco R Castro G
Date: Thu, 15 May 2025 20:04:50 +0000
Subject: [PATCH 2/7] Update README for Word Language Model example

---
 word_language_model/README.md | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/word_language_model/README.md b/word_language_model/README.md
index 254b726585..a7816a147b 100644
--- a/word_language_model/README.md
+++ b/word_language_model/README.md
@@ -4,13 +4,13 @@ This example trains a multi-layer RNN (Elman, GRU, or LSTM) or Transformer on a
 The trained model can then be used by the generate script to generate new text.
 
 ```bash
-python main.py --cuda --epochs 6           # Train a LSTM on Wikitext-2 with CUDA.
-python main.py --cuda --epochs 6 --tied    # Train a tied LSTM on Wikitext-2 with CUDA.
-python main.py --cuda --tied               # Train a tied LSTM on Wikitext-2 with CUDA for 40 epochs.
-python main.py --cuda --epochs 6 --model Transformer --lr 5
+python main.py --accel --epochs 6           # Train a LSTM on Wikitext-2 with CUDA.
+python main.py --accel --epochs 6 --tied    # Train a tied LSTM on Wikitext-2 with CUDA.
+python main.py --accel --tied               # Train a tied LSTM on Wikitext-2 with CUDA for 40 epochs.
+python main.py --accel --epochs 6 --model Transformer --lr 5
                                             # Train a Transformer model on Wikitext-2 with CUDA.
 
-python generate.py                          # Generate samples from the default model checkpoint.
+python generate.py --accel                  # Generate samples from the default model checkpoint.
 ```
 
 The model uses the `nn.RNN` module (and its sister modules `nn.GRU` and `nn.LSTM`) or Transformer module (`nn.TransformerEncoder` and `nn.TransformerEncoderLayer`) which will automatically use the cuDNN backend if run on CUDA with cuDNN installed.
@@ -35,8 +35,7 @@ optional arguments:
   --dropout DROPOUT     dropout applied to layers (0 = no dropout)
   --tied                tie the word embedding and softmax weights
   --seed SEED           random seed
-  --cuda                use CUDA
-  --mps                 enable GPU on macOS
+  --accel               activate support for an accelerator card
   --log-interval N      report interval
   --save SAVE           path to save the final model
   --onnx-export ONNX_EXPORT
@@ -49,8 +48,8 @@ With these arguments, a variety of models can be tested.
 As an example, the following arguments produce slower but better models:
 
 ```bash
-python main.py --cuda --emsize 650 --nhid 650 --dropout 0.5 --epochs 40
-python main.py --cuda --emsize 650 --nhid 650 --dropout 0.5 --epochs 40 --tied
-python main.py --cuda --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40
-python main.py --cuda --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40 --tied
+python main.py --accel --emsize 650 --nhid 650 --dropout 0.5 --epochs 40
+python main.py --accel --emsize 650 --nhid 650 --dropout 0.5 --epochs 40 --tied
+python main.py --accel --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40
+python main.py --accel --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40 --tied
 ```

From 39bf17ad30c5995809a6cf452f15c2aa7d66007c Mon Sep 17 00:00:00 2001
From: Francisco R Castro Garcia
Date: Fri, 16 May 2025 14:32:37 -0600
Subject: [PATCH 3/7] Update word_language_model/generate.py for consistency

Co-authored-by: Dmitry Rogozhkin
---
 word_language_model/generate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/word_language_model/generate.py b/word_language_model/generate.py
index 20794b264b..49bdff1d52 100644
--- a/word_language_model/generate.py
+++ b/word_language_model/generate.py
@@ -26,7 +26,7 @@
 parser.add_argument('--log-interval', type=int, default=100,
                     help='reporting interval')
 parser.add_argument('--accel', action='store_true', default=False,
-                    help='Enables accelerated inference')
+                    help='use accelerator')
 args = parser.parse_args()
 
 # Set the random seed manually for reproducibility.

From c50a636a508f9a2f71c011a914237cb9f1e265d0 Mon Sep 17 00:00:00 2001
From: Francisco R Castro Garcia
Date: Fri, 16 May 2025 14:33:20 -0600
Subject: [PATCH 4/7] Update word_language_model/README.md

Co-authored-by: Dmitry Rogozhkin
---
 word_language_model/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/word_language_model/README.md b/word_language_model/README.md
index a7816a147b..1e05af9c72 100644
--- a/word_language_model/README.md
+++ b/word_language_model/README.md
@@ -35,7 +35,7 @@ optional arguments:
   --dropout DROPOUT     dropout applied to layers (0 = no dropout)
   --tied                tie the word embedding and softmax weights
   --seed SEED           random seed
-  --accel               activate support for an accelerator card
+  --accel               use accelerator
   --log-interval N      report interval
   --save SAVE           path to save the final model
   --onnx-export ONNX_EXPORT

From 93113d1b2574e5833dee9f73b449b55ca4f41c75 Mon Sep 17 00:00:00 2001
From: Francisco R Castro G
Date: Wed, 18 Jun 2025 01:19:20 +0000
Subject: [PATCH 5/7] Update README to change wording on acceleration devices

---
 word_language_model/README.md | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/word_language_model/README.md b/word_language_model/README.md
index 1e05af9c72..6d9ce2c7b8 100644
--- a/word_language_model/README.md
+++ b/word_language_model/README.md
@@ -4,15 +4,18 @@ This example trains a multi-layer RNN (Elman, GRU, or LSTM) or Transformer on a
 The trained model can then be used by the generate script to generate new text.
 
 ```bash
-python main.py --accel --epochs 6           # Train a LSTM on Wikitext-2 with CUDA.
-python main.py --accel --epochs 6 --tied    # Train a tied LSTM on Wikitext-2 with CUDA.
-python main.py --accel --tied               # Train a tied LSTM on Wikitext-2 with CUDA for 40 epochs.
+python main.py --accel --epochs 6           # Train a LSTM on Wikitext-2.
+python main.py --accel --epochs 6 --tied    # Train a tied LSTM on Wikitext-2.
+python main.py --accel --tied               # Train a tied LSTM on Wikitext-2 for 40 epochs.
 python main.py --accel --epochs 6 --model Transformer --lr 5
-                                            # Train a Transformer model on Wikitext-2 with CUDA.
+                                            # Train a Transformer model on Wikitext-2.
 
 python generate.py --accel                  # Generate samples from the default model checkpoint.
 ```
 
+> [!NOTE]
+> This example supports running on acceleration devices (CUDA, MPS, XPU).
+
 The model uses the `nn.RNN` module (and its sister modules `nn.GRU` and `nn.LSTM`) or Transformer module (`nn.TransformerEncoder` and `nn.TransformerEncoderLayer`) which will automatically use the cuDNN backend if run on CUDA with cuDNN installed.
 
 During training, if a keyboard interrupt (Ctrl-C) is received, training is stopped and the current model is evaluated against the test dataset.

From 3b003ec18df48e0df238afb21e4bab14c45df46a Mon Sep 17 00:00:00 2001
From: Francisco R Castro G
Date: Wed, 18 Jun 2025 01:34:44 +0000
Subject: [PATCH 6/7] Remove cuda conditional

---
 word_language_model/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/word_language_model/main.py b/word_language_model/main.py
index fd4d95af88..cd697da3db 100644
--- a/word_language_model/main.py
+++ b/word_language_model/main.py
@@ -264,7 +264,7 @@ def export_onnx(path, batch_size, seq_len):
     # after load the rnn params are not a continuous chunk of memory
     # this makes them a continuous chunk, and will speed up forward pass
     # Currently, only rnn model supports flatten_parameters function.
-    if args.model in ['RNN_TANH', 'RNN_RELU', 'LSTM', 'GRU'] and device.type == 'cuda':
+    if args.model in ['RNN_TANH', 'RNN_RELU', 'LSTM', 'GRU']:
         model.rnn.flatten_parameters()
 
 # Run on test data.

From 70cee5b7675f5ea0af41fe63e09d1bd0e33185d6 Mon Sep 17 00:00:00 2001
From: framoncg
Date: Fri, 4 Jul 2025 23:30:35 +0000
Subject: [PATCH 7/7] Fix flags for word_language_model in ci script

---
 run_python_examples.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/run_python_examples.sh b/run_python_examples.sh
index cebd2aec10..9141069ce4 100755
--- a/run_python_examples.sh
+++ b/run_python_examples.sh
@@ -154,11 +154,11 @@ function vision_transformer() {
 }
 
 function word_language_model() {
-  uv run main.py --epochs 1 --dry-run $CUDA_FLAG --mps || error "word_language_model failed"
-  uv run generate.py $CUDA_FLAG --mps || error "word_language_model generate failed"
+  uv run main.py --epochs 1 --dry-run $ACCEL_FLAG || error "word_language_model failed"
+  uv run generate.py $ACCEL_FLAG || error "word_language_model generate failed"
   for model in "RNN_TANH" "RNN_RELU" "LSTM" "GRU" "Transformer"; do
-    uv run main.py --model $model --epochs 1 --dry-run $CUDA_FLAG --mps || error "word_language_model failed"
-    uv run generate.py $CUDA_FLAG --mps || error "word_language_model generate failed"
+    uv run main.py --model $model --epochs 1 --dry-run $ACCEL_FLAG || error "word_language_model failed"
+    uv run generate.py $ACCEL_FLAG || error "word_language_model generate failed"
   done
 }
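
For quick experimentation outside the example scripts, the device-selection idiom these patches converge on can be exercised on its own. The sketch below is illustrative and not part of the series: it assumes a PyTorch build that ships the `torch.accelerator` API (2.6 or later), and the `--accel` flag name simply mirrors the one the patches add.

```python
# Standalone sketch of the accelerator-selection pattern used by this series.
# Assumption: PyTorch >= 2.6, where the torch.accelerator API is available.
import argparse

import torch

parser = argparse.ArgumentParser(description='torch.accelerator selection demo')
parser.add_argument('--accel', action='store_true', help='use accelerator')
args = parser.parse_args()

# Pick whichever accelerator backend is present (CUDA, MPS, XPU, ...),
# falling back to CPU when --accel is omitted or no device is found.
if args.accel and torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator()
else:
    device = torch.device("cpu")

print("Using device:", device)

# Tensors created with device= land on the chosen device.
x = torch.randn(4, 4, device=device)
print(x.sum().item())
```

Since `torch.accelerator.current_accelerator()` returns a `torch.device`, attribute checks such as `device.type` keep working downstream — the property patch 1 leans on for its `device.type == 'cuda'` guard before patch 6 removes it.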