Merge pull request #4 from alihassanijr/main

Tiled 3x3 NA, torch 1.13 support, cp310 and cp311 wheels, and more.
SHI-Labs · Nov 1, 2022 · 6106a56 · 6106a56
2 parents 9d08906 + 63606e6
commit 6106a56
Show file tree

Hide file tree

Showing 18 changed files with 661 additions and 148 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,16 @@
 # Changelog
 
+## [0.14.4] - 2022-10-31
+
+### Added
+- Python 3.10 and 3.11 wheels!
+  - Only for supported torch versions.
+- Support torch 1.13.
+- Tiled NA2D for 3x3 kernels.
+
+### Changed
+- Minor changes to the setup script to fix `pip install natten`.
+
 ## [0.14.2] - 2022-10-15
 
 ### Added

diff --git a/README.md b/README.md
@@ -34,8 +34,7 @@ The latest version of NATTEN runs pretty fast on Ampere with the latest torch an
 
 
 ## Requirements
-NATTEN supports PyTorch version 1.8 and later, and Python versions 3.7, 3.8, and 3.9.
-However, we highly recommend using Python 3.8 and PyTorch 1.12.1 + CUDA 11.6 for the best performance.
+NATTEN supports PyTorch version 1.8 and later, and Python versions 3.7, 3.8, 3.9, 3.10 (only torch >= 1.11), and 3.11 (only torch >= 1.13).
 
 **NOTE:** The current version of NATTEN comes with Linux-only wheels, and supports Pascal and above (`SM >= 60`, i.e. Tesla P100).
 Make sure your GPU is supported by referring to 
@@ -94,7 +93,7 @@ python -m unittest discover -v -s ./tests
 - [ ] Neighborhood Attention 3D (CPU)
 - [x] Dilation support
 - [x] Float16 support and utilization
-- [ ] BFloat16 support
+- [ ] BFloat16 support (awaiting CUDA 11.8/12 builds of torch)
 - [ ] Kepler and Maxwell (30<=SM<60) support
 - [ ] Windows builds
 
@@ -104,8 +103,8 @@ Simply import `NeighborhoodAttention1D` or `NeighborhoodAttention2D` from `natte
 from natten import NeighborhoodAttention1D
 from natten import NeighborhoodAttention2D
 
-na1d = NeighborhoodAttention1D(dim=128, kernel_size=7, dilation=2, num_heads=4).cuda()
-na2d = NeighborhoodAttention2D(dim=128, kernel_size=7, dilation=2, num_heads=4).cuda()
+na1d = NeighborhoodAttention1D(dim=128, kernel_size=7, dilation=2, num_heads=4)
+na2d = NeighborhoodAttention2D(dim=128, kernel_size=7, dilation=2, num_heads=4)
 ```
 
 ### FLOPs

diff --git a/assets/README_pypi.md b/assets/README_pypi.md
@@ -34,8 +34,7 @@ The latest version of NATTEN runs pretty fast on Ampere with the latest torch an
 
 
 ## Requirements
-NATTEN supports PyTorch version 1.8 and later, and Python versions 3.7, 3.8, and 3.9.
-However, we highly recommend using Python 3.8 and PyTorch 1.12.1 + CUDA 11.6 for the best performance.
+NATTEN supports PyTorch version 1.8 and later, and Python versions 3.7, 3.8, 3.9, 3.10 (only torch >= 1.11), and 3.11 (only torch >= 1.13).
 
 **NOTE:** The current version of NATTEN comes with Linux-only wheels, and supports Pascal and above (`SM >= 60`, i.e. Tesla P100).
 Make sure your GPU is supported by referring to 
@@ -98,8 +97,8 @@ Simply import `NeighborhoodAttention1D` or `NeighborhoodAttention2D` from `natte
 from natten import NeighborhoodAttention1D
 from natten import NeighborhoodAttention2D
 
-na1d = NeighborhoodAttention1D(dim=128, kernel_size=7, dilation=2, num_heads=4).cuda()
-na2d = NeighborhoodAttention2D(dim=128, kernel_size=7, dilation=2, num_heads=4).cuda()
+na1d = NeighborhoodAttention1D(dim=128, kernel_size=7, dilation=2, num_heads=4)
+na2d = NeighborhoodAttention2D(dim=128, kernel_size=7, dilation=2, num_heads=4)
 ```
 
 ### FLOPs

diff --git a/dev/packaging/build_all_wheels_parallel.sh b/dev/packaging/build_all_wheels_parallel.sh
@@ -1,7 +1,6 @@
 #!/bin/bash -e
 # Based on detectron2's builder:
 # github.com/facebookresearch/detectron2
-# Copyright (c) Facebook, Inc. and its affiliates.
 
 [[ -d "dev/packaging" ]] || {
   echo "Please run this script at natten root!"
@@ -11,6 +10,7 @@
 build_one() {
   cu=$1
   pytorch_ver=$2
+  cp310=${3:-0}
 
   case "$cu" in
     cu*)
@@ -28,7 +28,13 @@ build_one() {
   echo "Launching container $container_name ..."
   container_id="$container_name"_"$cu"_"$pytorch_ver"
 
-  py_versions=(3.7 3.8 3.9)
+  if [ $cp310 -eq 2 ]; then
+    py_versions=(3.7 3.8 3.9 3.10 3.11)
+  elif [ $cp310 -eq 1 ]; then
+    py_versions=(3.7 3.8 3.9 3.10)
+  else
+    py_versions=(3.7 3.8 3.9)
+  fi
 
   for py in "${py_versions[@]}"; do
     docker run -itd \
@@ -51,11 +57,17 @@ EOF
 if [[ -n "$1" ]] && [[ -n "$2" ]]; then
   build_one "$1" "$2"
 else
-  build_one cu116 1.12.1 & build_one cu113 1.12.1 &  build_one cu102 1.12.1 &  build_one cpu 1.12.1
+  # 1.13 and newer -- build python 3.11 wheels
+  build_one cu117 1.13 2 & build_one cu116 1.13 2 &  build_one cpu 1.13 2
 
-  build_one cu116 1.12 & build_one cu113 1.12 &  build_one cu102 1.12 &  build_one cpu 1.12
+  # 1.11 and newer -- build python 3.10 wheels
+  build_one cu116 1.12.1 1 & build_one cu113 1.12.1 1 &  build_one cu102 1.12.1 1 &  build_one cpu 1.12.1 1
 
-  build_one cu115 1.11 &  build_one cu113 1.11 & build_one cu102 1.11 & build_one cpu 1.11
+  build_one cu116 1.12 1 & build_one cu113 1.12 1 &  build_one cu102 1.12 1 &  build_one cpu 1.12 1
+
+  build_one cu115 1.11 1 &  build_one cu113 1.11 1 & build_one cu102 1.11 1 & build_one cpu 1.11 1
+
+  # 1.10 and older
 
   build_one cu113 1.10.1 & build_one cu111 1.10.1 & build_one cu102 1.10.1 & build_one cpu 1.10.1
 

diff --git a/dev/packaging/build_cpu_wheel.sh b/dev/packaging/build_cpu_wheel.sh
diff --git a/dev/packaging/build_default_wheel.sh b/dev/packaging/build_default_wheel.sh
diff --git a/dev/packaging/build_wheel.sh b/dev/packaging/build_wheel.sh
@@ -12,6 +12,8 @@ echo "Build Settings:"
 echo "CU_VERSION: $CU_VERSION"                 # e.g. cu101
 echo "PYTHON_VERSION: $PYTHON_VERSION"         # e.g. 3.7
 echo "PYTORCH_VERSION: $PYTORCH_VERSION"       # e.g. 1.4
+
+export NATTEN_VERSION_SUFFIX=1
 
 setup_cuda
 setup_wheel_python

diff --git a/dev/packaging/gen_wheel_index.sh b/dev/packaging/gen_wheel_index.sh
@@ -1,7 +1,6 @@
 #!/bin/bash -e
 # Based on detectron2 
 # github.com/facebookresearch/detectron2
-# Copyright (c) Facebook, Inc. and its affiliates.
 
 
 root=$(readlink -f $1)

diff --git a/dev/packaging/pkg_helpers.bash b/dev/packaging/pkg_helpers.bash
@@ -1,7 +1,6 @@
 #!/bin/bash -e
 # Based on detectron2's builder:
 # github.com/facebookresearch/detectron2
-# Copyright (c) Facebook, Inc. and its affiliates.
 
 # Function to retry functions that sometimes timeout or have flaky failures
 retry () {
@@ -14,12 +13,16 @@ pip_install() {
 
 
 setup_cuda() {
-  # Now work out the CUDA settings
+  # SM<6.0 is not supported at this time.
   # Like other torch domain libraries, we choose common GPU architectures only.
   # See https://github.com/pytorch/pytorch/blob/master/torch/utils/cpp_extension.py
   # and https://github.com/pytorch/vision/blob/main/packaging/pkg_helpers.bash for reference.
   export FORCE_CUDA=1
   case "$CU_VERSION" in
+    cu117)
+      export CUDA_HOME=/usr/local/cuda-11.7/
+      export TORCH_CUDA_ARCH_LIST="6.0;6.1+PTX;7.0;7.5+PTX;8.0;8.6+PTX"
+      ;;
     cu116)
       export CUDA_HOME=/usr/local/cuda-11.6/
       export TORCH_CUDA_ARCH_LIST="6.0;6.1+PTX;7.0;7.5+PTX;8.0;8.6+PTX"
@@ -68,6 +71,8 @@ setup_wheel_python() {
     3.7) python_abi=cp37-cp37m ;;
     3.8) python_abi=cp38-cp38 ;;
     3.9) python_abi=cp39-cp39 ;;
+    3.10) python_abi=cp310-cp310 ;;
+    3.11) python_abi=cp311-cp311 ;;
     *)
       echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION"
       exit 1

diff --git a/natten/__init__.py b/natten/__init__.py
@@ -7,4 +7,4 @@
 from .natten1d import NeighborhoodAttention1D
 from .natten2d import NeighborhoodAttention2D, NeighborhoodAttention
 
-__version__ = "0.14.2"
+__version__ = "0.14.4"