From 6754e448e6295711f84edd43da1b74cfb9477bd5 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 1 Apr 2025 23:17:32 +0000 Subject: [PATCH 01/43] chore: patch cves --- docker/1.2-1/base/Dockerfile.cpu | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 172fe81..f7aa00f 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -69,6 +69,7 @@ RUN apt-get update && \ && \ python3 -m pip install --upgrade pip && \ python3 -m pip install --upgrade certifi && \ + python3 -m pip install --upgrade pyarrow && \ apt-get clean && \ # Node.js setup mkdir -p /etc/apt/keyrings && \ From 3c9efeea2c5d693e2fabaf0c559668ea7687e8be Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 2 Apr 2025 15:00:32 +0000 Subject: [PATCH 02/43] chore: patch pyarrow + werkzeug (attempt 2) --- docker/1.2-1/base/Dockerfile.cpu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index f7aa00f..1863ed9 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -8,7 +8,7 @@ ARG CONDA_CHECKSUM=2006a61abc8b4fd04de5eb92620e1f72bada713cc84b5b4899463095e1210 ARG CONDA_PY_VERSION=39 ARG CONDA_PKG_VERSION=24.7.1 ARG PYTHON_VERSION=3.9 -ARG PYARROW_VERSION=14.0.1 +ARG PYARROW_VERSION=17.0.0 ARG MLIO_VERSION=v0.9.0 ENV DEBIAN_FRONTEND=noninteractive @@ -156,4 +156,6 @@ ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.U # Install Scikit-Learn # Scikit-learn 0.20 was the last version to support Python 2.7 and Python 3.4. # Scikit-learn now requires Python 3.6 or newer. 
-RUN python3 -m pip install --no-cache -I scikit-learn==1.2.1 \ No newline at end of file +RUN python3 -m pip install --no-cache -I scikit-learn==1.2.1 + +RUN python3 -m pip install "werkzeug>=2.1.1" \ No newline at end of file From 68a4cfd90e896a82c22539f9372da80543550dc9 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 2 Apr 2025 19:51:30 +0000 Subject: [PATCH 03/43] chore: patch pyarrow + werkzeug (attempt 3) --- docker/1.2-1/base/Dockerfile.cpu | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 1863ed9..cf2d7f7 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -126,6 +126,7 @@ RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \ cd /tmp && \ git clone --branch ${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio && \ cd mlio && \ + sed -i 's/find_package(Arrow 14.0.1 REQUIRED)/find_package(Arrow 17.0.0 REQUIRED)/' CMakeLists.txt && \ build-tools/build-dependency build/third-party all && \ mkdir -p build/release && \ cd build/release && \ From 89c839708af6d17cf830e7f3b4b72babb12a06c5 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Mon, 14 Apr 2025 22:40:50 +0000 Subject: [PATCH 04/43] chore: patch pyarrow + werkzeug (attempt 4) --- docker/1.2-1/base/Dockerfile.cpu | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index cf2d7f7..7d4ed43 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -105,36 +105,39 @@ RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \ conda config --system --set show_channel_urls true && \ echo "python ${PYTHON_VERSION}.*" >> /miniconda3/conda-meta/pinned && \ conda install -c conda-forge python=${PYTHON_VERSION} --solver classic && \ - conda install conda=${CONDA_PKG_VERSION} --solver classic && \ + conda install 
conda=${CONDA_PKG_VERSION} arrow-cpp=${PYARROW_VERSION} --solver classic && \ conda update -y conda && \ conda install -c conda-forge pyarrow=${PYARROW_VERSION} --solver classic && \ cd /miniconda3/pkgs/libgrpc-*/info/test/examples/node && \ npm install minimist@latest protobufjs@latest && \ - # Remove Node.js, npm, and their dependencies apt-get purge -y nodejs npm && \ apt-get autoremove -y && \ - # Final cleanup rm -rf /etc/apt/sources.list.d/nodesource.list \ /etc/apt/keyrings/nodesource.gpg \ /etc/apt/sources.list.d/kitware.list && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ - # Continue with the rest of the build process conda install pip --force-reinstall && \ python3 -m pip install --upgrade pip && \ python3 -m pip install wheel && \ cd /tmp && \ git clone --branch ${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio && \ cd mlio && \ - sed -i 's/find_package(Arrow 14.0.1 REQUIRED)/find_package(Arrow 17.0.0 REQUIRED)/' CMakeLists.txt && \ + sed -i 's/find_package(Arrow [0-9.]\+ REQUIRED)/find_package(Arrow '${PYARROW_VERSION}' REQUIRED)/' CMakeLists.txt && \ build-tools/build-dependency build/third-party all && \ mkdir -p build/release && \ cd build/release && \ - cmake -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH="$(pwd)/../third-party" ../.. && \ + cmake -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_PREFIX_PATH="$(pwd)/../third-party" \ + -DArrow_DIR=/miniconda3/lib/cmake/Arrow \ + ../.. && \ cmake --build . && \ cmake --build . --target install && \ - cmake -DMLIO_INCLUDE_PYTHON_EXTENSION=ON -DPYTHON_EXECUTABLE="/miniconda3/bin/python3" \ - -DMLIO_INCLUDE_ARROW_INTEGRATION=ON ../.. && \ + cmake -DMLIO_INCLUDE_PYTHON_EXTENSION=ON \ + -DPYTHON_EXECUTABLE="/miniconda3/bin/python3" \ + -DMLIO_INCLUDE_ARROW_INTEGRATION=ON \ + -DArrow_DIR=/miniconda3/lib/cmake/Arrow \ + ../.. && \ cmake --build . --target mlio-py && \ cmake --build . 
--target mlio-arrow && \ cd ../../src/mlio-py && \ From 2de9e56ba16b55130e4438596f480372ee86ab58 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Mon, 14 Apr 2025 23:18:49 +0000 Subject: [PATCH 05/43] chore: patch pyarrow + werkzeug (attempt 5) --- docker/1.2-1/base/Dockerfile.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 7d4ed43..19d672f 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -123,7 +123,7 @@ RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \ cd /tmp && \ git clone --branch ${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio && \ cd mlio && \ - sed -i 's/find_package(Arrow [0-9.]\+ REQUIRED)/find_package(Arrow '${PYARROW_VERSION}' REQUIRED)/' CMakeLists.txt && \ + find . -type f -exec sed -i "s/find_package(Arrow [0-9.]\+ REQUIRED)/find_package(Arrow ${PYARROW_VERSION} REQUIRED)/g" {} + && \ build-tools/build-dependency build/third-party all && \ mkdir -p build/release && \ cd build/release && \ From 6d71e0f58028b79239840b22f9b9b8db2d573633 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 00:11:41 +0000 Subject: [PATCH 06/43] chore: patch pyarrow + werkzeug (attempt 6) --- docker/1.2-1/base/Dockerfile.cpu | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 19d672f..09c59af 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -100,7 +100,6 @@ ENV PIP_ROOT_USER_ACTION=ignore # We could install mlio-py from conda, but it comes with extra support such as image reader that increases image size # which increases training time. We build from source to minimize the image size. 
RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \ - # Conda configuration see https://conda.io/projects/conda/en/latest/configuration.html conda config --system --set auto_update_conda false && \ conda config --system --set show_channel_urls true && \ echo "python ${PYTHON_VERSION}.*" >> /miniconda3/conda-meta/pinned && \ @@ -123,13 +122,20 @@ RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \ cd /tmp && \ git clone --branch ${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio && \ cd mlio && \ - find . -type f -exec sed -i "s/find_package(Arrow [0-9.]\+ REQUIRED)/find_package(Arrow ${PYARROW_VERSION} REQUIRED)/g" {} + && \ + # Update Arrow version in all CMake files before build-dependency + find . -type f -name "CMakeLists.txt" -exec sed -i -e "s/find_package(Arrow [0-9.]\+ REQUIRED)/find_package(Arrow ${PYARROW_VERSION} REQUIRED)/g" {} + && \ + find . -type f -name "*.cmake" -exec sed -i -e "s/find_package(Arrow [0-9.]\+ REQUIRED)/find_package(Arrow ${PYARROW_VERSION} REQUIRED)/g" {} + && \ + # Also update any direct version references + find . -type f -name "CMakeLists.txt" -exec sed -i -e "s/Arrow 14.0.1/Arrow ${PYARROW_VERSION}/g" {} + && \ + find . -type f -name "*.cmake" -exec sed -i -e "s/Arrow 14.0.1/Arrow ${PYARROW_VERSION}/g" {} + && \ + # Now proceed with build build-tools/build-dependency build/third-party all && \ mkdir -p build/release && \ cd build/release && \ cmake -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_PREFIX_PATH="$(pwd)/../third-party" \ -DArrow_DIR=/miniconda3/lib/cmake/Arrow \ + -DARROW_VERSION=${PYARROW_VERSION} \ ../.. && \ cmake --build . && \ cmake --build . --target install && \ @@ -137,6 +143,7 @@ RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \ -DPYTHON_EXECUTABLE="/miniconda3/bin/python3" \ -DMLIO_INCLUDE_ARROW_INTEGRATION=ON \ -DArrow_DIR=/miniconda3/lib/cmake/Arrow \ + -DARROW_VERSION=${PYARROW_VERSION} \ ../.. && \ cmake --build . 
--target mlio-py && \ cmake --build . --target mlio-arrow && \ From 108ef795311ee036f4e4e45aa94389415bda4fcb Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 01:10:24 +0000 Subject: [PATCH 07/43] fix: test_pyarrow_to_parquet_conversion_regression_issue_106 --- docker/1.2-1/base/Dockerfile.cpu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 09c59af..959e504 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -169,4 +169,7 @@ ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.U # Scikit-learn now requires Python 3.6 or newer. RUN python3 -m pip install --no-cache -I scikit-learn==1.2.1 +RUN python3 -m pip install --no-cache pyarrow==${PYARROW_VERSION} +RUN conda install -y -c conda-forge pyarrow=${PYARROW_VERSION} --solver classic + RUN python3 -m pip install "werkzeug>=2.1.1" \ No newline at end of file From 8a3b9937b20aff70053d59bf575997f6344c7470 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 02:06:03 +0000 Subject: [PATCH 08/43] chore: patch pyarrow + werkzeug (attempt 8) --- docker/1.2-1/base/Dockerfile.cpu | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 959e504..8c8d39f 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -169,7 +169,12 @@ ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.U # Scikit-learn now requires Python 3.6 or newer. 
RUN python3 -m pip install --no-cache -I scikit-learn==1.2.1 -RUN python3 -m pip install --no-cache pyarrow==${PYARROW_VERSION} -RUN conda install -y -c conda-forge pyarrow=${PYARROW_VERSION} --solver classic +RUN python3 -m pip uninstall -y pyarrow && \ +python3 -m pip install --no-cache-dir pyarrow==${PYARROW_VERSION} && \ +python3 -m pip install --no-cache-dir pandas && \ +python3 -m pip install --no-cache-dir fastparquet && \ +conda install -y -c conda-forge pyarrow=${PYARROW_VERSION} pandas --solver classic && \ +python3 -c "import pyarrow; import pandas; print(f'PyArrow version: {pyarrow.__version__}')" + RUN python3 -m pip install "werkzeug>=2.1.1" \ No newline at end of file From 75ff4a8ccf3ada161f48f461977378521ee58e1c Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 02:10:50 +0000 Subject: [PATCH 09/43] chore: patch pyarrow + werkzeug (attempt 9) --- docker/1.2-1/base/Dockerfile.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 8c8d39f..ac3fab1 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -177,4 +177,4 @@ conda install -y -c conda-forge pyarrow=${PYARROW_VERSION} pandas --solver class python3 -c "import pyarrow; import pandas; print(f'PyArrow version: {pyarrow.__version__}')" -RUN python3 -m pip install "werkzeug>=2.1.1" \ No newline at end of file +RUN python3 -m pip install "werkzeug>=2.1.1" --target=/miniconda3/lib/python3.9/site-packages --force-reinstall \ No newline at end of file From fc9754191e4144c7d9df89d030d8326ca18da40d Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 11:48:00 +0000 Subject: [PATCH 10/43] chore: patch pyarrow + werkzeug (attempt 10) --- docker/1.2-1/base/Dockerfile.cpu | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index ac3fab1..a503caa 100644 --- 
a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -169,6 +169,16 @@ ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.U # Scikit-learn now requires Python 3.6 or newer. RUN python3 -m pip install --no-cache -I scikit-learn==1.2.1 +RUN apt-get update && \ + apt-get install -y software-properties-common && \ + add-apt-repository ppa:ubuntu-toolchain-r/test && \ + apt-get update && \ + apt-get install -y gcc-9 g++-9 && \ + apt-get upgrade -y libstdc++6 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 60 --slave /usr/bin/g++ g++ /usr/bin/g++-9 + RUN python3 -m pip uninstall -y pyarrow && \ python3 -m pip install --no-cache-dir pyarrow==${PYARROW_VERSION} && \ python3 -m pip install --no-cache-dir pandas && \ From 91cba8971c34aac036321cae2220fd2e01a33d73 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 12:25:13 +0000 Subject: [PATCH 11/43] chore: patch pyarrow + werkzeug (attempt 11) --- docker/1.2-1/base/Dockerfile.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index a503caa..50d35a8 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -183,7 +183,7 @@ RUN python3 -m pip uninstall -y pyarrow && \ python3 -m pip install --no-cache-dir pyarrow==${PYARROW_VERSION} && \ python3 -m pip install --no-cache-dir pandas && \ python3 -m pip install --no-cache-dir fastparquet && \ -conda install -y -c conda-forge pyarrow=${PYARROW_VERSION} pandas --solver classic && \ +conda install -y -c conda-forge pyarrow=${PYARROW_VERSION} pandas fastparquet --solver classic && \ python3 -c "import pyarrow; import pandas; print(f'PyArrow version: {pyarrow.__version__}')" From 27bd1617c7501fbb31339f80bdbb1c63dd0832a9 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 12:33:02 +0000 Subject: [PATCH 12/43] 
chore: patch pyarrow + werkzeug (attempt 12) --- docker/1.2-1/base/Dockerfile.cpu | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 50d35a8..3878cd1 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -186,5 +186,6 @@ python3 -m pip install --no-cache-dir fastparquet && \ conda install -y -c conda-forge pyarrow=${PYARROW_VERSION} pandas fastparquet --solver classic && \ python3 -c "import pyarrow; import pandas; print(f'PyArrow version: {pyarrow.__version__}')" +RUN rm -rf /miniconda3/lib/python3.9/site-packages/Werkzeug-2.0.3.dist-info/ RUN python3 -m pip install "werkzeug>=2.1.1" --target=/miniconda3/lib/python3.9/site-packages --force-reinstall \ No newline at end of file From 72db4cb29bf150931a57900e051b09b10bdd1fd1 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 13:12:03 +0000 Subject: [PATCH 13/43] chore: patch pyarrow + werkzeug (attempt 13) --- docker/1.2-1/base/Dockerfile.cpu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 3878cd1..58bf58d 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -186,6 +186,5 @@ python3 -m pip install --no-cache-dir fastparquet && \ conda install -y -c conda-forge pyarrow=${PYARROW_VERSION} pandas fastparquet --solver classic && \ python3 -c "import pyarrow; import pandas; print(f'PyArrow version: {pyarrow.__version__}')" -RUN rm -rf /miniconda3/lib/python3.9/site-packages/Werkzeug-2.0.3.dist-info/ - +RUN rm -rf /miniconda3/lib/python3.9/site-packages/Werkzeug-2.0.3* RUN python3 -m pip install "werkzeug>=2.1.1" --target=/miniconda3/lib/python3.9/site-packages --force-reinstall \ No newline at end of file From 72144080dc7c5e76d6a28ef2f4c33d96573eda90 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 13:13:42 +0000 Subject: [PATCH 14/43] chore: patch pyarrow 
+ werkzeug (attempt 14) --- docker/1.2-1/base/Dockerfile.cpu | 37 ++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 58bf58d..622a575 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -179,12 +179,37 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* && \ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 60 --slave /usr/bin/g++ g++ /usr/bin/g++-9 -RUN python3 -m pip uninstall -y pyarrow && \ -python3 -m pip install --no-cache-dir pyarrow==${PYARROW_VERSION} && \ -python3 -m pip install --no-cache-dir pandas && \ -python3 -m pip install --no-cache-dir fastparquet && \ -conda install -y -c conda-forge pyarrow=${PYARROW_VERSION} pandas fastparquet --solver classic && \ -python3 -c "import pyarrow; import pandas; print(f'PyArrow version: {pyarrow.__version__}')" +# First, ensure we have all necessary build dependencies +RUN apt-get update && apt-get install -y \ + cmake \ + build-essential \ + libboost-dev \ + libboost-filesystem-dev \ + libboost-system-dev \ + libboost-regex-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Then clean up any existing PyArrow installations +RUN conda clean --all && \ + python3 -m pip uninstall -y pyarrow pandas fastparquet + +# Install PyArrow and dependencies in the correct order +RUN conda install -y -c conda-forge \ + pyarrow=${PYARROW_VERSION} \ + pandas \ + fastparquet \ + --solver classic && \ + python3 -m pip install --no-cache-dir \ + pyarrow==${PYARROW_VERSION} \ + pandas \ + fastparquet && \ + # Verify the installation + python3 -c "import pyarrow; import pyarrow.parquet; import pandas as pd; print(f'PyArrow version: {pyarrow.__version__}')" + +# Set the Arrow memory allocator +ENV ARROW_DEFAULT_MEMORY_POOL=system + RUN rm -rf /miniconda3/lib/python3.9/site-packages/Werkzeug-2.0.3* RUN python3 -m pip install "werkzeug>=2.1.1" 
--target=/miniconda3/lib/python3.9/site-packages --force-reinstall \ No newline at end of file From d913aed1eb584fa4b25e3f2599d6d5a399ceee1b Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 14:43:18 +0000 Subject: [PATCH 15/43] chore: patch pyarrow + werkzeug (attempt 15) --- docker/1.2-1/base/Dockerfile.cpu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 622a575..676f554 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -211,5 +211,5 @@ RUN conda install -y -c conda-forge \ ENV ARROW_DEFAULT_MEMORY_POOL=system -RUN rm -rf /miniconda3/lib/python3.9/site-packages/Werkzeug-2.0.3* -RUN python3 -m pip install "werkzeug>=2.1.1" --target=/miniconda3/lib/python3.9/site-packages --force-reinstall \ No newline at end of file +RUN python3 -m pip install "werkzeug>=2.1.1" --target=/miniconda3/lib/python3.9/site-packages --force-reinstall +RUN rm -rf /miniconda3/lib/python3.9/site-packages/Werkzeug-2.0.3* \ No newline at end of file From d7f1b726c51da6ea706d1fd537a27fbf302b9695 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 14:46:43 +0000 Subject: [PATCH 16/43] chore: patch pyarrow + werkzeug (attempt 16) --- docker/1.2-1/base/Dockerfile.cpu | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 676f554..025a555 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -209,6 +209,11 @@ RUN conda install -y -c conda-forge \ # Set the Arrow memory allocator ENV ARROW_DEFAULT_MEMORY_POOL=system +ENV PYTHONPATH="/miniconda3/lib/python3.9/site-packages:${PYTHONPATH}" + +python3 -c "import sys; print(sys.path)" +python3 -c "import pyarrow; print(pyarrow.__file__)" +python3 -m pip list | grep -E "pyarrow|pandas|fastparquet" RUN python3 -m pip install "werkzeug>=2.1.1" 
--target=/miniconda3/lib/python3.9/site-packages --force-reinstall From b84b352429a6dc8217a3db6faa9d2df32949f62b Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 14:49:19 +0000 Subject: [PATCH 17/43] chore: patch pyarrow + werkzeug (attempt 17) --- docker/1.2-1/base/Dockerfile.cpu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 025a555..1d3876c 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -211,9 +211,9 @@ RUN conda install -y -c conda-forge \ ENV ARROW_DEFAULT_MEMORY_POOL=system ENV PYTHONPATH="/miniconda3/lib/python3.9/site-packages:${PYTHONPATH}" -python3 -c "import sys; print(sys.path)" -python3 -c "import pyarrow; print(pyarrow.__file__)" -python3 -m pip list | grep -E "pyarrow|pandas|fastparquet" +RUN python3 -c "import sys; print(sys.path)" +RUN python3 -c "import pyarrow; print(pyarrow.__file__)" +RUN python3 -m pip list | grep -E "pyarrow|pandas|fastparquet" RUN python3 -m pip install "werkzeug>=2.1.1" --target=/miniconda3/lib/python3.9/site-packages --force-reinstall From 6396318be43988ffc4c8def6f08ccca45ff8be64 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 18:02:56 +0000 Subject: [PATCH 18/43] chore: patch pyarrow + werkzeug (attempt 18) --- docker/1.2-1/base/Dockerfile.cpu | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 1d3876c..077465c 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -217,4 +217,9 @@ RUN python3 -m pip list | grep -E "pyarrow|pandas|fastparquet" RUN python3 -m pip install "werkzeug>=2.1.1" --target=/miniconda3/lib/python3.9/site-packages --force-reinstall -RUN rm -rf /miniconda3/lib/python3.9/site-packages/Werkzeug-2.0.3* \ No newline at end of file +RUN rm -rf /miniconda3/lib/python3.9/site-packages/Werkzeug-2.0.3* + +RUN 
pip uninstall -y protobuf typing-extensions && \ + pip install --no-cache-dir --upgrade \ + typing-extensions \ + protobuf \ No newline at end of file From 69ba166987e472f1fb813ac5a9d2bfe6672afaf5 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Tue, 15 Apr 2025 19:31:14 +0000 Subject: [PATCH 19/43] chore: patch pyarrow + werkzeug (attempt 19) --- docker/1.2-1/base/Dockerfile.cpu | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 077465c..1c239c9 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -222,4 +222,16 @@ RUN rm -rf /miniconda3/lib/python3.9/site-packages/Werkzeug-2.0.3* RUN pip uninstall -y protobuf typing-extensions && \ pip install --no-cache-dir --upgrade \ typing-extensions \ - protobuf \ No newline at end of file + importlib-metadata \ + typing \ + protobuf + +# # Add the test directly in Dockerfile to catch issues during build +# RUN python3 -c "import pandas as pd; \ +# import pyarrow; \ +# import pyarrow.parquet; \ +# df = pd.DataFrame({'x': [1, 2]}); \ +# df.to_parquet('test.parquet', engine='pyarrow'); \ +# print('PyArrow parquet conversion test passed successfully'); \ +# import os; \ +# os.remove('test.parquet')" \ No newline at end of file From ee7e6e6c6cd2d3bd5a38d0fc5e0f0e284edf589b Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 16 Apr 2025 13:37:07 +0000 Subject: [PATCH 20/43] chore: patch pyarrow + werkzeug (attempt 20) --- docker/1.2-1/base/Dockerfile.cpu | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 1c239c9..aa1cb33 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -219,12 +219,7 @@ RUN python3 -m pip list | grep -E "pyarrow|pandas|fastparquet" RUN python3 -m pip install "werkzeug>=2.1.1" --target=/miniconda3/lib/python3.9/site-packages --force-reinstall 
RUN rm -rf /miniconda3/lib/python3.9/site-packages/Werkzeug-2.0.3* -RUN pip uninstall -y protobuf typing-extensions && \ - pip install --no-cache-dir --upgrade \ - typing-extensions \ - importlib-metadata \ - typing \ - protobuf +RUN pip uninstall -y typing # # Add the test directly in Dockerfile to catch issues during build # RUN python3 -c "import pandas as pd; \ From e342b7c94473bb342c39a11831c28962717323b0 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 16 Apr 2025 13:38:47 +0000 Subject: [PATCH 21/43] chore: add python3 -m --- docker/1.2-1/base/Dockerfile.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index aa1cb33..781c1f2 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -219,7 +219,7 @@ RUN python3 -m pip list | grep -E "pyarrow|pandas|fastparquet" RUN python3 -m pip install "werkzeug>=2.1.1" --target=/miniconda3/lib/python3.9/site-packages --force-reinstall RUN rm -rf /miniconda3/lib/python3.9/site-packages/Werkzeug-2.0.3* -RUN pip uninstall -y typing +RUN python3 -m pip uninstall -y typing # # Add the test directly in Dockerfile to catch issues during build # RUN python3 -c "import pandas as pd; \ From 338a87dec94b2707789bab4d2695753d4d96590c Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 16 Apr 2025 13:44:56 +0000 Subject: [PATCH 22/43] fix: werkzeug requirement --- docker/1.2-1/base/Dockerfile.cpu | 3 --- requirements.txt | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 781c1f2..b8ab3ce 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -216,9 +216,6 @@ RUN python3 -c "import pyarrow; print(pyarrow.__file__)" RUN python3 -m pip list | grep -E "pyarrow|pandas|fastparquet" -RUN python3 -m pip install "werkzeug>=2.1.1" --target=/miniconda3/lib/python3.9/site-packages --force-reinstall 
-RUN rm -rf /miniconda3/lib/python3.9/site-packages/Werkzeug-2.0.3* - RUN python3 -m pip uninstall -y typing # # Add the test directly in Dockerfile to catch issues during build diff --git a/requirements.txt b/requirements.txt index e388ccc..db22d90 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ jinja2==3.0.3 MarkupSafe==2.1.1 numpy==1.24.1 gevent==23.9.1 -Werkzeug==2.0.3 +Werkzeug==2.1.1 setuptools wheel certifi \ No newline at end of file From 0ec385f60ada904cc25731822cf9d8fd3a4423bc Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 16 Apr 2025 13:48:34 +0000 Subject: [PATCH 23/43] fix: werkzeug pinning --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index db22d90..a37ab64 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ jinja2==3.0.3 MarkupSafe==2.1.1 numpy==1.24.1 gevent==23.9.1 -Werkzeug==2.1.1 +Werkzeug>=2.1.1 setuptools wheel certifi \ No newline at end of file From b8f645be7e1bb2b52eb6287ca0bcb4bc54d00063 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 16 Apr 2025 14:58:45 +0000 Subject: [PATCH 24/43] fix: typing uninstallation --- docker/1.2-1/base/Dockerfile.cpu | 2 -- docker/1.2-1/final/Dockerfile.cpu | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index b8ab3ce..1098b77 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -216,8 +216,6 @@ RUN python3 -c "import pyarrow; print(pyarrow.__file__)" RUN python3 -m pip list | grep -E "pyarrow|pandas|fastparquet" -RUN python3 -m pip uninstall -y typing - # # Add the test directly in Dockerfile to catch issues during build # RUN python3 -c "import pandas as pd; \ # import pyarrow; \ diff --git a/docker/1.2-1/final/Dockerfile.cpu b/docker/1.2-1/final/Dockerfile.cpu index 5149740..f16dd14 100644 --- a/docker/1.2-1/final/Dockerfile.cpu +++ 
b/docker/1.2-1/final/Dockerfile.cpu @@ -8,6 +8,8 @@ COPY requirements.txt /requirements.txt RUN python -m pip install -r /requirements.txt && \ rm /requirements.txt +RUN python3 -m pip uninstall -y typing + COPY dist/sagemaker_sklearn_container-2.0-py3-none-any.whl /sagemaker_sklearn_container-2.0-py3-none-any.whl RUN rm /miniconda3/lib/python3.9/site-packages/**/REQUESTED && \ rm /miniconda3/lib/python3.9/site-packages/**/direct_url.json From a874c2937fd4ff89e3f3994a6eda42b11cbbceb0 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 16 Apr 2025 15:34:38 +0000 Subject: [PATCH 25/43] fix: uninstall typing cont --- docker/1.2-1/final/Dockerfile.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/1.2-1/final/Dockerfile.cpu b/docker/1.2-1/final/Dockerfile.cpu index f16dd14..9159a5b 100644 --- a/docker/1.2-1/final/Dockerfile.cpu +++ b/docker/1.2-1/final/Dockerfile.cpu @@ -8,7 +8,7 @@ COPY requirements.txt /requirements.txt RUN python -m pip install -r /requirements.txt && \ rm /requirements.txt -RUN python3 -m pip uninstall -y typing +RUN python3 -m pip uninstall -y typing --ignore-requires-python COPY dist/sagemaker_sklearn_container-2.0-py3-none-any.whl /sagemaker_sklearn_container-2.0-py3-none-any.whl RUN rm /miniconda3/lib/python3.9/site-packages/**/REQUESTED && \ From a2be69c33607309d0b59be8c17fb4e0a0545e0e6 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 16 Apr 2025 17:04:57 +0000 Subject: [PATCH 26/43] fix: uninstall typing cont 2 --- docker/1.2-1/final/Dockerfile.cpu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/1.2-1/final/Dockerfile.cpu b/docker/1.2-1/final/Dockerfile.cpu index 9159a5b..0fbbcef 100644 --- a/docker/1.2-1/final/Dockerfile.cpu +++ b/docker/1.2-1/final/Dockerfile.cpu @@ -8,7 +8,8 @@ COPY requirements.txt /requirements.txt RUN python -m pip install -r /requirements.txt && \ rm /requirements.txt -RUN python3 -m pip uninstall -y typing --ignore-requires-python +RUN conda remove --force 
typing typing-extensions || true && \ +conda clean -ya COPY dist/sagemaker_sklearn_container-2.0-py3-none-any.whl /sagemaker_sklearn_container-2.0-py3-none-any.whl RUN rm /miniconda3/lib/python3.9/site-packages/**/REQUESTED && \ From 30c95ebabc064da74763fece45be4e80cf8f5778 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 16 Apr 2025 18:41:24 +0000 Subject: [PATCH 27/43] chore: dont reset pythonpath --- docker/1.2-1/base/Dockerfile.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 1098b77..3aa17d5 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -209,7 +209,7 @@ RUN conda install -y -c conda-forge \ # Set the Arrow memory allocator ENV ARROW_DEFAULT_MEMORY_POOL=system -ENV PYTHONPATH="/miniconda3/lib/python3.9/site-packages:${PYTHONPATH}" +# ENV PYTHONPATH="/miniconda3/lib/python3.9/site-packages:${PYTHONPATH}" RUN python3 -c "import sys; print(sys.path)" RUN python3 -c "import pyarrow; print(pyarrow.__file__)" From 162885095c7e2e4e108b14532d9a3060797e19d1 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 16 Apr 2025 22:06:55 +0000 Subject: [PATCH 28/43] chore: unpin Flask version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a37ab64..32dab2f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ boto3==1.28.57 botocore>=1.31.57,<1.32.0 cryptography -Flask==1.1.1 +Flask itsdangerous==2.0.1 gunicorn==20.0.4 model-archiver==1.0.3 From d3499d305d74f7c9bcd5a981465c36622302dc14 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 16 Apr 2025 22:11:40 +0000 Subject: [PATCH 29/43] chore: remove unnecessary lines in dockerfile --- docker/1.2-1/base/Dockerfile.cpu | 1 - docker/1.2-1/final/Dockerfile.cpu | 3 --- 2 files changed, 4 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 
3aa17d5..8f96abd 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -209,7 +209,6 @@ RUN conda install -y -c conda-forge \ # Set the Arrow memory allocator ENV ARROW_DEFAULT_MEMORY_POOL=system -# ENV PYTHONPATH="/miniconda3/lib/python3.9/site-packages:${PYTHONPATH}" RUN python3 -c "import sys; print(sys.path)" RUN python3 -c "import pyarrow; print(pyarrow.__file__)" diff --git a/docker/1.2-1/final/Dockerfile.cpu b/docker/1.2-1/final/Dockerfile.cpu index 0fbbcef..5149740 100644 --- a/docker/1.2-1/final/Dockerfile.cpu +++ b/docker/1.2-1/final/Dockerfile.cpu @@ -8,9 +8,6 @@ COPY requirements.txt /requirements.txt RUN python -m pip install -r /requirements.txt && \ rm /requirements.txt -RUN conda remove --force typing typing-extensions || true && \ -conda clean -ya - COPY dist/sagemaker_sklearn_container-2.0-py3-none-any.whl /sagemaker_sklearn_container-2.0-py3-none-any.whl RUN rm /miniconda3/lib/python3.9/site-packages/**/REQUESTED && \ rm /miniconda3/lib/python3.9/site-packages/**/direct_url.json From 2d6387a3a507107a24eea6d4759789154f3647cc Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 16 Apr 2025 22:15:06 +0000 Subject: [PATCH 30/43] chore: remove unnecessary dockerfile lines --- docker/1.2-1/base/Dockerfile.cpu | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 8f96abd..83616aa 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -209,18 +209,3 @@ RUN conda install -y -c conda-forge \ # Set the Arrow memory allocator ENV ARROW_DEFAULT_MEMORY_POOL=system - -RUN python3 -c "import sys; print(sys.path)" -RUN python3 -c "import pyarrow; print(pyarrow.__file__)" -RUN python3 -m pip list | grep -E "pyarrow|pandas|fastparquet" - - -# # Add the test directly in Dockerfile to catch issues during build -# RUN python3 -c "import pandas as pd; \ -# import pyarrow; \ -# import pyarrow.parquet; \ -# df = 
pd.DataFrame({'x': [1, 2]}); \ -# df.to_parquet('test.parquet', engine='pyarrow'); \ -# print('PyArrow parquet conversion test passed successfully'); \ -# import os; \ -# os.remove('test.parquet')" \ No newline at end of file From b4ff0222a74246a5da877d60fdc2e33f5dce60e3 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 16 Apr 2025 22:44:14 +0000 Subject: [PATCH 31/43] chore: upgrade flask --- docker/1.2-1/final/Dockerfile.cpu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/1.2-1/final/Dockerfile.cpu b/docker/1.2-1/final/Dockerfile.cpu index 5149740..1eeaf4e 100644 --- a/docker/1.2-1/final/Dockerfile.cpu +++ b/docker/1.2-1/final/Dockerfile.cpu @@ -8,6 +8,8 @@ COPY requirements.txt /requirements.txt RUN python -m pip install -r /requirements.txt && \ rm /requirements.txt +RUN python -m pip install -U Flask + COPY dist/sagemaker_sklearn_container-2.0-py3-none-any.whl /sagemaker_sklearn_container-2.0-py3-none-any.whl RUN rm /miniconda3/lib/python3.9/site-packages/**/REQUESTED && \ rm /miniconda3/lib/python3.9/site-packages/**/direct_url.json From 543ffbb815074df800ef86b68fdcfa93dd0af756 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Wed, 16 Apr 2025 23:20:59 +0000 Subject: [PATCH 32/43] chore: move flask upgrade after sklearn container installation --- docker/1.2-1/final/Dockerfile.cpu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/1.2-1/final/Dockerfile.cpu b/docker/1.2-1/final/Dockerfile.cpu index 1eeaf4e..32771f5 100644 --- a/docker/1.2-1/final/Dockerfile.cpu +++ b/docker/1.2-1/final/Dockerfile.cpu @@ -8,14 +8,14 @@ COPY requirements.txt /requirements.txt RUN python -m pip install -r /requirements.txt && \ rm /requirements.txt -RUN python -m pip install -U Flask - COPY dist/sagemaker_sklearn_container-2.0-py3-none-any.whl /sagemaker_sklearn_container-2.0-py3-none-any.whl RUN rm /miniconda3/lib/python3.9/site-packages/**/REQUESTED && \ rm /miniconda3/lib/python3.9/site-packages/**/direct_url.json RUN pip install 
--no-cache /sagemaker_sklearn_container-2.0-py3-none-any.whl && \ rm /sagemaker_sklearn_container-2.0-py3-none-any.whl +RUN python -m pip install -U Flask + ENV SAGEMAKER_TRAINING_MODULE sagemaker_sklearn_container.training:main ENV SAGEMAKER_SERVING_MODULE sagemaker_sklearn_container.serving:main From 13badad6aace4a262d95d83dadcbeaee09e84116 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Thu, 17 Apr 2025 12:57:59 +0000 Subject: [PATCH 33/43] chore: use higher flask version in test requirements --- test-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-requirements.txt b/test-requirements.txt index 41afc3b..fcb7ad5 100755 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,4 +1,4 @@ -Flask +Flask>=3.1.0 PyYAML boto3>=1.24.17 coverage From 22fb8e70e81da21bbf31a08cc733bf9f85cad5f3 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Thu, 17 Apr 2025 13:06:08 +0000 Subject: [PATCH 34/43] fix: flask installation --- docker/1.2-1/final/Dockerfile.cpu | 4 +--- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docker/1.2-1/final/Dockerfile.cpu b/docker/1.2-1/final/Dockerfile.cpu index 32771f5..fd39bb4 100644 --- a/docker/1.2-1/final/Dockerfile.cpu +++ b/docker/1.2-1/final/Dockerfile.cpu @@ -5,7 +5,7 @@ ENV PIP_ROOT_USER_ACTION=ignore LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true COPY requirements.txt /requirements.txt -RUN python -m pip install -r /requirements.txt && \ +RUN python -m pip install --no-cache-dir -r /requirements.txt && \ rm /requirements.txt COPY dist/sagemaker_sklearn_container-2.0-py3-none-any.whl /sagemaker_sklearn_container-2.0-py3-none-any.whl @@ -14,8 +14,6 @@ RUN rm /miniconda3/lib/python3.9/site-packages/**/REQUESTED && \ RUN pip install --no-cache /sagemaker_sklearn_container-2.0-py3-none-any.whl && \ rm /sagemaker_sklearn_container-2.0-py3-none-any.whl -RUN python -m pip install -U Flask - ENV SAGEMAKER_TRAINING_MODULE 
sagemaker_sklearn_container.training:main ENV SAGEMAKER_SERVING_MODULE sagemaker_sklearn_container.serving:main diff --git a/requirements.txt b/requirements.txt index 32dab2f..79ee591 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ boto3==1.28.57 botocore>=1.31.57,<1.32.0 cryptography -Flask +Flask>=3.1.0 itsdangerous==2.0.1 gunicorn==20.0.4 model-archiver==1.0.3 From 33c01c2d346d4ee6b70fdbe44eaceeb7129420f9 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Thu, 17 Apr 2025 13:44:29 +0000 Subject: [PATCH 35/43] chore: relax flask requirement --- requirements.txt | 2 +- test-requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 79ee591..32dab2f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ boto3==1.28.57 botocore>=1.31.57,<1.32.0 cryptography -Flask>=3.1.0 +Flask itsdangerous==2.0.1 gunicorn==20.0.4 model-archiver==1.0.3 diff --git a/test-requirements.txt b/test-requirements.txt index fcb7ad5..41afc3b 100755 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,4 +1,4 @@ -Flask>=3.1.0 +Flask PyYAML boto3>=1.24.17 coverage From 85e8bfefba3ba135edafa6bf924c39c6c7061384 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Thu, 17 Apr 2025 13:52:25 +0000 Subject: [PATCH 36/43] chore: hot fix for werkzeug with flask --- docker/1.2-1/final/Dockerfile.cpu | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docker/1.2-1/final/Dockerfile.cpu b/docker/1.2-1/final/Dockerfile.cpu index fd39bb4..7a3b6cc 100644 --- a/docker/1.2-1/final/Dockerfile.cpu +++ b/docker/1.2-1/final/Dockerfile.cpu @@ -14,6 +14,12 @@ RUN rm /miniconda3/lib/python3.9/site-packages/**/REQUESTED && \ RUN pip install --no-cache /sagemaker_sklearn_container-2.0-py3-none-any.whl && \ rm /sagemaker_sklearn_container-2.0-py3-none-any.whl +# Create a compatibility module for Flask to work with newer Werkzeug +RUN echo 'from werkzeug.wrappers import Response as BaseResponse' > 
/miniconda3/lib/python3.9/site-packages/werkzeug/wrappers/compat.py && \ + sed -i 's/from werkzeug.wrappers import BaseResponse/from werkzeug.wrappers.compat import BaseResponse/' /miniconda3/lib/python3.9/site-packages/flask/app.py && \ + find /miniconda3/lib/python3.9/site-packages/flask -type f -name "*.py" -exec sed -i 's/from werkzeug.wrappers import BaseResponse/from werkzeug.wrappers.compat import BaseResponse/g' {} \; + + ENV SAGEMAKER_TRAINING_MODULE sagemaker_sklearn_container.training:main ENV SAGEMAKER_SERVING_MODULE sagemaker_sklearn_container.serving:main From de2716057473e6b59bdaa25df9bdab441449d588 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Thu, 17 Apr 2025 14:31:38 +0000 Subject: [PATCH 37/43] chore: undo werkzeug patch --- docker/1.2-1/final/Dockerfile.cpu | 6 ------ requirements.txt | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/docker/1.2-1/final/Dockerfile.cpu b/docker/1.2-1/final/Dockerfile.cpu index 7a3b6cc..fd39bb4 100644 --- a/docker/1.2-1/final/Dockerfile.cpu +++ b/docker/1.2-1/final/Dockerfile.cpu @@ -14,12 +14,6 @@ RUN rm /miniconda3/lib/python3.9/site-packages/**/REQUESTED && \ RUN pip install --no-cache /sagemaker_sklearn_container-2.0-py3-none-any.whl && \ rm /sagemaker_sklearn_container-2.0-py3-none-any.whl -# Create a compatibility module for Flask to work with newer Werkzeug -RUN echo 'from werkzeug.wrappers import Response as BaseResponse' > /miniconda3/lib/python3.9/site-packages/werkzeug/wrappers/compat.py && \ - sed -i 's/from werkzeug.wrappers import BaseResponse/from werkzeug.wrappers.compat import BaseResponse/' /miniconda3/lib/python3.9/site-packages/flask/app.py && \ - find /miniconda3/lib/python3.9/site-packages/flask -type f -name "*.py" -exec sed -i 's/from werkzeug.wrappers import BaseResponse/from werkzeug.wrappers.compat import BaseResponse/g' {} \; - - ENV SAGEMAKER_TRAINING_MODULE sagemaker_sklearn_container.training:main ENV SAGEMAKER_SERVING_MODULE 
sagemaker_sklearn_container.serving:main diff --git a/requirements.txt b/requirements.txt index 32dab2f..bb0259d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ jinja2==3.0.3 MarkupSafe==2.1.1 numpy==1.24.1 gevent==23.9.1 -Werkzeug>=2.1.1 +Werkzeug setuptools wheel certifi \ No newline at end of file From 0f23b7ede40f9770b857ad6678ea1b520bbed2ad Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Thu, 17 Apr 2025 15:10:08 +0000 Subject: [PATCH 38/43] chore: pin werkzeug and flask reqs --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index bb0259d..e388ccc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ boto3==1.28.57 botocore>=1.31.57,<1.32.0 cryptography -Flask +Flask==1.1.1 itsdangerous==2.0.1 gunicorn==20.0.4 model-archiver==1.0.3 @@ -22,7 +22,7 @@ jinja2==3.0.3 MarkupSafe==2.1.1 numpy==1.24.1 gevent==23.9.1 -Werkzeug +Werkzeug==2.0.3 setuptools wheel certifi \ No newline at end of file From c145cdb4d1f6a319707ae1ac93b95d6d35674927 Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Thu, 17 Apr 2025 15:48:59 +0000 Subject: [PATCH 39/43] chore: remove pandas from requirements --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e388ccc..e493661 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,6 @@ itsdangerous==2.0.1 gunicorn==20.0.4 model-archiver==1.0.3 multi-model-server==1.1.1 -pandas==1.1.3 protobuf==3.20.2 psutil==5.7.2 python-dateutil==2.8.1 From 1256caf99b3323c4e3460353cabad3b27612bc6e Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Thu, 17 Apr 2025 16:40:20 +0000 Subject: [PATCH 40/43] chore: unpin numpy --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e493661..4230c34 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,7 @@ urllib3==1.26.17 six==1.15.0 jinja2==3.0.3 
MarkupSafe==2.1.1 -numpy==1.24.1 +numpy gevent==23.9.1 Werkzeug==2.0.3 setuptools From ef7f0dbfeca1699e1e27a8acdc76c73c7549fcee Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Thu, 17 Apr 2025 17:46:31 +0000 Subject: [PATCH 41/43] remove pandas installation --- docker/1.2-1/base/Dockerfile.cpu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index 83616aa..f58f86a 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -192,20 +192,18 @@ RUN apt-get update && apt-get install -y \ # Then clean up any existing PyArrow installations RUN conda clean --all && \ - python3 -m pip uninstall -y pyarrow pandas fastparquet + python3 -m pip uninstall -y pyarrow fastparquet # Install PyArrow and dependencies in the correct order RUN conda install -y -c conda-forge \ pyarrow=${PYARROW_VERSION} \ - pandas \ fastparquet \ --solver classic && \ python3 -m pip install --no-cache-dir \ pyarrow==${PYARROW_VERSION} \ - pandas \ fastparquet && \ # Verify the installation - python3 -c "import pyarrow; import pyarrow.parquet; import pandas as pd; print(f'PyArrow version: {pyarrow.__version__}')" + python3 -c "import pyarrow; import pyarrow.parquet; print(f'PyArrow version: {pyarrow.__version__}')" # Set the Arrow memory allocator ENV ARROW_DEFAULT_MEMORY_POOL=system From fc975de4bb90fed40fd085b131cd0f91e564469d Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Thu, 17 Apr 2025 18:23:14 +0000 Subject: [PATCH 42/43] chore: additional reverts to solve pandas problem --- docker/1.2-1/final/Dockerfile.cpu | 2 +- requirements.txt | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/1.2-1/final/Dockerfile.cpu b/docker/1.2-1/final/Dockerfile.cpu index fd39bb4..5149740 100644 --- a/docker/1.2-1/final/Dockerfile.cpu +++ b/docker/1.2-1/final/Dockerfile.cpu @@ -5,7 +5,7 @@ ENV PIP_ROOT_USER_ACTION=ignore LABEL 
com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true COPY requirements.txt /requirements.txt -RUN python -m pip install --no-cache-dir -r /requirements.txt && \ +RUN python -m pip install -r /requirements.txt && \ rm /requirements.txt COPY dist/sagemaker_sklearn_container-2.0-py3-none-any.whl /sagemaker_sklearn_container-2.0-py3-none-any.whl diff --git a/requirements.txt b/requirements.txt index 4230c34..e388ccc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ itsdangerous==2.0.1 gunicorn==20.0.4 model-archiver==1.0.3 multi-model-server==1.1.1 +pandas==1.1.3 protobuf==3.20.2 psutil==5.7.2 python-dateutil==2.8.1 @@ -19,7 +20,7 @@ urllib3==1.26.17 six==1.15.0 jinja2==3.0.3 MarkupSafe==2.1.1 -numpy +numpy==1.24.1 gevent==23.9.1 Werkzeug==2.0.3 setuptools From 09a715de3c740f6ad6f215a408d495fbc7dcda6a Mon Sep 17 00:00:00 2001 From: Evan Kravitz Date: Thu, 17 Apr 2025 19:01:52 +0000 Subject: [PATCH 43/43] chore: attempted pyarrow fix --- docker/1.2-1/base/Dockerfile.cpu | 10 ---------- docker/1.2-1/final/Dockerfile.cpu | 12 ++++++++++++ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/docker/1.2-1/base/Dockerfile.cpu b/docker/1.2-1/base/Dockerfile.cpu index f58f86a..a07e880 100644 --- a/docker/1.2-1/base/Dockerfile.cpu +++ b/docker/1.2-1/base/Dockerfile.cpu @@ -194,16 +194,6 @@ RUN apt-get update && apt-get install -y \ RUN conda clean --all && \ python3 -m pip uninstall -y pyarrow fastparquet -# Install PyArrow and dependencies in the correct order -RUN conda install -y -c conda-forge \ - pyarrow=${PYARROW_VERSION} \ - fastparquet \ - --solver classic && \ - python3 -m pip install --no-cache-dir \ - pyarrow==${PYARROW_VERSION} \ - fastparquet && \ - # Verify the installation - python3 -c "import pyarrow; import pyarrow.parquet; print(f'PyArrow version: {pyarrow.__version__}')" # Set the Arrow memory allocator ENV ARROW_DEFAULT_MEMORY_POOL=system diff --git a/docker/1.2-1/final/Dockerfile.cpu b/docker/1.2-1/final/Dockerfile.cpu 
index 5149740..9890403 100644 --- a/docker/1.2-1/final/Dockerfile.cpu +++ b/docker/1.2-1/final/Dockerfile.cpu @@ -14,6 +14,18 @@ RUN rm /miniconda3/lib/python3.9/site-packages/**/REQUESTED && \ RUN pip install --no-cache /sagemaker_sklearn_container-2.0-py3-none-any.whl && \ rm /sagemaker_sklearn_container-2.0-py3-none-any.whl + +# Install PyArrow and dependencies in the correct order +RUN conda install -y -c conda-forge \ + pyarrow=${PYARROW_VERSION} \ + fastparquet \ + --solver classic && \ + python3 -m pip install --no-cache-dir \ + pyarrow==${PYARROW_VERSION} \ + fastparquet && \ + # Verify the installation + python3 -c "import pyarrow; import pyarrow.parquet; print(f'PyArrow version: {pyarrow.__version__}')" + ENV SAGEMAKER_TRAINING_MODULE sagemaker_sklearn_container.training:main ENV SAGEMAKER_SERVING_MODULE sagemaker_sklearn_container.serving:main