From d10657b519a07a56bf62ff1642b1f832e07c6aca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patrick=20Sodr=C3=A9?=
Date: Thu, 4 Nov 2021 20:57:22 -0400
Subject: [PATCH 1/3] Add initial SageMaker example

---
 examples/sagemaker/container/Dockerfile       |  45 +++
 examples/sagemaker/container/aptfile          |  10 +
 examples/sagemaker/container/entrypoint.sh    |   4 +
 .../sagemaker/container/environment-cpu.yml   |  16 +
 .../sagemaker/container/environment-gpu.yml   |  16 +
 examples/sagemaker/detect.ipynb               | 280 ++++++++++++++++++
 examples/sagemaker/predict.sh                 |   6 +
 examples/sagemaker/serve_local.sh             |   5 +
 8 files changed, 382 insertions(+)
 create mode 100644 examples/sagemaker/container/Dockerfile
 create mode 100755 examples/sagemaker/container/aptfile
 create mode 100755 examples/sagemaker/container/entrypoint.sh
 create mode 100644 examples/sagemaker/container/environment-cpu.yml
 create mode 100644 examples/sagemaker/container/environment-gpu.yml
 create mode 100644 examples/sagemaker/detect.ipynb
 create mode 100755 examples/sagemaker/predict.sh
 create mode 100755 examples/sagemaker/serve_local.sh

diff --git a/examples/sagemaker/container/Dockerfile b/examples/sagemaker/container/Dockerfile
new file mode 100644
index 0000000..61ef5f4
--- /dev/null
+++ b/examples/sagemaker/container/Dockerfile
@@ -0,0 +1,45 @@
+FROM nvidia/cuda:10.2-devel-ubuntu18.04 as cuda-mambaforge
+
+ENV CONDA_DIR=/opt/conda
+ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
+ENV PATH=${CONDA_DIR}/bin:${PATH}
+ENV CONDA_OVERRIDE_CUDA=10.2
+
+COPY --from=condaforge/mambaforge /opt/conda /opt/conda
+COPY --from=condaforge/mambaforge /usr/local/bin/tini /usr/local/bin/tini
+COPY --from=condaforge/mambaforge /etc/skel/.bashrc /etc/skel/.bashrc
+COPY --from=condaforge/mambaforge /root/.bashrc /root/.bashrc
+RUN mamba install pynvml
+
+#RUN apt-get update && \
+#    apt-get install --no-install-recommends --yes wget bzip2 ca-certificates git && \
+#    apt-get clean && \
+#    rm -rf /var/lib/apt/lists/*
+
+
+FROM cuda-mambaforge
+
+LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
+LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
+
+ENV TZ="UTC"
+ENV DEBIAN_FRONTEND=noninteractive
+
+ENTRYPOINT ["/docker-entrypoint.sh"]
+WORKDIR /home/model-server/
+
+ADD https://raw.githubusercontent.com/seatgeek/bash-aptfile/master/bin/aptfile /usr/local/bin/aptfile
+RUN chmod +x /usr/local/bin/aptfile
+
+# Ref: https://github.com/seatgeek/bash-aptfile
+COPY aptfile /home/model-server
+RUN aptfile
+
+# Darknet Library is *either* compiled for CPU or GPU, so we need to conda environments
+COPY environment-cpu.yml /home/model-server
+COPY environment-gpu.yml /home/model-server
+RUN mamba env create -n cpu -f /home/model-server/environment-cpu.yml && \
+    mamba env create -n gpu -f /home/model-server/environment-gpu.yml && \
+    conda clean --tipsy
+
+COPY entrypoint.sh /docker-entrypoint.sh
diff --git a/examples/sagemaker/container/aptfile b/examples/sagemaker/container/aptfile
new file mode 100755
index 0000000..b065694
--- /dev/null
+++ b/examples/sagemaker/container/aptfile
@@ -0,0 +1,10 @@
+#!/usr/bin/env aptfile
+
+# trigger an apt-get update
+update
+
+# install some packages
+package "openjdk-11-jre-headless"
+
+# clean
+clean
diff --git a/examples/sagemaker/container/entrypoint.sh b/examples/sagemaker/container/entrypoint.sh
new file mode 100755
index 0000000..d63bfee
--- /dev/null
+++ b/examples/sagemaker/container/entrypoint.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+source /opt/conda/etc/profile.d/conda.sh
+conda activate cpu
+exec "$@"
diff --git
a/examples/sagemaker/container/environment-cpu.yml b/examples/sagemaker/container/environment-cpu.yml new file mode 100644 index 0000000..393a076 --- /dev/null +++ b/examples/sagemaker/container/environment-cpu.yml @@ -0,0 +1,16 @@ +channels: + - zeroae + - conda-forge +dependencies: + - darknet-cpu + - darknet.py 0.3.* + - enum-compat + - future + - psutil + - retrying + - scipy + - six + - pip + - pip: + - multi-model-server 1.1.* + - sagemaker-inference 1.5.* diff --git a/examples/sagemaker/container/environment-gpu.yml b/examples/sagemaker/container/environment-gpu.yml new file mode 100644 index 0000000..ac32534 --- /dev/null +++ b/examples/sagemaker/container/environment-gpu.yml @@ -0,0 +1,16 @@ +channels: + - zeroae + - conda-forge +dependencies: + - darknet-gpu + - darknet.py 0.3.* + - enum-compat + - future + - psutil + - retrying + - scipy + - six + - pip + - pip: + - multi-model-server 1.1.* + - sagemaker-inference 1.5.* diff --git a/examples/sagemaker/detect.ipynb b/examples/sagemaker/detect.ipynb new file mode 100644 index 0000000..2641f8a --- /dev/null +++ b/examples/sagemaker/detect.ipynb @@ -0,0 +1,280 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import json\n", + "import boto3\n", + "from sagemaker.local import LocalSession\n", + "from sagemaker.model import Model\n", + "from sagemaker.predictor import Predictor\n", + "from sagemaker.serializers import IdentitySerializer\n", + "from sagemaker.deserializers import JSONDeserializer\n", + "\n", + "DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001'\n", + "\n", + "boto3_session = boto3.Session(region_name=\"us-east-1\")\n", + "session = LocalSession(boto3_session)\n", + "session.config = {\"local\": {\"local_code\": True}}" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "outputs": [], + "source": [ + "role = DUMMY_IAM_ROLE\n", + "model_dir = \"file://ml/model\"\n", + "\n", + "model = Model(\n", + " predictor_cls=Predictor,\n", + " image_uri=\"zeroae/sagemaker-darknet-inference\",\n", + " model_data=model_dir,\n", + " role=DUMMY_IAM_ROLE,\n", + " sagemaker_session=session,\n", + ")" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 22, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Attaching to 8y2hfvaxhd-algo-1-9j1rc\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Warning: MMS is using non-default JVM parameters: -XX:-UseContainerSupport\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,257 [INFO ] main com.amazonaws.ml.mms.ModelServer - \r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m MMS Home: /opt/conda/envs/darknet/lib/python3.8/site-packages\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Current directory: /\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Temp directory: /tmp\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Number of GPUs: 0\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Number of CPUs: 6\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Max heap size: 1486 M\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Python executable: /opt/conda/envs/darknet/bin/python3.8\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Config file: /etc/sagemaker-mms.properties\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Inference address: 
http://0.0.0.0:8080\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Management address: http://0.0.0.0:8080\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Model Store: /.sagemaker/mms/models\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Initial Models: ALL\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Log dir: /logs\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Metrics dir: /logs\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Netty threads: 0\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Netty client threads: 0\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Default workers per model: 6\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Blacklist Regex: N/A\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Maximum Response Size: 6553500\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Maximum Request Size: 6553500\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Preload model: false\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Prefer direct buffer: false\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,571 [WARN ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerLifeCycle - attachIOStreams() threadName=W-9000-model\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,865 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - model_service_worker started with args: --sock-type unix --sock-name /tmp/.mms.sock.9000 --handler darknet.sagemaker.detector.handler_service --model-path /.sagemaker/mms/models/model --model-name model --preload-model false --tmp-dir /tmp\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,870 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Listening on port: /tmp/.mms.sock.9000\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,872 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - [PID] 41\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,875 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - MMS worker started.\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,877 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Python runtime: 3.8.12\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,878 [INFO ] main com.amazonaws.ml.mms.wlm.ModelManager - Model model loaded.\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,912 [INFO ] main com.amazonaws.ml.mms.ModelServer - Initialize Inference server with: EpollServerSocketChannel.\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,965 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /tmp/.mms.sock.9000\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,967 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /tmp/.mms.sock.9000\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,967 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /tmp/.mms.sock.9000\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,966 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /tmp/.mms.sock.9000\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,966 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /tmp/.mms.sock.9000\r\n", + 
"\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:22,965 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /tmp/.mms.sock.9000\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:23,300 [INFO ] main com.amazonaws.ml.mms.ModelServer - Inference API bind to: http://0.0.0.0:8080\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m Model server started.\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:23,322 [WARN ] pool-2-thread-1 com.amazonaws.ml.mms.metrics.MetricCollector - worker pid is not available yet.\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:23,332 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /tmp/.mms.sock.9000.\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:23,350 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /tmp/.mms.sock.9000.\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:23,357 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /tmp/.mms.sock.9000.\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:23,368 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /tmp/.mms.sock.9000.\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:23,382 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /tmp/.mms.sock.9000.\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:23,398 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /tmp/.mms.sock.9000.\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:19:24,084 [INFO ] pool-1-thread-8 ACCESS_LOG - /172.19.0.1:58948 \"GET /ping HTTP/1.1\" 200 102\r\n", + "!" 
+ ] + } + ], + "source": [ + "predictor = model.deploy(\n", + " instance_type=\"local\",\n", + " initial_instance_count=1,\n", + " serializer=IdentitySerializer(\"image/jpeg\"),\n", + " deserializer=JSONDeserializer(),\n", + ")" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 23, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:20:48,827 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Backend response time: 23483\r\n", + "\u001B[36m8y2hfvaxhd-algo-1-9j1rc |\u001B[0m 2021-11-03 21:20:48,831 [INFO ] W-9000-model ACCESS_LOG - /172.19.0.1:58950 \"POST /invocations HTTP/1.1\" 200 23496\r\n" + ] + } + ], + "source": [ + "with open(\"dog.jpg\", \"rb\") as f:\n", + " predictions = predictor.predict(\n", + " f.read()\n", + " )" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 24, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"Labels\": [\n", + " {\n", + " \"Name\": \"bicycle\",\n", + " \"Confidence\": 92.4040675163269,\n", + " \"Instances\": [\n", + " {\n", + " \"Confidence\": 92.4040675163269,\n", + " \"BoundingBox\": {\n", + " \"Width\": 458.94085693359375,\n", + " \"Height\": 273.18804931640625,\n", + " \"Left\": 114.1905517578125,\n", + " \"Top\": 410.19049072265625\n", + " }\n", + " }\n", + " ],\n", + " \"Parents\": []\n", + " },\n", + " {\n", + " \"Name\": \"truck\",\n", + " \"Confidence\": 86.79984211921692,\n", + " \"Instances\": [\n", + " {\n", + " \"Confidence\": 86.79984211921692,\n", + " \"BoundingBox\": {\n", + " \"Width\": 227.93612670898438,\n", + " \"Height\": 93.27326202392578,\n", + " \"Left\": 467.2970733642578,\n", + " \"Top\": 169.18019485473633\n", + " }\n", + " }\n", + " ],\n", + " \"Parents\": []\n", + " },\n", + " {\n", + " \"Name\": \"dog\",\n", + " \"Confidence\": 96.15262150764465,\n", + " \"Instances\": [\n", + " {\n", + " \"Confidence\": 96.15262150764465,\n", + " \"BoundingBox\": {\n", + " \"Width\": 181.7491912841797,\n", + " \"Height\": 318.2195739746094,\n", + " \"Left\": 128.4845962524414,\n", + " \"Top\": 541.8526153564453\n", + " }\n", + " }\n", + " ],\n", + " \"Parents\": []\n", + " }\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "print(json.dumps(predictions, indent=2))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 19, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Gracefully stopping... 
(press Ctrl+C again to force)\n" + ] + } + ], + "source": [ + "predictor.delete_predictor()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/sagemaker/predict.sh b/examples/sagemaker/predict.sh new file mode 100755 index 0000000..32bb30e --- /dev/null +++ b/examples/sagemaker/predict.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +payload=$1 +content=${2:-image/jpeg} + +curl --data-binary @${payload} -H "Content-Type: ${content}" -v http://localhost:8080/invocations diff --git a/examples/sagemaker/serve_local.sh b/examples/sagemaker/serve_local.sh new file mode 100755 index 0000000..7a50d86 --- /dev/null +++ b/examples/sagemaker/serve_local.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +image=${1:-zeroae/sagemaker-darknet-inference } + +docker run -v $(pwd)/ml:/opt/ml -p 8080:8080 --rm ${image} serve From e6ac24c8c31d75c945dd7e44a1fbe64ba71f652c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sodr=C3=A9?= Date: Thu, 4 Nov 2021 21:37:54 -0400 Subject: [PATCH 2/3] Add entrypoint that checks for nvidia --- examples/sagemaker/container/Dockerfile | 13 +++++++------ examples/sagemaker/container/entrypoint.sh | 12 +++++++++++- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/examples/sagemaker/container/Dockerfile b/examples/sagemaker/container/Dockerfile index 61ef5f4..ac3155f 100644 --- a/examples/sagemaker/container/Dockerfile +++ b/examples/sagemaker/container/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:10.2-devel-ubuntu18.04 as cuda-mambaforge +FROM nvidia/cuda:10.2-devel-ubuntu18.04 as mambaforge-cuda ENV CONDA_DIR=/opt/conda ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 @@ -11,13 +11,13 @@ COPY --from=condaforge/mambaforge /etc/skel/.bashrc /etc/skel/.bashrc COPY --from=condaforge/mambaforge /root/.bashrc /root/.bashrc RUN mamba install pynvml -#RUN apt-get update && \ -# apt-get install --no-install-recommends --yes wget bzip2 ca-certificates git && \ -# apt-get clean && \ -# rm -rf /var/lib/apt/lists/* +RUN apt-get update && \ + apt-get install --no-install-recommends --yes wget bzip2 ca-certificates git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* -FROM cuda-mambaforge +FROM mambaforge-cuda LABEL com.amazonaws.sagemaker.capabilities.multi-models=true LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true @@ -43,3 +43,4 @@ RUN mamba env create -n cpu -f /home/model-server/environment-cpu.yml && \ conda clean --tipsy COPY entrypoint.sh /docker-entrypoint.sh +CMD ["python", "-m", "darknet.sagemaker"] diff --git a/examples/sagemaker/container/entrypoint.sh b/examples/sagemaker/container/entrypoint.sh index d63bfee..79c4c8f 100755 --- a/examples/sagemaker/container/entrypoint.sh +++ b/examples/sagemaker/container/entrypoint.sh @@ -1,4 +1,14 @@ #!/usr/bin/env bash source /opt/conda/etc/profile.d/conda.sh -conda activate cpu +conda activate base +conda_environment=$(python -c ' +from pynvml import * +try: + 
nvmlInit()
+    count = nvmlDeviceGetCount()
+    print("gpu" if count > 0 else "cpu")
+except:
+    print("cpu")
+')
+conda activate $conda_environment
 exec "$@"

From 11b24abd0b7892c695f43be11c09ee3613f4a193 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patrick=20Sodr=C3=A9?=
Date: Fri, 5 Nov 2021 09:01:40 -0400
Subject: [PATCH 3/3] Add GPU support

---
 examples/sagemaker/container/Dockerfile       | 16 ++++----
 examples/sagemaker/container/aptfile          |  6 +--
 examples/sagemaker/container/entrypoint.sh    |  3 +-
 .../sagemaker/container/environment-cpu.yml   |  4 +-
 .../sagemaker/container/environment-gpu.yml   |  4 +-
 examples/sagemaker/environment.yml            |  8 ++++
 examples/sagemaker/predict.py                 | 25 ++++++++++++
 examples/sagemaker/predict.sh                 |  6 ---
 examples/sagemaker/serve-local.py             | 38 +++++++++++++++++++
 examples/sagemaker/serve_local.sh             |  5 ---
 10 files changed, 88 insertions(+), 27 deletions(-)
 create mode 100644 examples/sagemaker/environment.yml
 create mode 100755 examples/sagemaker/predict.py
 delete mode 100755 examples/sagemaker/predict.sh
 create mode 100755 examples/sagemaker/serve-local.py
 delete mode 100755 examples/sagemaker/serve_local.sh

diff --git a/examples/sagemaker/container/Dockerfile b/examples/sagemaker/container/Dockerfile
index ac3155f..5c07e52 100644
--- a/examples/sagemaker/container/Dockerfile
+++ b/examples/sagemaker/container/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:10.2-devel-ubuntu18.04 as mambaforge-cuda
+FROM nvidia/cuda:10.2-base-ubuntu18.04 as mambaforge-cuda
 
 ENV CONDA_DIR=/opt/conda
 ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
@@ -9,13 +9,17 @@ COPY --from=condaforge/mambaforge /opt/conda /opt/conda
 COPY --from=condaforge/mambaforge /usr/local/bin/tini /usr/local/bin/tini
 COPY --from=condaforge/mambaforge /etc/skel/.bashrc /etc/skel/.bashrc
 COPY --from=condaforge/mambaforge /root/.bashrc /root/.bashrc
-RUN mamba install pynvml
+RUN mamba install --yes pynvml
 
 RUN apt-get update && \
     apt-get install --no-install-recommends --yes wget bzip2 ca-certificates git && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
+# Cache cudatoolkit
+RUN mamba create -y -n test --download-only cudatoolkit && \
+    conda clean -ty
+
 
 FROM mambaforge-cuda
 
@@ -35,12 +39,10 @@ RUN chmod +x /usr/local/bin/aptfile
 COPY aptfile /home/model-server
 RUN aptfile
 
-# Darknet Library is *either* compiled for CPU or GPU, so we need to conda environments
-COPY environment-cpu.yml /home/model-server
-COPY environment-gpu.yml /home/model-server
+# Darknet Library is *either* compiled for CPU or GPU, so we need two conda environments
+COPY environment-*.yml /home/model-server
 RUN mamba env create -n cpu -f /home/model-server/environment-cpu.yml && \
     mamba env create -n gpu -f /home/model-server/environment-gpu.yml && \
-    conda clean --tipsy
+    conda clean -tipsy
 
 COPY entrypoint.sh /docker-entrypoint.sh
-CMD ["python", "-m", "darknet.sagemaker"]
diff --git a/examples/sagemaker/container/aptfile b/examples/sagemaker/container/aptfile
index b065694..2aa184d 100755
--- a/examples/sagemaker/container/aptfile
+++ b/examples/sagemaker/container/aptfile
@@ -4,7 +4,5 @@
 update
 
 # install some packages
-package "openjdk-11-jre-headless"
-
-# clean
-clean
+# use JDK8 because SageMaker passes the -XX:-UseContainerSupport option
+package "openjdk-8-jre-headless"
diff --git a/examples/sagemaker/container/entrypoint.sh b/examples/sagemaker/container/entrypoint.sh
index 79c4c8f..7e05e88 100755
--- a/examples/sagemaker/container/entrypoint.sh
+++ b/examples/sagemaker/container/entrypoint.sh
@@ -11,4 +11,5 @@ except:
     print("cpu")
 ')
 conda activate $conda_environment
-exec "$@"
+
+exec python -m darknet.sagemaker
diff --git a/examples/sagemaker/container/environment-cpu.yml b/examples/sagemaker/container/environment-cpu.yml
index 393a076..88af6ec 100644
--- a/examples/sagemaker/container/environment-cpu.yml
+++ b/examples/sagemaker/container/environment-cpu.yml
@@ -12,5 +12,5 @@ dependencies:
   - six
   - pip
   - pip:
-    - multi-model-server 1.1.*
-    - sagemaker-inference 1.5.*
+    - multi-model-server ==1.1.*
+    - sagemaker-inference ==1.5.*
diff --git a/examples/sagemaker/container/environment-gpu.yml b/examples/sagemaker/container/environment-gpu.yml
index ac32534..340cd7b 100644
--- a/examples/sagemaker/container/environment-gpu.yml
+++ b/examples/sagemaker/container/environment-gpu.yml
@@ -12,5 +12,5 @@ dependencies:
   - six
   - pip
   - pip:
-    - multi-model-server 1.1.*
-    - sagemaker-inference 1.5.*
+    - multi-model-server ==1.1.*
+    - sagemaker-inference ==1.5.*
diff --git a/examples/sagemaker/environment.yml b/examples/sagemaker/environment.yml
new file mode 100644
index 0000000..a034d00
--- /dev/null
+++ b/examples/sagemaker/environment.yml
@@ -0,0 +1,8 @@
+channels:
+  - conda-forge
+dependencies:
+  - jupyter
+  - jupyterlab
+  - pip
+  - pip:
+    - sagemaker[local]
diff --git a/examples/sagemaker/predict.py b/examples/sagemaker/predict.py
new file mode 100755
index 0000000..71c1306
--- /dev/null
+++ b/examples/sagemaker/predict.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+
+import boto3
+import json
+from sagemaker.local import LocalSession
+from sagemaker.predictor import Predictor
+from sagemaker.serializers import IdentitySerializer
+from sagemaker.deserializers import JSONDeserializer
+
+boto3_session = boto3.Session(region_name="us-east-1")
+session = LocalSession(boto3_session)
+session.config = {"local": {"local_code": True}}
+
+predictor = Predictor(
+    sagemaker_session=session,
+    endpoint_name="darknet",
+    serializer=IdentitySerializer("image/jpeg"),
+    deserializer=JSONDeserializer(),
+)
+
+with open("dog.jpg", "rb") as f:
+    predictions = predictor.predict(
+        f.read()
+    )
+print(json.dumps(predictions, indent=2))
diff --git a/examples/sagemaker/predict.sh b/examples/sagemaker/predict.sh
deleted file mode 100755
index 32bb30e..0000000
--- a/examples/sagemaker/predict.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-payload=$1
-content=${2:-image/jpeg}
-
-curl --data-binary @${payload} -H "Content-Type: ${content}" -v http://localhost:8080/invocations
diff --git a/examples/sagemaker/serve-local.py b/examples/sagemaker/serve-local.py
new file mode 100755
index 0000000..fa7c345
--- /dev/null
+++ b/examples/sagemaker/serve-local.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+import json
+import boto3
+from sagemaker.local import LocalSession
+from sagemaker.model import Model
+from sagemaker.predictor import Predictor
+from sagemaker.serializers import IdentitySerializer
+from sagemaker.deserializers import JSONDeserializer
+
+DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001'
+
+boto3_session = boto3.Session(region_name="us-east-1")
+session = LocalSession(boto3_session)
+session.config = {"local": {"local_code": True}}
+
+role = DUMMY_IAM_ROLE
+model_dir = "file://ml/model"
+
+model = Model(
+    predictor_cls=Predictor,
+    image_uri="zeroae/sagemaker-darknet-inference",
+    model_data=model_dir,
+    role=DUMMY_IAM_ROLE,
+    env={
+        "SAGEMAKER_MODEL_SERVER_WORKERS": "2"
+    },
+    sagemaker_session=session,
+)
+
+predictor = model.deploy(
+    name="darknet",
+    instance_type="local_gpu",
+    initial_instance_count=1,
+    serializer=IdentitySerializer("image/jpeg"),
+    deserializer=JSONDeserializer(),
+)
+
diff --git a/examples/sagemaker/serve_local.sh b/examples/sagemaker/serve_local.sh
deleted file mode 100755
index 7a50d86..0000000
--- a/examples/sagemaker/serve_local.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/usr/bin/env bash
-
-image=${1:-zeroae/sagemaker-darknet-inference }
-
-docker run -v $(pwd)/ml:/opt/ml -p 8080:8080 --rm ${image} serve
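
To try the series end to end, here is a minimal smoke test sketch. It assumes Docker is installed, a Darknet model archive is unpacked under examples/sagemaker/ml/model (the model_dir hard-coded in serve-local.py), and a dog.jpg test image sits next to the scripts, as predict.py expects. On a CPU-only host, change instance_type in serve-local.py from "local_gpu" to "local".

    # Build the inference image from the container/ build context,
    # using the tag hard-coded in serve-local.py.
    docker build -t zeroae/sagemaker-darknet-inference examples/sagemaker/container

    # Start a local endpoint named "darknet" through the SageMaker
    # local-mode SDK, then send dog.jpg to it and print the detections.
    cd examples/sagemaker
    ./serve-local.py
    ./predict.py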
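The CPU/GPU switch lives entirely in entrypoint.sh: it activates the base env, probes NVML through pynvml, and picks the gpu conda env only when at least one device is visible, falling back to cpu otherwise. A sketch of how that behaves when running the image by hand, assuming the NVIDIA Container Toolkit for the --gpus flag; after patch 3 the entrypoint always execs python -m darknet.sagemaker, so no serve argument is needed:

    # CPU-only run: NVML initialization fails inside the container,
    # so the entrypoint falls back to the "cpu" env (darknet-cpu build).
    docker run --rm -p 8080:8080 -v "$(pwd)/ml:/opt/ml" zeroae/sagemaker-darknet-inference

    # GPU run: NVML sees the devices exposed by --gpus, so the "gpu" env
    # (darknet-gpu build) is activated instead.
    docker run --rm --gpus all -p 8080:8080 -v "$(pwd)/ml:/opt/ml" zeroae/sagemaker-darknet-inference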