Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update the default Gunicorn API server workers count to one #1454

Merged
merged 5 commits into from
Feb 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bentoml/configuration/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ def __init__(
self.config["api_server"]["port"] = config("apiserver").getint(
"default_port"
)
self.config["api_server"]["workers"] = config("apiserver").getint(
"default_gunicorn_workers_count"
)
self.config["api_server"]["max_request_size"] = config(
"apiserver"
).getint("default_max_request_size")
Expand Down
5 changes: 3 additions & 2 deletions bentoml/configuration/default_bentoml.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,9 @@ default_image_input_accept_file_extensions = .jpg,.png,.jpeg,.tiff,.webp,.bmp

# Set to a positive integer to take effect, otherwise will fallback to a
# runtime calculated value based on cpu cores
# see `bentoml.server.utils.get_gunicorn_num_of_workers` for details
default_gunicorn_workers_count = -1
# see `bentoml.configuration.containers.BentoMLContainer.api_server_workers`
# for details
default_gunicorn_workers_count = 1
batch_request_header = Bentoml-Is-Batch-Request


Expand Down
2 changes: 1 addition & 1 deletion bentoml/configuration/default_bentoml.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ api_server:
# automatically, see `bentoml.configuration.containers.BentoMLContainer.
# api_server_workers` for details.
#
workers: Null
workers: 1

timeout: 60

Expand Down
5 changes: 4 additions & 1 deletion bentoml/server/instruments.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import multiprocessing
import os
import shutil
Expand All @@ -6,7 +7,9 @@
from flask import Request

from bentoml import config
from bentoml.server.utils import logger


logger = logging.getLogger(__name__)


class InstrumentMiddleware:
Expand Down
35 changes: 0 additions & 35 deletions bentoml/server/utils.py

This file was deleted.

24 changes: 17 additions & 7 deletions bentoml/yatai/deployment/sagemaker/serve
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,9 @@ import subprocess
import os
import signal
import sys
from dependency_injector.wiring import inject, Provide

from bentoml.server.utils import get_gunicorn_num_of_workers

bento_server_timeout = os.environ.get('BENTOML_GUNICORN_TIMEOUT', 60)
bento_server_workers = int(
os.environ.get('BENTOML_GUNICORN_NUM_OF_WORKERS', get_gunicorn_num_of_workers())
)
from bentoml.configuration.containers import BentoMLConfiguration, BentoMLContainer


def sigterm_handler(nginx_pid, gunicorn_pid):
Expand All @@ -32,7 +28,11 @@ def sigterm_handler(nginx_pid, gunicorn_pid):
sys.exit(0)


def _serve():
@inject
def _serve(
bento_server_timeout: int = Provide[BentoMLContainer.config.api_server.timeout],
bento_server_workers: int = Provide[BentoMLContainer.api_server_workers],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can also remove the CPU-core-related calculation in BentoMLContainer.api_server_workers, right?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we still want to maintain the behavior of automatically determining the worker count if workers is set to None?

):
# link the log streams to stdout/err so they will be logged to the container logs
subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log'])
subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log'])
Expand Down Expand Up @@ -66,4 +66,14 @@ def _serve():


if __name__ == '__main__':
    # Entry point for the SageMaker serving container: load the default
    # BentoML configuration, let the SageMaker environment variables
    # override the API-server settings, then wire the DI container so
    # `_serve` receives its injected timeout/workers values.
    config = BentoMLConfiguration()
    env_overrides = {
        "BENTOML_GUNICORN_TIMEOUT": ["api_server", "timeout"],
        "BENTOML_GUNICORN_NUM_OF_WORKERS": ["api_server", "workers"],
    }
    for env_name, config_path in env_overrides.items():
        raw_value = os.environ.get(env_name)
        if raw_value is not None:
            # Environment values are strings; the config expects integers.
            config.override(config_path, int(raw_value))

    container = BentoMLContainer()
    container.config.from_dict(config.as_dict())
    container.wire(modules=[sys.modules[__name__]])

    _serve()