Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: New Web UI #4313

Merged
merged 11 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 55 additions & 14 deletions src/bentoml/_internal/bento/bento.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from .build_config import PythonOptions

if TYPE_CHECKING:
from bentoml_io.api import APIMethod
from bentoml_io.server import Service as NewService
from fs.base import FS

Expand Down Expand Up @@ -84,7 +85,7 @@ def create_inference_api_table(svc: Service) -> str:


def get_default_svc_readme(
svc: Service | NewService, svc_version: str | None = None
svc: Service | NewService[t.Any], svc_version: str | None = None
) -> str:
from ..service import Service

Expand Down Expand Up @@ -220,15 +221,16 @@ def create(
models.add(model)
if model_spec.alias:
resolved_aliases[model.tag] = model_spec.alias
elif is_legacy:
else:
# XXX: legacy way to get models from service
# Add all models required by the service
for model in svc.models:
models.add(model)
# Add all models required by service runners
for runner in svc.runners:
for model in runner.models:
models.add(model)
if is_legacy:
for runner in svc.runners:
for model in runner.models:
models.add(model)

# create ignore specs
specs = BentoPathSpec(build_config.include, build_config.exclude)
Expand Down Expand Up @@ -282,7 +284,7 @@ def create(
yaml.dump(svc.openapi_spec, f)
else:
with bento_fs.open(fs.path.combine("apis", "schema.json"), "w") as f:
json.dump(svc.servable_cls.schema(), f, indent=2)
json.dump(svc.schema(), f, indent=2)

res = Bento(
tag,
Expand All @@ -299,9 +301,15 @@ def create(
],
runners=[BentoRunnerInfo.from_runner(r) for r in svc.runners]
if is_legacy
else [BentoRunnerInfo.from_dependency(d) for d in svc.dependencies],
else [],
apis=[BentoApiInfo.from_inference_api(api) for api in svc.apis.values()]
if is_legacy
else [BentoApiInfo.from_api_method(api) for api in svc.apis.values()],
services=[
BentoDependencyInfo.from_dependency(d.on, seen={svc.name})
for d in svc.dependencies.values()
]
if not is_legacy
else [],
docker=build_config.docker,
python=build_config.python,
Expand Down Expand Up @@ -469,16 +477,13 @@ def from_runner(cls, r: Runner) -> BentoRunnerInfo:
)

@classmethod
def from_dependency(cls, d: NewService) -> BentoRunnerInfo:
config = BentoMLContainer.runners_config.get()
if d.name in config:
config = config[d.name]
def from_dependency(cls, d: NewService[t.Any]) -> BentoRunnerInfo:
return cls(
name=d.name,
runnable_type=d.servable_cls.name,
runnable_type=d.name,
embedded=False,
models=[],
resource_config=config["resources"],
models=[str(model.tag) for model in d.models],
resource_config=d.config.get("resources"),
)


Expand All @@ -496,6 +501,14 @@ def from_inference_api(cls, api: InferenceAPI[t.Any]) -> BentoApiInfo:
output_type=api.output.__class__.__name__,
)

@classmethod
def from_api_method(cls, api: APIMethod[..., t.Any]) -> BentoApiInfo:
    """Build a BentoApiInfo record from a 1.2-style service API method.

    The API's IO contracts are recorded by the class names of its
    input/output spec models.
    """
    # Collect the IO type names first, then construct the frozen record.
    spec_names = {
        "input_type": api.input_spec.__name__,
        "output_type": api.output_spec.__name__,
    }
    return cls(name=api.name, **spec_names)


@attr.frozen
class BentoModelInfo:
Expand All @@ -516,6 +529,32 @@ def from_bento_model(
)


@attr.frozen
class BentoDependencyInfo:
    """Recursive description of a 1.2-style service and its dependency tree.

    Attributes:
        name: the service's name.
        service: the import string used to load the service.
        models: models used directly by this service.
        dependencies: records for the services this one depends on.
    """

    name: str
    service: str
    models: t.List[BentoModelInfo] = attr.field(factory=list)
    dependencies: t.List[BentoDependencyInfo] = attr.field(factory=list)

    @classmethod
    def from_dependency(
        cls, d: NewService[t.Any], seen: set[str] | None = None
    ) -> BentoDependencyInfo:
        """Build the dependency record for ``d``, recursing into its dependencies.

        ``seen`` carries the service names already on the current path so that
        a dependency cycle terminates with a shallow record instead of
        recursing forever.
        """
        visited = set() if seen is None else seen
        name = d.name
        if name in visited:
            # Already expanded on this path — emit a stub to break the cycle.
            return cls(name=name, service=d.import_string)
        path = visited | {name}
        model_infos = [BentoModelInfo.from_bento_model(m) for m in d.models]
        child_infos = [
            cls.from_dependency(dep.on, seen=path) for dep in d.dependencies.values()
        ]
        return cls(
            name=name,
            service=d.import_string,
            models=model_infos,
            dependencies=child_infos,
        )


def get_service_import_str(svc: Service | str):
from ..service import Service

Expand Down Expand Up @@ -551,6 +590,8 @@ class BentoInfo:
)
models: t.List[BentoModelInfo] = attr.field(factory=list)
runners: t.List[BentoRunnerInfo] = attr.field(factory=list)
# for BentoML 1.2+ SDK
services: t.List[BentoDependencyInfo] = attr.field(factory=list)
apis: t.List[BentoApiInfo] = attr.field(factory=list)
docker: DockerOptions = attr.field(factory=lambda: DockerOptions().with_defaults())
python: PythonOptions = attr.field(factory=lambda: PythonOptions().with_defaults())
Expand Down
21 changes: 21 additions & 0 deletions src/bentoml/_internal/configuration/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from .helpers import load_config_file

if TYPE_CHECKING:
from bentoml_io.config import ServiceConfig
from fs.base import FS

from .. import external_typing as ext
Expand Down Expand Up @@ -265,6 +266,7 @@ def cloud_config(bentoml_home: str = Provide[bentoml_home]) -> Path:
serialization_strategy: providers.Static[SerializationStrategy] = providers.Static(
"EXPORT_BENTO"
)
worker_index: providers.Static[int] = providers.Static(0)

@providers.SingletonFactory
@staticmethod
Expand Down Expand Up @@ -518,5 +520,24 @@ def enabled_features() -> list[str]:
def new_io(self) -> bool:
return "newio" in self.enabled_features.get()

def set_service_config(self, config: ServiceConfig) -> None:
    """A backward-compatible merging of new service config and legacy config.

    Splits a 1.2-style service config into the keys that the legacy
    configuration nested under ``api_server`` and everything else, then
    merges both into the container's current configuration in place.
    """
    # Keys that the legacy top-level config expects under "api_server".
    api_server_keys = (
        "traffic",
        "metrics",
        "logging",
        "ssl",
        "http",
        "grpc",
        "backlog",
        "runner_probe",
        "max_runner_connections",
    )
    api_server_config = {k: v for k, v in config.items() if k in api_server_keys}
    rest_config = {k: v for k, v in config.items() if k not in api_server_keys}
    existing = t.cast(t.Dict[str, t.Any], self.config.get())
    # config_merger mutates ``existing`` in place; re-set it to publish the merge.
    config_merger.merge(existing, {"api_server": api_server_config, **rest_config})
    self.config.set(existing)  # type: ignore


BentoMLContainer = _BentoMLContainerClass()
9 changes: 7 additions & 2 deletions src/bentoml/_internal/marshal/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@

import attr
import numpy as np
from starlette.concurrency import run_in_threadpool

from ..utils import is_async_callable
from ..utils.alg import TokenBucket

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -166,8 +168,11 @@ def __call__(
callback: t.Callable[
[t.Sequence[T_IN]], t.Coroutine[None, None, t.Sequence[T_OUT]]
],
) -> t.Callable[[T_IN], t.Coroutine[None, None, T_OUT]]:
self.callback = callback
) -> t.Callable[[T_IN], t.Coroutine[None, None, T_OUT | None]]:
if is_async_callable(callback):
self.callback = callback
else:
self.callback = functools.partial(run_in_threadpool, callback)

@functools.wraps(callback)
async def _func(data: T_IN) -> T_OUT:
Expand Down
11 changes: 9 additions & 2 deletions src/bentoml/_internal/runner/runnable.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,15 @@ class RunnableMethod(t.Generic[T, P, R]):
_bentoml_runnable_method: None = None

def __get__(self, obj: T, _: t.Type[T] | None = None) -> t.Callable[P, R]:
def method(*args: P.args, **kwargs: P.kwargs) -> R:
return self.func(obj, *args, **kwargs)
if is_async_callable(self.func):

async def method(*args: P.args, **kwargs: P.kwargs) -> R:
return await self.func(obj, *args, **kwargs)

else:

def method(*args: P.args, **kwargs: P.kwargs) -> R:
return self.func(obj, *args, **kwargs)

return method

Expand Down
53 changes: 40 additions & 13 deletions src/bentoml/_internal/utils/analytics/usage_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
T = t.TypeVar("T")
AsyncFunc = t.Callable[P, t.Coroutine[t.Any, t.Any, t.Any]]

from bentoml_io import Service as NewService
from prometheus_client.samples import Sample

from bentoml import Service
Expand Down Expand Up @@ -127,12 +128,16 @@ def track(event_properties: EventMeta):

@inject
def _track_serve_init(
svc: Service,
svc: Service | NewService[t.Any],
production: bool,
serve_kind: str,
from_server_api: bool,
serve_info: ServeInfo = Provide[BentoMLContainer.serve_info],
):
from bentoml import Service

is_legacy = isinstance(svc, Service)

if svc.bento is not None:
bento = svc.bento
event_properties = ServeInitEvent(
Expand All @@ -143,34 +148,56 @@ def _track_serve_init(
serve_kind=serve_kind,
bento_creation_timestamp=bento.info.creation_time,
num_of_models=len(bento.info.models),
num_of_runners=len(svc.runners),
num_of_runners=len(svc.runners) if is_legacy else len(svc.dependencies),
num_of_apis=len(bento.info.apis),
model_types=[m.module for m in bento.info.models],
runnable_types=[r.runnable_type for r in bento.info.runners],
api_input_types=[api.input_type for api in bento.info.apis],
api_output_types=[api.output_type for api in bento.info.apis],
)
else:
if is_legacy:
num_models = len(
set(
svc.models
+ [model for runner in svc.runners for model in runner.models]
)
)
else:
from bentoml import Model

def _get_models(svc: NewService[t.Any], seen: set[str]) -> t.Set[Model]:
if svc.name in seen:
return set()
seen.add(svc.name)
models = set(svc.models)
for dep in svc.dependencies.values():
models.update(_get_models(dep.on, seen))
return models

num_models = len(_get_models(svc, set()))

event_properties = ServeInitEvent(
serve_id=serve_info.serve_id,
serve_from_bento=False,
serve_from_server_api=from_server_api,
production=production,
serve_kind=serve_kind,
bento_creation_timestamp=None,
num_of_models=len(
set(
svc.models
+ [model for runner in svc.runners for model in runner.models]
)
),
num_of_runners=len(svc.runners),
num_of_models=num_models,
num_of_runners=len(svc.runners) if is_legacy else len(svc.dependencies),
num_of_apis=len(svc.apis.keys()),
runnable_types=[r.runnable_class.__name__ for r in svc.runners],
api_input_types=[api.input.__class__.__name__ for api in svc.apis.values()],
runnable_types=[r.runnable_class.__name__ for r in svc.runners]
if is_legacy
else [d.on.name for d in svc.dependencies.values()],
api_input_types=[api.input.__class__.__name__ for api in svc.apis.values()]
if is_legacy
else [],
api_output_types=[
api.output.__class__.__name__ for api in svc.apis.values()
],
]
if is_legacy
else [],
)

track(event_properties)
Expand Down Expand Up @@ -241,7 +268,7 @@ def get_metrics_report(
@inject
@contextlib.contextmanager
def track_serve(
svc: Service,
svc: Service | NewService[t.Any],
*,
production: bool = False,
from_server_api: bool | None = None,
Expand Down
4 changes: 4 additions & 0 deletions src/bentoml/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,13 +537,17 @@ def serve_grpc_production(
prometheus_dir = ensure_prometheus_dir()

from . import load
from ._internal.service import Service
from ._internal.utils import reserve_free_port
from ._internal.utils.analytics import track_serve
from ._internal.utils.circus import create_standalone_arbiter

working_dir = os.path.realpath(os.path.expanduser(working_dir))
svc = load(bento_identifier, working_dir=working_dir)

if not isinstance(svc, Service):
raise BentoMLException(f"{type(svc)} type doesn't support gRPC serving")

from circus.sockets import CircusSocket # type: ignore

watchers: list[Watcher] = []
Expand Down
Loading