-
Notifications
You must be signed in to change notification settings - Fork 685
add route payload to deploy Inference Endpoints #3013
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bba6945
cd3be6a
ee23208
2f342a0
0416ee3
54572d5
a0f9e9c
c9dfcb4
a1e946b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7566,6 +7566,10 @@ def create_inference_endpoint( | |
custom_image: Optional[Dict] = None, | ||
secrets: Optional[Dict[str, str]] = None, | ||
type: InferenceEndpointType = InferenceEndpointType.PROTECTED, | ||
domain: Optional[str] = None, | ||
path: Optional[str] = None, | ||
cache_http_responses: Optional[bool] = None, | ||
tags: Optional[List[str]] = None, | ||
namespace: Optional[str] = None, | ||
token: Union[bool, str, None] = None, | ||
) -> InferenceEndpoint: | ||
|
@@ -7607,6 +7611,14 @@ def create_inference_endpoint( | |
Secret values to inject in the container environment. | ||
type ([`InferenceEndpointType`], *optional*): | ||
The type of the Inference Endpoint, which can be `"protected"` (default), `"public"` or `"private"`. | ||
domain (`str`, *optional*): | ||
The custom domain for the Inference Endpoint deployment. If set up, the Inference Endpoint will be available at this domain (e.g. `"my-new-domain.cool-website.woof"`). | ||
path (`str`, *optional*): | ||
The custom path to the deployed model, should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`). | ||
cache_http_responses (`bool`, *optional*): | ||
Whether to cache HTTP responses from the Inference Endpoint. Defaults to `False`. | ||
tags (`List[str]`, *optional*): | ||
A list of tags to associate with the Inference Endpoint. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Out of curiosity, how do we play with tags once set? (e.g. can we list endpoints based on tags? or is it a UI change?) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah! You can list all endpoints deployed with a specific tag. |
||
namespace (`str`, *optional*): | ||
The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace. | ||
token (Union[bool, str, None], optional): | ||
|
@@ -7668,6 +7680,7 @@ def create_inference_endpoint( | |
... "url": "ghcr.io/huggingface/text-generation-inference:1.1.0", | ||
... }, | ||
... secrets={"MY_SECRET_KEY": "secret_value"}, | ||
... tags=["dev", "text-generation"], | ||
... ) | ||
|
||
``` | ||
|
@@ -7703,6 +7716,17 @@ def create_inference_endpoint( | |
} | ||
if secrets: | ||
payload["model"]["secrets"] = secrets | ||
if domain is not None or path is not None: | ||
payload["route"] = {} | ||
if domain is not None: | ||
payload["route"]["domain"] = domain | ||
if path is not None: | ||
payload["route"]["path"] = path | ||
if cache_http_responses is not None: | ||
payload["cacheHttpResponses"] = cache_http_responses | ||
if tags is not None: | ||
payload["tags"] = tags | ||
|
||
response = get_session().post( | ||
f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}", | ||
headers=self._build_hf_headers(token=token), | ||
|
@@ -7865,14 +7889,19 @@ def update_inference_endpoint( | |
task: Optional[str] = None, | ||
custom_image: Optional[Dict] = None, | ||
secrets: Optional[Dict[str, str]] = None, | ||
# Route update | ||
domain: Optional[str] = None, | ||
path: Optional[str] = None, | ||
# Other | ||
cache_http_responses: Optional[bool] = None, | ||
tags: Optional[List[str]] = None, | ||
namespace: Optional[str] = None, | ||
token: Union[bool, str, None] = None, | ||
) -> InferenceEndpoint: | ||
"""Update an Inference Endpoint. | ||
|
||
This method allows the update of either the compute configuration, the deployed model, or both. All arguments are | ||
optional but at least one must be provided. | ||
This method allows the update of either the compute configuration, the deployed model, the route, or any combination. | ||
All arguments are optional but at least one must be provided. | ||
|
||
For convenience, you can also update an Inference Endpoint using [`InferenceEndpoint.update`]. | ||
|
||
|
@@ -7906,6 +7935,17 @@ def update_inference_endpoint( | |
Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples). | ||
secrets (`Dict[str, str]`, *optional*): | ||
Secret values to inject in the container environment. | ||
|
||
domain (`str`, *optional*): | ||
The custom domain for the Inference Endpoint deployment. If set up, the Inference Endpoint will be available at this domain (e.g. `"my-new-domain.cool-website.woof"`). | ||
path (`str`, *optional*): | ||
The custom path to the deployed model, should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`). | ||
|
||
cache_http_responses (`bool`, *optional*): | ||
Whether to cache HTTP responses from the Inference Endpoint. | ||
Comment on lines
+7944
to
+7945
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So we're okay with keeping this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I think so in the end. It's also publicly documented here: https://api.endpoints.huggingface.cloud/#post-/v2/endpoint/-namespace-. This method is already not much used, so if it can make HF people's lives easier, let's go for it. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perfect, convenient, thanks! |
||
tags (`List[str]`, *optional*): | ||
A list of tags to associate with the Inference Endpoint. | ||
|
||
namespace (`str`, *optional*): | ||
The namespace where the Inference Endpoint will be updated. Defaults to the current user's namespace. | ||
token (Union[bool, str, None], optional): | ||
|
@@ -7945,6 +7985,14 @@ def update_inference_endpoint( | |
payload["model"]["image"] = {"custom": custom_image} | ||
if secrets is not None: | ||
payload["model"]["secrets"] = secrets | ||
if domain is not None: | ||
payload["route"]["domain"] = domain | ||
if path is not None: | ||
payload["route"]["path"] = path | ||
if cache_http_responses is not None: | ||
payload["cacheHttpResponses"] = cache_http_responses | ||
if tags is not None: | ||
payload["tags"] = tags | ||
|
||
response = get_session().put( | ||
f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(link this private thread to avoid forgetting about it - if this attribute is HF-only, let's remove it)