Description
Describe the bug
- I don't understand why the SageMaker model-repacking step runs on Python 3.7 when I'm currently running Python 3.10.
- Repacking Error: ErrorMessage "" Command "/bin/sh -c ./_repack_script_launcher.sh --dependencies
To reproduce
A clear, step-by-step set of instructions to reproduce the bug.
The provided code needs to be complete and runnable; if additional data is needed, please include it in the issue.
%%writefile {CODE_FOLDER}/pipeline/inference.py
#| filename: inference.py
#| code-line-numbers: true
import os
import json
import requests
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
def handler(data, context, directory=Path("/opt/ml/model")):
    """Entry point SageMaker calls whenever the endpoint receives a request.

    The request goes through three stages: the payload is turned into a
    feature vector, the vector is sent to the model, and the model's answer
    is converted into the response handed back to the caller.

    Args:
        data: Request payload, forwarded to `_process_input`.
        context: Request context; `None` when the code is exercised
            directly (for example from a notebook).
        directory: Folder holding the model artifacts. Defaults to
            `/opt/ml/model`.

    Returns:
        Whatever `_process_output` produces for the prediction.
    """
    print("Handling endpoint request")

    features = _process_input(data, context, directory)

    # `_process_input` yields None when the payload could not be processed;
    # in that case (or for any other falsy result) the model is not called.
    if features:
        prediction = _predict(features, context, directory)
    else:
        prediction = None

    return _process_output(prediction, context, directory)
def _process_input(data, context, directory):
print("Processing input data...")
if context is None:
# The context will be None when we are testing the code
# directly from a notebook. In that case, we can use the
# data directly.
endpoint_input = data
elif context.request_content_type in (
"application/json",
"application/octet-stream",
):
# When the endpoint is running, we will receive a context
# object. We need to parse the input and turn it into
# JSON in that case.
endpoint_input = data.read().decode("utf-8")
else:
raise ValueError(
f"Unsupported content type: {context.request_content_type or 'unknown'}"
)
# Let's now transform the input data using the features pipeline.
try:
endpoint_input = json.loads(endpoint_input)
df = pd.json_normalize(endpoint_input)
features_pipeline = joblib.load(directory / "features.joblib")
result = features_pipeline.transform(df)
except Exception as e:
print(f"There was an error processing the input data. {e}")
return None
return result[0].tolist()
def _predict(instance, context, directory):
    """Obtain a prediction for a single feature vector.

    Args:
        instance: Feature vector for one sample.
        context: Request context, or `None` when running outside the
            endpoint (for example from a notebook).
        directory: Folder containing the saved model under `001`; only
            used when `context` is `None`.

    Returns:
        A dictionary with the prediction results. When `context` is `None`
        it has the form `{"predictions": [...]}`; otherwise it is the
        decoded JSON body returned by the model server.

    Raises:
        ValueError: If the model server answers with a status code other
            than 200; the message is the decoded response body.
    """
    print("Sending input data to model to make a prediction...")

    if context is not None:
        # Running behind the endpoint: forward the instance to the model
        # server reachable at `context.rest_uri` and decode its answer.
        request_body = json.dumps({"instances": [instance]})
        reply = requests.post(context.rest_uri, data=request_body)

        if reply.status_code != 200:
            raise ValueError(reply.content.decode("utf-8"))

        result = json.loads(reply.content)
    else:
        # Running directly (e.g. from a notebook): load the trained model
        # from disk and predict with it. Keras is imported lazily so it is
        # only required on this path.
        import keras

        network = keras.models.load_model(Path(directory) / "001")
        scores = network.predict(np.array([instance]))
        result = {"predictions": scores.tolist()}

    print(f"Response: {result}")
    return result
def _process_output(output, context, directory):
print("Processing prediction received from the model...")
if output:
prediction = np.argmax(output["predictions"][0])
confidence = output["predictions"][0][prediction]
target_pipeline = joblib.load(directory / "target.joblib")
classes = target_pipeline.named_transformers_["species"].categories_[0]
result = {
"prediction": classes[prediction],
"confidence": confidence,
}
else:
result = {"prediction": None}
print(result)
response_content_type = (
"application/json" if context is None else context.accept_header
)
return json.dumps(result), response_content_type
%%writefile {CODE_FOLDER}/pipeline/requirements.txt
#| filename: requirements.txt
#| code-line-numbers: true
sagemaker-training
numpy
pandas
scikit-learn==1.2.1
# Wrap the artifacts produced by the training step in a TensorFlowModel named
# "penguins". The entry point is inference.py, and source_dir points at the
# "pipeline" folder under CODE_FOLDER.
custom_tensorflow_model = TensorFlowModel(
name="penguins",
model_data=train_model_step.properties.ModelArtifacts.S3ModelArtifacts,
entry_point="inference.py",
# NOTE: passing requirements.txt through `dependencies` was tried while
# debugging the sagemaker-training toolkit error and did not help, so the
# argument is left disabled:
# dependencies=['requirements.txt'],
source_dir=(CODE_FOLDER / "pipeline").as_posix(),
# The framework version and the session are read from the `config` mapping,
# which is defined outside this snippet.
framework_version=config["framework_version"],
sagemaker_session=config["session"],
role=role,
)
# Name of the model package group passed to the registration step below.
CUSTOM_MODEL_PACKAGE_GROUP = "custom-penguins"
# Registration step for the custom model, declaring JSON for both request and
# response. `create_registration_step` and `model_metrics` are defined outside
# this snippet.
register_model_step = create_registration_step(
custom_tensorflow_model,
model_package_group_name=CUSTOM_MODEL_PACKAGE_GROUP,
content_types=["application/json"],
response_types=["application/json"],
model_metrics=model_metrics,
)
# Deployment step built from the registration step; `create_deployment_step`
# is defined outside this snippet.
deploy_step = create_deployment_step(register_model_step)
# Gate registration and deployment on `condition`: when it holds, the model is
# registered and deployed, otherwise `fail_step` runs. `condition` and
# `fail_step` are defined outside this snippet — presumably an accuracy check,
# judging by the step name; verify against their definitions.
condition_step = ConditionStep(
name="check-model-accuracy",
conditions=[condition],
if_steps=[register_model_step, deploy_step],
else_steps=[fail_step],
)
# Assemble the pipeline: preprocessing, training, evaluation, and the
# conditional step, listed in that order.
session15_pipeline = Pipeline(
name="session15-pipeline",
parameters=[dataset_location, accuracy_threshold],
steps=[
preprocessing_step,
train_model_step,
evaluate_model_step,
condition_step,
],
pipeline_definition_config=pipeline_definition_config,
sagemaker_session=config["session"],
)
# Upsert the pipeline definition using the execution role.
session15_pipeline.upsert(role_arn=role)
Expected behavior
Error repacking: ErrorMessage "" Command "/bin/sh -c ./_repack_script_launcher.sh --dependencies
Screenshots or logs
These are the SageMaker logs:
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,281 sagemaker-containers INFO Imported framework sagemaker_sklearn_container.training
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,285 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,286 sagemaker-training-toolkit INFO Failed to parse hyperparameter model_archive value s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz to Json.
2024-04-17T01:37:49.947Z
Returning the value itself
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,333 sagemaker_sklearn_container.training INFO Invoking user training script.
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,527 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,527 sagemaker-training-toolkit INFO Failed to parse hyperparameter model_archive value s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz to Json.
2024-04-17T01:37:49.947Z
Returning the value itself
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,540 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,541 sagemaker-training-toolkit INFO Failed to parse hyperparameter model_archive value s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz to Json.
2024-04-17T01:37:49.948Z
Returning the value itself
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,553 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,554 sagemaker-training-toolkit INFO Failed to parse hyperparameter model_archive value s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz to Json.
2024-04-17T01:37:49.948Z
Returning the value itself
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,563 sagemaker-training-toolkit INFO Invoking user script
2024-04-17T01:37:49.948Z
Training Env:
2024-04-17T01:37:49.948Z
{ "additional_framework_parameters": {}, "channel_input_dirs": { "training": "/opt/ml/input/data/training" }, "current_host": "algo-1", "framework_module": "sagemaker_sklearn_container.training:main", "hosts": [ "algo-1" ], "hyperparameters": { "dependencies": null, "inference_script": "inference.py", "model_archive": "s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz", "source_dir": "code/pipeline" }, "input_config_dir": "/opt/ml/input/config", "input_data_config": { "training": { "TrainingInputMode": "File", "S3DistributionType": "FullyReplicated", "RecordWrapperType": "None" } }, "input_dir": "/opt/ml/input", "is_master": true, "job_name": "register-RepackModel-penguins-52d38dc07-37tdzuasvx4r-w1aIdxK26x", "log_level": 20, "master_hostname": "algo-1", "model_dir": "/opt/ml/model", "module_dir": "s3://jason-ml-school-us-east-1/register-RepackModel-penguins-52d38dc07c690538660da8cd1da2230c/source/sourcedir.tar.gz", "module_name": "_repack_script_launcher.sh", "network_interface_name": "eth0", "num_cpus": 2, "num_gpus": 0, "output_data_dir": "/opt/ml/output/data", "output_dir": "/opt/ml/output", "output_intermediate_dir": "/opt/ml/output/intermediate", "resource_config": { "current_host": "algo-1", "current_instance_type": "ml.m5.large", "current_group_name": "homogeneousCluster", "hosts": [ "algo-1" ], "instance_groups": [ { "instance_group_name": "homogeneousCluster", "instance_type": "ml.m5.large", "hosts": [ "algo-1" ] } ], "network_interface_name": "eth0" }, "user_entry_point": "_repack_script_launcher.sh"
2024-04-17T01:37:49.948Z
}
2024-04-17T01:37:49.948Z
Environment variables:
2024-04-17T01:37:49.949Z
SM_HOSTS=["algo-1"]
System information
A description of your system. Please provide:
- SageMaker Python SDK version: 2.121.1
- Framework name (e.g. PyTorch) or algorithm (e.g. KMeans): TensorFlow
- Framework version: 2.14.0
- Python version: 3.10.11
- CPU or GPU: CPU
- Custom Docker image (Y/N): No
Additional context
Add any other context about the problem here.