{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Sample for KServe SDK v1beta1"
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"This is a sample for the KServe SDK v1beta1.\n",
"\n",
"The notebook shows how to use the KServe SDK to create, get, patch and delete an InferenceService."
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 11,
"source": [
"from kubernetes import client \n",
"from kserve import KServeClient\n",
"from kserve import constants\n",
"from kserve import utils\n",
"from kserve import V1beta1InferenceService\n",
"from kserve import V1beta1InferenceServiceSpec\n",
"from kserve import V1beta1PredictorSpec\n",
"from kserve import V1beta1TFServingSpec"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"Define the namespace where the InferenceService will be deployed. To resolve it automatically instead, use `utils.get_default_target_namespace()` (commented out below): it returns the namespace the SDK is currently running in when inside the cluster, and otherwise falls back to the `default` namespace."
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 12,
"source": [
"# Alternative: let the SDK pick the target namespace instead of hard-coding it.\n",
"#namespace = utils.get_default_target_namespace()\n",
"# Namespace passed to every create/get/patch/delete call in this notebook.\n",
"namespace = 'kserve-test'"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"## Define InferenceService"
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"First define the predictor spec, and then define the InferenceService based on that predictor spec."
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 13,
"source": [
"# `kserve_version` is not provided by any import above, so define it here;\n",
"# without it this cell fails with NameError on a fresh kernel.\n",
"kserve_version = 'v1beta1'\n",
"api_version = constants.KSERVE_GROUP + '/' + kserve_version\n",
"\n",
"# InferenceService whose predictor serves the TensorFlow flowers model\n",
"# from the given storage URI.\n",
"isvc = V1beta1InferenceService(\n",
"    api_version=api_version,\n",
"    kind=constants.KSERVE_KIND,\n",
"    metadata=client.V1ObjectMeta(name='flower-sample', namespace=namespace),\n",
"    spec=V1beta1InferenceServiceSpec(\n",
"        predictor=V1beta1PredictorSpec(\n",
"            tensorflow=V1beta1TFServingSpec(\n",
"                storage_uri='gs://kfserving-examples/models/tensorflow/flowers'\n",
"            )\n",
"        )\n",
"    ),\n",
")"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"## Create InferenceService"
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"Call KServeClient to create InferenceService."
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 15,
"source": [
"# Instantiate the SDK client (reused by all later cells), then submit the\n",
"# InferenceService object built in the previous section.\n",
"KServe = KServeClient()\n",
"KServe.create(isvc)"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'apiVersion': 'serving.kserve.io/v1beta1',\n",
" 'kind': 'InferenceService',\n",
" 'metadata': {'creationTimestamp': '2021-01-18T00:31:35Z',\n",
" 'generation': 1,\n",
" 'name': 'flower-sample',\n",
" 'namespace': 'kserve-test',\n",
" 'resourceVersion': '283999021',\n",
" 'selfLink': '/apis/serving.kserve.io/v1beta1/namespaces/kserve-test/inferenceservices/flower-sample',\n",
" 'uid': 'd074779a-e0d0-4612-b9c7-a7da69002683'},\n",
" 'spec': {'predictor': {'tensorflow': {'name': 'kserve-container',\n",
" 'resources': {'limits': {'cpu': '1', 'memory': '2Gi'},\n",
" 'requests': {'cpu': '1', 'memory': '2Gi'}},\n",
" 'runtimeVersion': '1.14.0',\n",
" 'storageUri': 'gs://kfserving-examples/models/tensorflow/flowers'}}}}"
]
},
"metadata": {},
"execution_count": 15
}
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"## Check the InferenceService"
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 17,
"source": [
"# Watch the InferenceService and print its status table (READY / traffic / URL).\n",
"# NOTE(review): timeout_seconds=120 presumably caps how long the watch runs - confirm against the SDK docs.\n",
"KServe.get('flower-sample', namespace=namespace, watch=True, timeout_seconds=120)"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"NAME READY PREV LATEST URL \n",
"flower-sample True 0 100 http://flower-sample.kserve-test.example.com \n"
]
}
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"## Patch the InferenceService and define Canary Traffic Percent"
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 18,
"source": [
"# Predictor for the patched service: the flowers-2 model with a canary\n",
"# traffic percent of 20.\n",
"canary_predictor = V1beta1PredictorSpec(\n",
"    canary_traffic_percent=20,\n",
"    tensorflow=V1beta1TFServingSpec(\n",
"        storage_uri='gs://kfserving-examples/models/tensorflow/flowers-2'\n",
"    ),\n",
")\n",
"\n",
"# Same name and namespace as the existing service, so the patch targets it.\n",
"isvc = V1beta1InferenceService(\n",
"    api_version=api_version,\n",
"    kind=constants.KSERVE_KIND,\n",
"    metadata=client.V1ObjectMeta(name='flower-sample', namespace=namespace),\n",
"    spec=V1beta1InferenceServiceSpec(predictor=canary_predictor),\n",
")\n",
"\n",
"KServe.patch('flower-sample', isvc, namespace=namespace)"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'apiVersion': 'serving.kserve.io/v1beta1',\n",
" 'kind': 'InferenceService',\n",
" 'metadata': {'creationTimestamp': '2021-01-18T00:31:35Z',\n",
" 'finalizers': ['inferenceservice.finalizers'],\n",
" 'generation': 2,\n",
" 'name': 'flower-sample',\n",
" 'namespace': 'kserve-test',\n",
" 'resourceVersion': '283999615',\n",
" 'selfLink': '/apis/serving.kserve.io/v1beta1/namespaces/kserve-test/inferenceservices/flower-sample',\n",
" 'uid': 'd074779a-e0d0-4612-b9c7-a7da69002683'},\n",
" 'spec': {'predictor': {'canaryTrafficPercent': 20,\n",
" 'tensorflow': {'name': 'kserve-container',\n",
" 'resources': {'limits': {'cpu': '1', 'memory': '2Gi'},\n",
" 'requests': {'cpu': '1', 'memory': '2Gi'}},\n",
" 'runtimeVersion': '1.14.0',\n",
" 'storageUri': 'gs://kfserving-examples/models/tensorflow/flowers-2'}}},\n",
" 'status': {'address': {'url': 'http://flower-sample.kserve-test.svc.cluster.local/v1/models/flower-sample:predict'},\n",
" 'components': {'predictor': {'address': {'url': 'http://flower-sample-predictor-default.kserve-test.svc.cluster.local'},\n",
" 'latestCreatedRevision': 'flower-sample-predictor-default-fg4d6',\n",
" 'latestReadyRevision': 'flower-sample-predictor-default-fg4d6',\n",
" 'latestRolledoutRevision': 'flower-sample-predictor-default-fg4d6',\n",
" 'traffic': [{'latestRevision': True,\n",
" 'percent': 100,\n",
" 'revisionName': 'flower-sample-predictor-default-fg4d6',\n",
" 'tag': 'latest',\n",
" 'url': 'http://latest-flower-sample-predictor-default.kserve-test.example.com'}],\n",
" 'url': 'http://flower-sample-predictor-default.kserve-test.example.com'}},\n",
" 'conditions': [{'lastTransitionTime': '2021-01-18T00:31:55Z',\n",
" 'status': 'True',\n",
" 'type': 'IngressReady'},\n",
" {'lastTransitionTime': '2021-01-18T00:31:55Z',\n",
" 'severity': 'Info',\n",
" 'status': 'True',\n",
" 'type': 'PredictorConfigurationReady'},\n",
" {'lastTransitionTime': '2021-01-18T00:31:55Z',\n",
" 'status': 'True',\n",
" 'type': 'PredictorReady'},\n",
" {'lastTransitionTime': '2021-01-18T00:31:51Z',\n",
" 'severity': 'Info',\n",
" 'status': 'True',\n",
" 'type': 'PredictorRouteReady'},\n",
" {'lastTransitionTime': '2021-01-18T00:31:55Z',\n",
" 'status': 'True',\n",
" 'type': 'Ready'}],\n",
" 'url': 'http://flower-sample.kserve-test.example.com'}}"
]
},
"metadata": {},
"execution_count": 18
}
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"### Check the InferenceService after Patching"
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 19,
"source": [
"# NOTE(review): presumably blocks until the patched InferenceService reports Ready - confirm against the SDK docs.\n",
"KServe.wait_isvc_ready('flower-sample', namespace=namespace)"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 21,
"source": [
"# Watch the service again to see the traffic split after the patch\n",
"# (the PREV / LATEST columns of the printed table).\n",
"# Unlike the earlier watch, no timeout_seconds is passed here, so the SDK default applies.\n",
"KServe.get('flower-sample', namespace=namespace, watch=True)"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"NAME READY PREV LATEST URL \n",
"flower-sample True 80 20 http://flower-sample.kserve-test.example.com \n"
]
}
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"## Delete the InferenceService"
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 10,
"source": [
"# Clean up: delete the sample InferenceService from the target namespace.\n",
"KServe.delete('flower-sample', namespace=namespace)"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'apiVersion': 'serving.kserve.io/v1beta1',\n",
" 'kind': 'InferenceService',\n",
" 'metadata': {'creationTimestamp': '2021-01-18T00:21:55Z',\n",
" 'deletionGracePeriodSeconds': 0,\n",
" 'deletionTimestamp': '2021-01-18T00:24:37Z',\n",
" 'finalizers': ['inferenceservice.finalizers'],\n",
" 'generation': 3,\n",
" 'name': 'flower-sample',\n",
" 'namespace': 'kserve-test',\n",
" 'resourceVersion': '283995283',\n",
" 'selfLink': '/apis/serving.kserve.io/v1beta1/namespaces/kserve-test/inferenceservices/flower-sample',\n",
" 'uid': 'd2b1aeb1-8029-41fc-a614-1ed65949a797'},\n",
" 'spec': {'canaryTrafficPercent': 20,\n",
" 'default': {'predictor': {'tensorflow': {'resources': {'limits': {'cpu': '1',\n",
" 'memory': '2Gi'},\n",
" 'requests': {'cpu': '1', 'memory': '2Gi'}},\n",
" 'runtimeVersion': '1.14.0',\n",
" 'storageUri': 'gs://kfserving-examples/models/tensorflow/flowers-2'}}}},\n",
" 'status': {}}"
]
},
"metadata": {},
"execution_count": 10
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"source": [],
"outputs": [],
"metadata": {}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}