Documentation ¶
Overview ¶
Copyright 2021 IBM Corporation
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Index ¶
- Constants
- type AdapterConfiguration
- type TorchServeAdapterServer
- func (s *TorchServeAdapterServer) LoadModel(ctx context.Context, req *mmesh.LoadModelRequest) (*mmesh.LoadModelResponse, error)
- func (s *TorchServeAdapterServer) ModelSize(ctx context.Context, req *mmesh.ModelSizeRequest) (*mmesh.ModelSizeResponse, error)
- func (s *TorchServeAdapterServer) RuntimeStatus(ctx context.Context, req *mmesh.RuntimeStatusRequest) (*mmesh.RuntimeStatusResponse, error)
- func (s *TorchServeAdapterServer) UnloadModel(ctx context.Context, req *mmesh.UnloadModelRequest) (*mmesh.UnloadModelResponse, error)
Constants ¶
const (
KServeServiceName string = "inference.GRPCInferenceService"
)
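KServeServiceName names the KServe v2 gRPC inference service. One plausible use is as the service name in a standard gRPC health check against the serving runtime; the sketch below is illustrative only and assumes the runtime registers the standard grpc.health.v1 service under this name, with a hypothetical endpoint address.

package main

import (
	"context"
	"log"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	healthpb "google.golang.org/grpc/health/grpc_health_v1"
)

// Mirrors KServeServiceName; the constant itself lives in the adapter package.
const kserveServiceName = "inference.GRPCInferenceService"

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Hypothetical inference endpoint; adjust to the runtime's gRPC port.
	conn, err := grpc.Dial("localhost:8085", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatalf("dial: %v", err)
	}
	defer conn.Close()

	resp, err := healthpb.NewHealthClient(conn).Check(ctx, &healthpb.HealthCheckRequest{Service: kserveServiceName})
	if err != nil {
		log.Fatalf("health check: %v", err)
	}
	log.Printf("health status: %s", resp.GetStatus())
}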
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AdapterConfiguration ¶
type AdapterConfiguration struct {
	Port                           int
	TorchServeManagementPort      int
	TorchServeInferenceEndpoint    string
	TorchServeContainerMemReqBytes int
	TorchServeMemBufferBytes       int
	CapacityInBytes                int
	MaxLoadingConcurrency          int
	ModelLoadingTimeoutMS          int
	DefaultModelSizeInBytes        int
	ModelSizeMultiplier            float64
	RuntimeVersion                 string
	LimitModelConcurrency          int // 0 means no limit (default)
	ModelStoreDir                  string
	UseEmbeddedPuller              bool
	RequestBatchSize               int32
	MaxBatchDelaySecs              int32
}
func GetAdapterConfigurationFromEnv ¶
func GetAdapterConfigurationFromEnv(log logr.Logger) (*AdapterConfiguration, error)
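A minimal sketch of loading the configuration from the environment and inspecting a few of the resulting fields. The package name and the use of logr.Discard are assumptions for illustration; the adapter itself passes in a real logger.

package server // assumed package name; the adapter package lives inside the repository

import (
	"fmt"

	"github.com/go-logr/logr"
)

// loadConfigSketch reads the adapter configuration from environment variables
// and prints a few of the resulting fields.
func loadConfigSketch() error {
	log := logr.Discard() // substitute a real logger (e.g. zapr) in practice

	cfg, err := GetAdapterConfigurationFromEnv(log)
	if err != nil {
		return err
	}
	fmt.Printf("adapter port=%d torchserve mgmt port=%d capacity=%d bytes\n",
		cfg.Port, cfg.TorchServeManagementPort, cfg.CapacityInBytes)
	return nil
}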
type TorchServeAdapterServer ¶
type TorchServeAdapterServer struct {
	ManagementClient  torchserve.ManagementAPIsServiceClient
	ManagementConn    *grpc.ClientConn
	Puller            *puller.Puller
	AdapterConfig     *AdapterConfiguration
	Log               logr.Logger
	InferenceEndpoint string
	InferenceClient   torchserve.InferenceAPIsServiceClient
	InferenceConn     *grpc.ClientConn

	// embed generated Unimplemented type for forward-compatibility for gRPC
	mmesh.UnimplementedModelRuntimeServer
}
func NewTorchServeAdapterServer ¶
func NewTorchServeAdapterServer(config *AdapterConfiguration, log logr.Logger) *TorchServeAdapterServer
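A sketch of constructing the adapter and exposing it as a model-mesh ModelRuntime gRPC service. The import path for the generated mmesh stubs and the RegisterModelRuntimeServer registration function are assumed to follow the usual protoc-generated layout.

package server // assumed package name

import (
	"fmt"
	"net"

	"github.com/go-logr/logr"
	"google.golang.org/grpc"

	// Assumed import path for the generated model-mesh runtime stubs.
	"github.com/kserve/modelmesh-runtime-adapter/internal/proto/mmesh"
)

// serveAdapter builds the adapter from environment configuration and serves
// it on the configured port until the listener is closed.
func serveAdapter() error {
	log := logr.Discard()

	cfg, err := GetAdapterConfigurationFromEnv(log)
	if err != nil {
		return err
	}

	adapter := NewTorchServeAdapterServer(cfg, log)

	lis, err := net.Listen("tcp", fmt.Sprintf(":%d", cfg.Port))
	if err != nil {
		return err
	}

	grpcServer := grpc.NewServer()
	mmesh.RegisterModelRuntimeServer(grpcServer, adapter) // assumed generated registration function
	return grpcServer.Serve(lis)
}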
func (*TorchServeAdapterServer) LoadModel ¶
func (s *TorchServeAdapterServer) LoadModel(ctx context.Context, req *mmesh.LoadModelRequest) (*mmesh.LoadModelResponse, error)
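A sketch of asking a constructed adapter to load a model. The request and response field names are assumptions based on the model-mesh model-runtime proto; the model id, type, and path are hypothetical.

package server // assumed package name

import (
	"context"
	"fmt"

	"github.com/kserve/modelmesh-runtime-adapter/internal/proto/mmesh" // assumed path
)

// loadModelSketch requests that the adapter register and load one model.
func loadModelSketch(ctx context.Context, s *TorchServeAdapterServer) error {
	resp, err := s.LoadModel(ctx, &mmesh.LoadModelRequest{
		ModelId:   "mnist",         // hypothetical model id
		ModelType: "pytorch-mar",   // hypothetical model type
		ModelPath: "/models/mnist", // hypothetical path within the model store
	})
	if err != nil {
		return err
	}
	fmt.Printf("loaded: size=%d bytes, maxConcurrency=%d\n", resp.SizeInBytes, resp.MaxConcurrency)
	return nil
}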
func (*TorchServeAdapterServer) ModelSize ¶
func (s *TorchServeAdapterServer) ModelSize(ctx context.Context, req *mmesh.ModelSizeRequest) (*mmesh.ModelSizeResponse, error)
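A sketch of querying the adapter for a model's size estimate, which model-mesh uses for capacity accounting. Request and response field names are again assumptions from the model-runtime proto.

package server // assumed package name

import (
	"context"
	"fmt"

	"github.com/kserve/modelmesh-runtime-adapter/internal/proto/mmesh" // assumed path
)

// modelSizeSketch asks the adapter how much memory a loaded model occupies.
func modelSizeSketch(ctx context.Context, s *TorchServeAdapterServer) error {
	resp, err := s.ModelSize(ctx, &mmesh.ModelSizeRequest{
		ModelId: "mnist", // hypothetical model id
	})
	if err != nil {
		return err
	}
	fmt.Printf("estimated size: %d bytes\n", resp.SizeInBytes)
	return nil
}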
func (*TorchServeAdapterServer) RuntimeStatus ¶
func (s *TorchServeAdapterServer) RuntimeStatus(ctx context.Context, req *mmesh.RuntimeStatusRequest) (*mmesh.RuntimeStatusResponse, error)
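A sketch of polling the adapter's runtime status, which model-mesh uses to determine when the runtime is ready. The response fields shown are assumptions from the model-runtime proto.

package server // assumed package name

import (
	"context"
	"fmt"

	"github.com/kserve/modelmesh-runtime-adapter/internal/proto/mmesh" // assumed path
)

// runtimeStatusSketch reports whether the adapter (and the TorchServe instance
// behind it) is ready, along with its advertised capacity.
func runtimeStatusSketch(ctx context.Context, s *TorchServeAdapterServer) error {
	resp, err := s.RuntimeStatus(ctx, &mmesh.RuntimeStatusRequest{})
	if err != nil {
		return err
	}
	fmt.Printf("status=%s capacity=%d bytes maxLoadingConcurrency=%d\n",
		resp.Status, resp.CapacityInBytes, resp.MaxLoadingConcurrency)
	return nil
}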
func (*TorchServeAdapterServer) UnloadModel ¶
func (s *TorchServeAdapterServer) UnloadModel(ctx context.Context, req *mmesh.UnloadModelRequest) (*mmesh.UnloadModelResponse, error)
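A sketch of unloading a previously loaded model; the request field name is an assumption from the model-runtime proto and the model id is hypothetical.

package server // assumed package name

import (
	"context"
	"fmt"

	"github.com/kserve/modelmesh-runtime-adapter/internal/proto/mmesh" // assumed path
)

// unloadModelSketch asks the adapter to remove a model from TorchServe.
func unloadModelSketch(ctx context.Context, s *TorchServeAdapterServer) error {
	if _, err := s.UnloadModel(ctx, &mmesh.UnloadModelRequest{ModelId: "mnist"}); err != nil { // hypothetical model id
		return err
	}
	fmt.Println("model unloaded")
	return nil
}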