Documentation
Index
- Constants
- Variables
- func RegisterGRPCInferenceServiceServer(s grpc.ServiceRegistrar, srv GRPCInferenceServiceServer)
- func RegisterHealthServer(s grpc.ServiceRegistrar, srv HealthServer)
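
The two registration helpers above wire a concrete implementation into a `grpc.Server`. Below is a minimal, hedged sketch of doing so; the import path and listen address are placeholders, and embedding `UnimplementedGRPCInferenceServiceServer` (listed near the end of this index) keeps the implementation compiling as RPCs are added to the service.

```go
package main

import (
	"context"
	"log"
	"net"

	"google.golang.org/grpc"

	pb "example.com/your/module/inference" // placeholder path for this generated package
)

// inferenceServer embeds the Unimplemented server so every RPC not
// overridden here returns codes.Unimplemented.
type inferenceServer struct {
	pb.UnimplementedGRPCInferenceServiceServer
}

// ServerReady reports readiness; the Ready field matches the
// ServerReadyResponse.GetReady accessor listed in this index.
func (s *inferenceServer) ServerReady(ctx context.Context, _ *pb.ServerReadyRequest) (*pb.ServerReadyResponse, error) {
	return &pb.ServerReadyResponse{Ready: true}, nil
}

func main() {
	lis, err := net.Listen("tcp", ":8001") // Triton's conventional gRPC port; an assumption
	if err != nil {
		log.Fatal(err)
	}
	s := grpc.NewServer()
	pb.RegisterGRPCInferenceServiceServer(s, &inferenceServer{})
	log.Fatal(s.Serve(lis))
}
```
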
- type BatchInput
- func (*BatchInput) Descriptor() ([]byte, []int) (deprecated)
- func (x *BatchInput) GetDataType() DataType
- func (x *BatchInput) GetKind() BatchInput_Kind
- func (x *BatchInput) GetSourceInput() []string
- func (x *BatchInput) GetTargetName() []string
- func (*BatchInput) ProtoMessage()
- func (x *BatchInput) ProtoReflect() protoreflect.Message
- func (x *BatchInput) Reset()
- func (x *BatchInput) String() string
- type BatchInput_Kind
- func (BatchInput_Kind) Descriptor() protoreflect.EnumDescriptor
- func (x BatchInput_Kind) Enum() *BatchInput_Kind
- func (BatchInput_Kind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x BatchInput_Kind) Number() protoreflect.EnumNumber
- func (x BatchInput_Kind) String() string
- func (BatchInput_Kind) Type() protoreflect.EnumType
- type BatchOutput
- func (*BatchOutput) Descriptor() ([]byte, []int) (deprecated)
- func (x *BatchOutput) GetKind() BatchOutput_Kind
- func (x *BatchOutput) GetSourceInput() []string
- func (x *BatchOutput) GetTargetName() []string
- func (*BatchOutput) ProtoMessage()
- func (x *BatchOutput) ProtoReflect() protoreflect.Message
- func (x *BatchOutput) Reset()
- func (x *BatchOutput) String() string
- type BatchOutput_Kind
- func (BatchOutput_Kind) Descriptor() protoreflect.EnumDescriptor
- func (x BatchOutput_Kind) Enum() *BatchOutput_Kind
- func (BatchOutput_Kind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x BatchOutput_Kind) Number() protoreflect.EnumNumber
- func (x BatchOutput_Kind) String() string
- func (BatchOutput_Kind) Type() protoreflect.EnumType
- type CudaSharedMemoryRegisterRequest
- func (*CudaSharedMemoryRegisterRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *CudaSharedMemoryRegisterRequest) GetByteSize() uint64
- func (x *CudaSharedMemoryRegisterRequest) GetDeviceId() int64
- func (x *CudaSharedMemoryRegisterRequest) GetName() string
- func (x *CudaSharedMemoryRegisterRequest) GetRawHandle() []byte
- func (*CudaSharedMemoryRegisterRequest) ProtoMessage()
- func (x *CudaSharedMemoryRegisterRequest) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryRegisterRequest) Reset()
- func (x *CudaSharedMemoryRegisterRequest) String() string
- type CudaSharedMemoryRegisterResponse
- func (*CudaSharedMemoryRegisterResponse) Descriptor() ([]byte, []int) (deprecated)
- func (*CudaSharedMemoryRegisterResponse) ProtoMessage()
- func (x *CudaSharedMemoryRegisterResponse) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryRegisterResponse) Reset()
- func (x *CudaSharedMemoryRegisterResponse) String() string
- type CudaSharedMemoryStatusRequest
- func (*CudaSharedMemoryStatusRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *CudaSharedMemoryStatusRequest) GetName() string
- func (*CudaSharedMemoryStatusRequest) ProtoMessage()
- func (x *CudaSharedMemoryStatusRequest) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryStatusRequest) Reset()
- func (x *CudaSharedMemoryStatusRequest) String() string
- type CudaSharedMemoryStatusResponse
- func (*CudaSharedMemoryStatusResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *CudaSharedMemoryStatusResponse) GetRegions() map[string]*CudaSharedMemoryStatusResponse_RegionStatus
- func (*CudaSharedMemoryStatusResponse) ProtoMessage()
- func (x *CudaSharedMemoryStatusResponse) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryStatusResponse) Reset()
- func (x *CudaSharedMemoryStatusResponse) String() string
- type CudaSharedMemoryStatusResponse_RegionStatus
- func (*CudaSharedMemoryStatusResponse_RegionStatus) Descriptor() ([]byte, []int) (deprecated)
- func (x *CudaSharedMemoryStatusResponse_RegionStatus) GetByteSize() uint64
- func (x *CudaSharedMemoryStatusResponse_RegionStatus) GetDeviceId() uint64
- func (x *CudaSharedMemoryStatusResponse_RegionStatus) GetName() string
- func (*CudaSharedMemoryStatusResponse_RegionStatus) ProtoMessage()
- func (x *CudaSharedMemoryStatusResponse_RegionStatus) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryStatusResponse_RegionStatus) Reset()
- func (x *CudaSharedMemoryStatusResponse_RegionStatus) String() string
- type CudaSharedMemoryUnregisterRequest
- func (*CudaSharedMemoryUnregisterRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *CudaSharedMemoryUnregisterRequest) GetName() string
- func (*CudaSharedMemoryUnregisterRequest) ProtoMessage()
- func (x *CudaSharedMemoryUnregisterRequest) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryUnregisterRequest) Reset()
- func (x *CudaSharedMemoryUnregisterRequest) String() string
- type CudaSharedMemoryUnregisterResponse
- func (*CudaSharedMemoryUnregisterResponse) Descriptor() ([]byte, []int) (deprecated)
- func (*CudaSharedMemoryUnregisterResponse) ProtoMessage()
- func (x *CudaSharedMemoryUnregisterResponse) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryUnregisterResponse) Reset()
- func (x *CudaSharedMemoryUnregisterResponse) String() string
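
The register/status/unregister trio above manages CUDA shared-memory regions. A hedged sketch of the lifecycle follows; the region name and byte size are placeholders, and producing the raw CUDA IPC handle (e.g. via cudaIpcGetMemHandle) is outside this package.

```go
package example

import (
	"context"
	"fmt"

	pb "example.com/your/module/inference" // placeholder path
)

// registerCudaRegion registers a CUDA shared-memory region, prints the
// server's view of it, then unregisters it.
func registerCudaRegion(ctx context.Context, client pb.GRPCInferenceServiceClient, rawHandle []byte) error {
	_, err := client.CudaSharedMemoryRegister(ctx, &pb.CudaSharedMemoryRegisterRequest{
		Name:      "cuda_region_0", // caller-chosen region name
		RawHandle: rawHandle,       // serialized CUDA IPC handle, produced elsewhere
		DeviceId:  0,
		ByteSize:  1 << 20,
	})
	if err != nil {
		return err
	}
	status, err := client.CudaSharedMemoryStatus(ctx, &pb.CudaSharedMemoryStatusRequest{Name: "cuda_region_0"})
	if err != nil {
		return err
	}
	for name, r := range status.GetRegions() {
		fmt.Printf("%s: device %d, %d bytes\n", name, r.GetDeviceId(), r.GetByteSize())
	}
	// Unregister when the region is no longer needed.
	_, err = client.CudaSharedMemoryUnregister(ctx, &pb.CudaSharedMemoryUnregisterRequest{Name: "cuda_region_0"})
	return err
}
```
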
- type DataType
- type GRPCInferenceServiceClient
- type GRPCInferenceServiceServer
- type GRPCInferenceService_ModelStreamInferClient
- type GRPCInferenceService_ModelStreamInferServer
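
`GRPCInferenceServiceClient` is the entry point for callers. The sketch below connects and issues a liveness check; the import path and target address are placeholders, and `NewGRPCInferenceServiceClient` is the constructor protoc-gen-go-grpc conventionally generates alongside the client interface.

```go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	pb "example.com/your/module/inference" // placeholder path
)

func main() {
	// Triton conventionally serves gRPC on :8001; adjust for your deployment.
	// On grpc-go older than v1.63, use grpc.Dial instead of grpc.NewClient.
	conn, err := grpc.NewClient("localhost:8001", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	client := pb.NewGRPCInferenceServiceClient(conn)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	live, err := client.ServerLive(ctx, &pb.ServerLiveRequest{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("server live:", live)
}
```
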
- type HealthCheckRequest
- func (*HealthCheckRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *HealthCheckRequest) GetService() string
- func (*HealthCheckRequest) ProtoMessage()
- func (x *HealthCheckRequest) ProtoReflect() protoreflect.Message
- func (x *HealthCheckRequest) Reset()
- func (x *HealthCheckRequest) String() string
- type HealthCheckResponse
- func (*HealthCheckResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *HealthCheckResponse) GetStatus() HealthCheckResponse_ServingStatus
- func (*HealthCheckResponse) ProtoMessage()
- func (x *HealthCheckResponse) ProtoReflect() protoreflect.Message
- func (x *HealthCheckResponse) Reset()
- func (x *HealthCheckResponse) String() string
- type HealthCheckResponse_ServingStatus
- func (HealthCheckResponse_ServingStatus) Descriptor() protoreflect.EnumDescriptor
- func (x HealthCheckResponse_ServingStatus) Enum() *HealthCheckResponse_ServingStatus
- func (HealthCheckResponse_ServingStatus) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x HealthCheckResponse_ServingStatus) Number() protoreflect.EnumNumber
- func (x HealthCheckResponse_ServingStatus) String() string
- func (HealthCheckResponse_ServingStatus) Type() protoreflect.EnumType
- type HealthClient
- type HealthServer
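
The Health service mirrors the standard grpc.health.v1 protocol. A hedged sketch follows; the `Check` method, the `NewHealthClient` constructor, and the `SERVING` constant name follow the usual generated-code conventions implied by the messages above, so treat the exact identifiers as assumptions.

```go
package example

import (
	"context"

	pb "example.com/your/module/inference" // placeholder path
)

// serving reports whether the whole server is healthy. Passing an empty
// Service string conventionally targets overall server health.
func serving(ctx context.Context, hc pb.HealthClient) (bool, error) {
	resp, err := hc.Check(ctx, &pb.HealthCheckRequest{Service: ""})
	if err != nil {
		return false, err
	}
	return resp.GetStatus() == pb.HealthCheckResponse_SERVING, nil
}
```
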
- type InferBatchStatistics
- func (*InferBatchStatistics) Descriptor() ([]byte, []int) (deprecated)
- func (x *InferBatchStatistics) GetBatchSize() uint64
- func (x *InferBatchStatistics) GetComputeInfer() *StatisticDuration
- func (x *InferBatchStatistics) GetComputeInput() *StatisticDuration
- func (x *InferBatchStatistics) GetComputeOutput() *StatisticDuration
- func (*InferBatchStatistics) ProtoMessage()
- func (x *InferBatchStatistics) ProtoReflect() protoreflect.Message
- func (x *InferBatchStatistics) Reset()
- func (x *InferBatchStatistics) String() string
- type InferParameter
- func (*InferParameter) Descriptor() ([]byte, []int) (deprecated)
- func (x *InferParameter) GetBoolParam() bool
- func (x *InferParameter) GetDoubleParam() float64
- func (x *InferParameter) GetInt64Param() int64
- func (m *InferParameter) GetParameterChoice() isInferParameter_ParameterChoice
- func (x *InferParameter) GetStringParam() string
- func (x *InferParameter) GetUint64Param() uint64
- func (*InferParameter) ProtoMessage()
- func (x *InferParameter) ProtoReflect() protoreflect.Message
- func (x *InferParameter) Reset()
- func (x *InferParameter) String() string
- type InferParameter_BoolParam
- type InferParameter_DoubleParam
- type InferParameter_Int64Param
- type InferParameter_StringParam
- type InferParameter_Uint64Param
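
The five `InferParameter_*Param` types above are the generated oneof wrappers for `InferParameter`'s `ParameterChoice` field (named after the `GetParameterChoice` accessor). A hedged sketch of populating them; the parameter key names are illustrative, not part of this package.

```go
package example

import (
	pb "example.com/your/module/inference" // placeholder path
)

// requestParameters builds a parameters map using the generated oneof
// wrapper types; exactly one wrapper is set per InferParameter.
func requestParameters() map[string]*pb.InferParameter {
	return map[string]*pb.InferParameter{
		"sequence_id": {ParameterChoice: &pb.InferParameter_Int64Param{Int64Param: 42}},
		"priority":    {ParameterChoice: &pb.InferParameter_Uint64Param{Uint64Param: 1}},
		"tag":         {ParameterChoice: &pb.InferParameter_StringParam{StringParam: "canary"}},
	}
}
```
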
- type InferResponseStatistics
- func (*InferResponseStatistics) Descriptor() ([]byte, []int) (deprecated)
- func (x *InferResponseStatistics) GetCancel() *StatisticDuration
- func (x *InferResponseStatistics) GetComputeInfer() *StatisticDuration
- func (x *InferResponseStatistics) GetComputeOutput() *StatisticDuration
- func (x *InferResponseStatistics) GetEmptyResponse() *StatisticDuration
- func (x *InferResponseStatistics) GetFail() *StatisticDuration
- func (x *InferResponseStatistics) GetSuccess() *StatisticDuration
- func (*InferResponseStatistics) ProtoMessage()
- func (x *InferResponseStatistics) ProtoReflect() protoreflect.Message
- func (x *InferResponseStatistics) Reset()
- func (x *InferResponseStatistics) String() string
- type InferStatistics
- func (*InferStatistics) Descriptor() ([]byte, []int) (deprecated)
- func (x *InferStatistics) GetCacheHit() *StatisticDuration
- func (x *InferStatistics) GetCacheMiss() *StatisticDuration
- func (x *InferStatistics) GetComputeInfer() *StatisticDuration
- func (x *InferStatistics) GetComputeInput() *StatisticDuration
- func (x *InferStatistics) GetComputeOutput() *StatisticDuration
- func (x *InferStatistics) GetFail() *StatisticDuration
- func (x *InferStatistics) GetQueue() *StatisticDuration
- func (x *InferStatistics) GetSuccess() *StatisticDuration
- func (*InferStatistics) ProtoMessage()
- func (x *InferStatistics) ProtoReflect() protoreflect.Message
- func (x *InferStatistics) Reset()
- func (x *InferStatistics) String() string
- type InferTensorContents
- func (*InferTensorContents) Descriptor() ([]byte, []int) (deprecated)
- func (x *InferTensorContents) GetBoolContents() []bool
- func (x *InferTensorContents) GetBytesContents() [][]byte
- func (x *InferTensorContents) GetFp32Contents() []float32
- func (x *InferTensorContents) GetFp64Contents() []float64
- func (x *InferTensorContents) GetInt64Contents() []int64
- func (x *InferTensorContents) GetIntContents() []int32
- func (x *InferTensorContents) GetUint64Contents() []uint64
- func (x *InferTensorContents) GetUintContents() []uint32
- func (*InferTensorContents) ProtoMessage()
- func (x *InferTensorContents) ProtoReflect() protoreflect.Message
- func (x *InferTensorContents) Reset()
- func (x *InferTensorContents) String() string
- type LogSettingsRequest
- func (*LogSettingsRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *LogSettingsRequest) GetSettings() map[string]*LogSettingsRequest_SettingValue
- func (*LogSettingsRequest) ProtoMessage()
- func (x *LogSettingsRequest) ProtoReflect() protoreflect.Message
- func (x *LogSettingsRequest) Reset()
- func (x *LogSettingsRequest) String() string
- type LogSettingsRequest_SettingValue
- func (*LogSettingsRequest_SettingValue) Descriptor() ([]byte, []int) (deprecated)
- func (x *LogSettingsRequest_SettingValue) GetBoolParam() bool
- func (m *LogSettingsRequest_SettingValue) GetParameterChoice() isLogSettingsRequest_SettingValue_ParameterChoice
- func (x *LogSettingsRequest_SettingValue) GetStringParam() string
- func (x *LogSettingsRequest_SettingValue) GetUint32Param() uint32
- func (*LogSettingsRequest_SettingValue) ProtoMessage()
- func (x *LogSettingsRequest_SettingValue) ProtoReflect() protoreflect.Message
- func (x *LogSettingsRequest_SettingValue) Reset()
- func (x *LogSettingsRequest_SettingValue) String() string
- type LogSettingsRequest_SettingValue_BoolParam
- type LogSettingsRequest_SettingValue_StringParam
- type LogSettingsRequest_SettingValue_Uint32Param
- type LogSettingsResponse
- func (*LogSettingsResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *LogSettingsResponse) GetSettings() map[string]*LogSettingsResponse_SettingValue
- func (*LogSettingsResponse) ProtoMessage()
- func (x *LogSettingsResponse) ProtoReflect() protoreflect.Message
- func (x *LogSettingsResponse) Reset()
- func (x *LogSettingsResponse) String() string
- type LogSettingsResponse_SettingValue
- func (*LogSettingsResponse_SettingValue) Descriptor() ([]byte, []int) (deprecated)
- func (x *LogSettingsResponse_SettingValue) GetBoolParam() bool
- func (m *LogSettingsResponse_SettingValue) GetParameterChoice() isLogSettingsResponse_SettingValue_ParameterChoice
- func (x *LogSettingsResponse_SettingValue) GetStringParam() string
- func (x *LogSettingsResponse_SettingValue) GetUint32Param() uint32
- func (*LogSettingsResponse_SettingValue) ProtoMessage()
- func (x *LogSettingsResponse_SettingValue) ProtoReflect() protoreflect.Message
- func (x *LogSettingsResponse_SettingValue) Reset()
- func (x *LogSettingsResponse_SettingValue) String() string
- type LogSettingsResponse_SettingValue_BoolParam
- type LogSettingsResponse_SettingValue_StringParam
- type LogSettingsResponse_SettingValue_Uint32Param
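
`LogSettingsRequest` carries a map of named settings whose values use the oneof wrappers listed above. A hedged sketch of updating log settings; the setting keys ("log_info", "log_verbose_level") are Triton's documented names as best recalled, so verify them against your server version.

```go
package example

import (
	"context"

	pb "example.com/your/module/inference" // placeholder path
)

// setLogging enables info logging and raises verbosity to 1.
func setLogging(ctx context.Context, client pb.GRPCInferenceServiceClient) (*pb.LogSettingsResponse, error) {
	return client.LogSettings(ctx, &pb.LogSettingsRequest{
		Settings: map[string]*pb.LogSettingsRequest_SettingValue{
			"log_info":          {ParameterChoice: &pb.LogSettingsRequest_SettingValue_BoolParam{BoolParam: true}},
			"log_verbose_level": {ParameterChoice: &pb.LogSettingsRequest_SettingValue_Uint32Param{Uint32Param: 1}},
		},
	})
}
```
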
- type MemoryUsage
- func (*MemoryUsage) Descriptor() ([]byte, []int) (deprecated)
- func (x *MemoryUsage) GetByteSize() uint64
- func (x *MemoryUsage) GetId() int64
- func (x *MemoryUsage) GetType() string
- func (*MemoryUsage) ProtoMessage()
- func (x *MemoryUsage) ProtoReflect() protoreflect.Message
- func (x *MemoryUsage) Reset()
- func (x *MemoryUsage) String() string
- type ModelConfig
- func (*ModelConfig) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelConfig) GetBackend() string
- func (x *ModelConfig) GetBatchInput() []*BatchInput
- func (x *ModelConfig) GetBatchOutput() []*BatchOutput
- func (x *ModelConfig) GetCcModelFilenames() map[string]string
- func (x *ModelConfig) GetDefaultModelFilename() string
- func (x *ModelConfig) GetDynamicBatching() *ModelDynamicBatching
- func (x *ModelConfig) GetEnsembleScheduling() *ModelEnsembling
- func (x *ModelConfig) GetInput() []*ModelInput
- func (x *ModelConfig) GetInstanceGroup() []*ModelInstanceGroup
- func (x *ModelConfig) GetMaxBatchSize() int32
- func (x *ModelConfig) GetMetricTags() map[string]string
- func (x *ModelConfig) GetModelOperations() *ModelOperations
- func (x *ModelConfig) GetModelRepositoryAgents() *ModelRepositoryAgents
- func (x *ModelConfig) GetModelTransactionPolicy() *ModelTransactionPolicy
- func (x *ModelConfig) GetModelWarmup() []*ModelWarmup
- func (x *ModelConfig) GetName() string
- func (x *ModelConfig) GetOptimization() *ModelOptimizationPolicy
- func (x *ModelConfig) GetOutput() []*ModelOutput
- func (x *ModelConfig) GetParameters() map[string]*ModelParameter
- func (x *ModelConfig) GetPlatform() string
- func (x *ModelConfig) GetResponseCache() *ModelResponseCache
- func (x *ModelConfig) GetRuntime() string
- func (m *ModelConfig) GetSchedulingChoice() isModelConfig_SchedulingChoice
- func (x *ModelConfig) GetSequenceBatching() *ModelSequenceBatching
- func (x *ModelConfig) GetVersionPolicy() *ModelVersionPolicy
- func (*ModelConfig) ProtoMessage()
- func (x *ModelConfig) ProtoReflect() protoreflect.Message
- func (x *ModelConfig) Reset()
- func (x *ModelConfig) String() string
- type ModelConfigRequest
- func (*ModelConfigRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelConfigRequest) GetName() string
- func (x *ModelConfigRequest) GetVersion() string
- func (*ModelConfigRequest) ProtoMessage()
- func (x *ModelConfigRequest) ProtoReflect() protoreflect.Message
- func (x *ModelConfigRequest) Reset()
- func (x *ModelConfigRequest) String() string
- type ModelConfigResponse
- func (*ModelConfigResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelConfigResponse) GetConfig() *ModelConfig
- func (*ModelConfigResponse) ProtoMessage()
- func (x *ModelConfigResponse) ProtoReflect() protoreflect.Message
- func (x *ModelConfigResponse) Reset()
- func (x *ModelConfigResponse) String() string
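
Fetching a model's full `ModelConfig` goes through the request/response pair above. A hedged sketch; the model name is a placeholder, and leaving `Version` empty lets the server choose per its version policy.

```go
package example

import (
	"context"
	"fmt"

	pb "example.com/your/module/inference" // placeholder path
)

// printConfig fetches and summarizes a model's configuration.
func printConfig(ctx context.Context, client pb.GRPCInferenceServiceClient) error {
	resp, err := client.ModelConfig(ctx, &pb.ModelConfigRequest{Name: "simple"})
	if err != nil {
		return err
	}
	cfg := resp.GetConfig()
	fmt.Println(cfg.GetName(), cfg.GetPlatform(), "max_batch_size =", cfg.GetMaxBatchSize())
	return nil
}
```
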
- type ModelConfig_DynamicBatching
- type ModelConfig_EnsembleScheduling
- type ModelConfig_SequenceBatching
- type ModelDynamicBatching
- func (*ModelDynamicBatching) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelDynamicBatching) GetDefaultPriorityLevel() uint64
- func (x *ModelDynamicBatching) GetDefaultQueuePolicy() *ModelQueuePolicy
- func (x *ModelDynamicBatching) GetMaxQueueDelayMicroseconds() uint64
- func (x *ModelDynamicBatching) GetPreferredBatchSize() []int32
- func (x *ModelDynamicBatching) GetPreserveOrdering() bool
- func (x *ModelDynamicBatching) GetPriorityLevels() uint64
- func (x *ModelDynamicBatching) GetPriorityQueuePolicy() map[uint64]*ModelQueuePolicy
- func (*ModelDynamicBatching) ProtoMessage()
- func (x *ModelDynamicBatching) ProtoReflect() protoreflect.Message
- func (x *ModelDynamicBatching) Reset()
- func (x *ModelDynamicBatching) String() string
- type ModelEnsembling
- type ModelEnsembling_Step
- func (*ModelEnsembling_Step) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelEnsembling_Step) GetInputMap() map[string]string
- func (x *ModelEnsembling_Step) GetModelName() string
- func (x *ModelEnsembling_Step) GetModelNamespace() string
- func (x *ModelEnsembling_Step) GetModelVersion() int64
- func (x *ModelEnsembling_Step) GetOutputMap() map[string]string
- func (*ModelEnsembling_Step) ProtoMessage()
- func (x *ModelEnsembling_Step) ProtoReflect() protoreflect.Message
- func (x *ModelEnsembling_Step) Reset()
- func (x *ModelEnsembling_Step) String() string
- type ModelInferRequest
- func (*ModelInferRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInferRequest) GetId() string
- func (x *ModelInferRequest) GetInputs() []*ModelInferRequest_InferInputTensor
- func (x *ModelInferRequest) GetModelName() string
- func (x *ModelInferRequest) GetModelVersion() string
- func (x *ModelInferRequest) GetOutputs() []*ModelInferRequest_InferRequestedOutputTensor
- func (x *ModelInferRequest) GetParameters() map[string]*InferParameter
- func (x *ModelInferRequest) GetRawInputContents() [][]byte
- func (*ModelInferRequest) ProtoMessage()
- func (x *ModelInferRequest) ProtoReflect() protoreflect.Message
- func (x *ModelInferRequest) Reset()
- func (x *ModelInferRequest) String() string
- type ModelInferRequest_InferInputTensor
- func (*ModelInferRequest_InferInputTensor) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInferRequest_InferInputTensor) GetContents() *InferTensorContents
- func (x *ModelInferRequest_InferInputTensor) GetDatatype() string
- func (x *ModelInferRequest_InferInputTensor) GetName() string
- func (x *ModelInferRequest_InferInputTensor) GetParameters() map[string]*InferParameter
- func (x *ModelInferRequest_InferInputTensor) GetShape() []int64
- func (*ModelInferRequest_InferInputTensor) ProtoMessage()
- func (x *ModelInferRequest_InferInputTensor) ProtoReflect() protoreflect.Message
- func (x *ModelInferRequest_InferInputTensor) Reset()
- func (x *ModelInferRequest_InferInputTensor) String() string
- type ModelInferRequest_InferRequestedOutputTensor
- func (*ModelInferRequest_InferRequestedOutputTensor) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInferRequest_InferRequestedOutputTensor) GetName() string
- func (x *ModelInferRequest_InferRequestedOutputTensor) GetParameters() map[string]*InferParameter
- func (*ModelInferRequest_InferRequestedOutputTensor) ProtoMessage()
- func (x *ModelInferRequest_InferRequestedOutputTensor) ProtoReflect() protoreflect.Message
- func (x *ModelInferRequest_InferRequestedOutputTensor) Reset()
- func (x *ModelInferRequest_InferRequestedOutputTensor) String() string
- type ModelInferResponse
- func (*ModelInferResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInferResponse) GetId() string
- func (x *ModelInferResponse) GetModelName() string
- func (x *ModelInferResponse) GetModelVersion() string
- func (x *ModelInferResponse) GetOutputs() []*ModelInferResponse_InferOutputTensor
- func (x *ModelInferResponse) GetParameters() map[string]*InferParameter
- func (x *ModelInferResponse) GetRawOutputContents() [][]byte
- func (*ModelInferResponse) ProtoMessage()
- func (x *ModelInferResponse) ProtoReflect() protoreflect.Message
- func (x *ModelInferResponse) Reset()
- func (x *ModelInferResponse) String() string
- type ModelInferResponse_InferOutputTensor
- func (*ModelInferResponse_InferOutputTensor) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInferResponse_InferOutputTensor) GetContents() *InferTensorContents
- func (x *ModelInferResponse_InferOutputTensor) GetDatatype() string
- func (x *ModelInferResponse_InferOutputTensor) GetName() string
- func (x *ModelInferResponse_InferOutputTensor) GetParameters() map[string]*InferParameter
- func (x *ModelInferResponse_InferOutputTensor) GetShape() []int64
- func (*ModelInferResponse_InferOutputTensor) ProtoMessage()
- func (x *ModelInferResponse_InferOutputTensor) ProtoReflect() protoreflect.Message
- func (x *ModelInferResponse_InferOutputTensor) Reset()
- func (x *ModelInferResponse_InferOutputTensor) String() string
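
`ModelInferRequest` and `ModelInferResponse` with their nested tensor messages form the core inference round trip. A hedged sketch of a single FP32 inference; model and tensor names are placeholders, and a server may return data in `RawOutputContents` rather than the typed `Contents` message.

```go
package example

import (
	"context"
	"fmt"

	pb "example.com/your/module/inference" // placeholder path
)

// infer sends one request with a single 1x4 FP32 input and prints the
// requested output tensor.
func infer(ctx context.Context, client pb.GRPCInferenceServiceClient) error {
	req := &pb.ModelInferRequest{
		ModelName: "simple",
		Inputs: []*pb.ModelInferRequest_InferInputTensor{{
			Name:     "INPUT0",
			Datatype: "FP32",
			Shape:    []int64{1, 4},
			Contents: &pb.InferTensorContents{Fp32Contents: []float32{1, 2, 3, 4}},
		}},
		Outputs: []*pb.ModelInferRequest_InferRequestedOutputTensor{{Name: "OUTPUT0"}},
	}
	resp, err := client.ModelInfer(ctx, req)
	if err != nil {
		return err
	}
	for _, out := range resp.GetOutputs() {
		fmt.Println(out.GetName(), out.GetShape(), out.GetContents().GetFp32Contents())
	}
	// Some servers place tensor bytes here instead of in Contents.
	fmt.Println("raw buffers:", len(resp.GetRawOutputContents()))
	return nil
}
```
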
- type ModelInput
- func (*ModelInput) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInput) GetAllowRaggedBatch() bool
- func (x *ModelInput) GetDataType() DataType
- func (x *ModelInput) GetDims() []int64
- func (x *ModelInput) GetFormat() ModelInput_Format
- func (x *ModelInput) GetIsShapeTensor() bool
- func (x *ModelInput) GetName() string
- func (x *ModelInput) GetOptional() bool
- func (x *ModelInput) GetReshape() *ModelTensorReshape
- func (*ModelInput) ProtoMessage()
- func (x *ModelInput) ProtoReflect() protoreflect.Message
- func (x *ModelInput) Reset()
- func (x *ModelInput) String() string
- type ModelInput_Format
- func (ModelInput_Format) Descriptor() protoreflect.EnumDescriptor
- func (x ModelInput_Format) Enum() *ModelInput_Format
- func (ModelInput_Format) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelInput_Format) Number() protoreflect.EnumNumber
- func (x ModelInput_Format) String() string
- func (ModelInput_Format) Type() protoreflect.EnumType
- type ModelInstanceGroup
- func (*ModelInstanceGroup) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInstanceGroup) GetCount() int32
- func (x *ModelInstanceGroup) GetGpus() []int32
- func (x *ModelInstanceGroup) GetHostPolicy() string
- func (x *ModelInstanceGroup) GetKind() ModelInstanceGroup_Kind
- func (x *ModelInstanceGroup) GetName() string
- func (x *ModelInstanceGroup) GetPassive() bool
- func (x *ModelInstanceGroup) GetProfile() []string
- func (x *ModelInstanceGroup) GetRateLimiter() *ModelRateLimiter
- func (x *ModelInstanceGroup) GetSecondaryDevices() []*ModelInstanceGroup_SecondaryDevice
- func (*ModelInstanceGroup) ProtoMessage()
- func (x *ModelInstanceGroup) ProtoReflect() protoreflect.Message
- func (x *ModelInstanceGroup) Reset()
- func (x *ModelInstanceGroup) String() string
- type ModelInstanceGroup_Kind
- func (ModelInstanceGroup_Kind) Descriptor() protoreflect.EnumDescriptor
- func (x ModelInstanceGroup_Kind) Enum() *ModelInstanceGroup_Kind
- func (ModelInstanceGroup_Kind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelInstanceGroup_Kind) Number() protoreflect.EnumNumber
- func (x ModelInstanceGroup_Kind) String() string
- func (ModelInstanceGroup_Kind) Type() protoreflect.EnumType
- type ModelInstanceGroup_SecondaryDevice
- func (*ModelInstanceGroup_SecondaryDevice) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInstanceGroup_SecondaryDevice) GetDeviceId() int64
- func (x *ModelInstanceGroup_SecondaryDevice) GetKind() ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind
- func (*ModelInstanceGroup_SecondaryDevice) ProtoMessage()
- func (x *ModelInstanceGroup_SecondaryDevice) ProtoReflect() protoreflect.Message
- func (x *ModelInstanceGroup_SecondaryDevice) Reset()
- func (x *ModelInstanceGroup_SecondaryDevice) String() string
- type ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind
- func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Descriptor() protoreflect.EnumDescriptor
- func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Enum() *ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind
- func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Number() protoreflect.EnumNumber
- func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) String() string
- func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Type() protoreflect.EnumType
- type ModelMetadataRequest
- func (*ModelMetadataRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelMetadataRequest) GetName() string
- func (x *ModelMetadataRequest) GetVersion() string
- func (*ModelMetadataRequest) ProtoMessage()
- func (x *ModelMetadataRequest) ProtoReflect() protoreflect.Message
- func (x *ModelMetadataRequest) Reset()
- func (x *ModelMetadataRequest) String() string
- type ModelMetadataResponse
- func (*ModelMetadataResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelMetadataResponse) GetInputs() []*ModelMetadataResponse_TensorMetadata
- func (x *ModelMetadataResponse) GetName() string
- func (x *ModelMetadataResponse) GetOutputs() []*ModelMetadataResponse_TensorMetadata
- func (x *ModelMetadataResponse) GetPlatform() string
- func (x *ModelMetadataResponse) GetVersions() []string
- func (*ModelMetadataResponse) ProtoMessage()
- func (x *ModelMetadataResponse) ProtoReflect() protoreflect.Message
- func (x *ModelMetadataResponse) Reset()
- func (x *ModelMetadataResponse) String() string
- type ModelMetadataResponse_TensorMetadata
- func (*ModelMetadataResponse_TensorMetadata) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelMetadataResponse_TensorMetadata) GetDatatype() string
- func (x *ModelMetadataResponse_TensorMetadata) GetName() string
- func (x *ModelMetadataResponse_TensorMetadata) GetShape() []int64
- func (*ModelMetadataResponse_TensorMetadata) ProtoMessage()
- func (x *ModelMetadataResponse_TensorMetadata) ProtoReflect() protoreflect.Message
- func (x *ModelMetadataResponse_TensorMetadata) Reset()
- func (x *ModelMetadataResponse_TensorMetadata) String() string
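
`ModelMetadata` is the usual way to discover a model's tensor names, datatypes, and shapes before building an infer request. A hedged sketch; the model name is a placeholder.

```go
package example

import (
	"context"
	"fmt"

	pb "example.com/your/module/inference" // placeholder path
)

// describeModel lists a model's input and output tensors.
func describeModel(ctx context.Context, client pb.GRPCInferenceServiceClient) error {
	md, err := client.ModelMetadata(ctx, &pb.ModelMetadataRequest{Name: "simple"})
	if err != nil {
		return err
	}
	fmt.Println(md.GetName(), md.GetPlatform(), "versions:", md.GetVersions())
	for _, t := range md.GetInputs() {
		fmt.Println("  in :", t.GetName(), t.GetDatatype(), t.GetShape())
	}
	for _, t := range md.GetOutputs() {
		fmt.Println("  out:", t.GetName(), t.GetDatatype(), t.GetShape())
	}
	return nil
}
```
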
- type ModelOperations
- type ModelOptimizationPolicy
- func (*ModelOptimizationPolicy) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy) GetCuda() *ModelOptimizationPolicy_Cuda
- func (x *ModelOptimizationPolicy) GetEagerBatching() bool
- func (x *ModelOptimizationPolicy) GetExecutionAccelerators() *ModelOptimizationPolicy_ExecutionAccelerators
- func (x *ModelOptimizationPolicy) GetGatherKernelBufferThreshold() uint32
- func (x *ModelOptimizationPolicy) GetGraph() *ModelOptimizationPolicy_Graph
- func (x *ModelOptimizationPolicy) GetInputPinnedMemory() *ModelOptimizationPolicy_PinnedMemoryBuffer
- func (x *ModelOptimizationPolicy) GetOutputPinnedMemory() *ModelOptimizationPolicy_PinnedMemoryBuffer
- func (x *ModelOptimizationPolicy) GetPriority() ModelOptimizationPolicy_ModelPriority
- func (*ModelOptimizationPolicy) ProtoMessage()
- func (x *ModelOptimizationPolicy) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy) Reset()
- func (x *ModelOptimizationPolicy) String() string
- type ModelOptimizationPolicy_Cuda
- func (*ModelOptimizationPolicy_Cuda) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Cuda) GetBusyWaitEvents() bool
- func (x *ModelOptimizationPolicy_Cuda) GetGraphSpec() []*ModelOptimizationPolicy_Cuda_GraphSpec
- func (x *ModelOptimizationPolicy_Cuda) GetGraphs() bool
- func (x *ModelOptimizationPolicy_Cuda) GetOutputCopyStream() bool
- func (*ModelOptimizationPolicy_Cuda) ProtoMessage()
- func (x *ModelOptimizationPolicy_Cuda) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Cuda) Reset()
- func (x *ModelOptimizationPolicy_Cuda) String() string
- type ModelOptimizationPolicy_Cuda_GraphSpec
- func (*ModelOptimizationPolicy_Cuda_GraphSpec) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetBatchSize() int32
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetGraphLowerBound() *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetInput() map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape
- func (*ModelOptimizationPolicy_Cuda_GraphSpec) ProtoMessage()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) Reset()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) String() string
- type ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound
- func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetBatchSize() int32
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetInput() map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape
- func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoMessage()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Reset()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) String() string
- type ModelOptimizationPolicy_Cuda_GraphSpec_Shape
- func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) GetDim() []int64
- func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoMessage()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Reset()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) String() string
- type ModelOptimizationPolicy_ExecutionAccelerators
- func (*ModelOptimizationPolicy_ExecutionAccelerators) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) GetCpuExecutionAccelerator() []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) GetGpuExecutionAccelerator() []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
- func (*ModelOptimizationPolicy_ExecutionAccelerators) ProtoMessage()
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) Reset()
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) String() string
- type ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
- func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetName() string
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetParameters() map[string]string
- func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoMessage()
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Reset()
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) String() string
- type ModelOptimizationPolicy_Graph
- func (*ModelOptimizationPolicy_Graph) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Graph) GetLevel() int32
- func (*ModelOptimizationPolicy_Graph) ProtoMessage()
- func (x *ModelOptimizationPolicy_Graph) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Graph) Reset()
- func (x *ModelOptimizationPolicy_Graph) String() string
- type ModelOptimizationPolicy_ModelPriority
- func (ModelOptimizationPolicy_ModelPriority) Descriptor() protoreflect.EnumDescriptor
- func (x ModelOptimizationPolicy_ModelPriority) Enum() *ModelOptimizationPolicy_ModelPriority
- func (ModelOptimizationPolicy_ModelPriority) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelOptimizationPolicy_ModelPriority) Number() protoreflect.EnumNumber
- func (x ModelOptimizationPolicy_ModelPriority) String() string
- func (ModelOptimizationPolicy_ModelPriority) Type() protoreflect.EnumType
- type ModelOptimizationPolicy_PinnedMemoryBuffer
- func (*ModelOptimizationPolicy_PinnedMemoryBuffer) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) GetEnable() bool
- func (*ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoMessage()
- func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) Reset()
- func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) String() string
- type ModelOutput
- func (*ModelOutput) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOutput) GetDataType() DataType
- func (x *ModelOutput) GetDims() []int64
- func (x *ModelOutput) GetIsShapeTensor() bool
- func (x *ModelOutput) GetLabelFilename() string
- func (x *ModelOutput) GetName() string
- func (x *ModelOutput) GetReshape() *ModelTensorReshape
- func (*ModelOutput) ProtoMessage()
- func (x *ModelOutput) ProtoReflect() protoreflect.Message
- func (x *ModelOutput) Reset()
- func (x *ModelOutput) String() string
- type ModelParameter
- type ModelQueuePolicy
- func (*ModelQueuePolicy) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelQueuePolicy) GetAllowTimeoutOverride() bool
- func (x *ModelQueuePolicy) GetDefaultTimeoutMicroseconds() uint64
- func (x *ModelQueuePolicy) GetMaxQueueSize() uint32
- func (x *ModelQueuePolicy) GetTimeoutAction() ModelQueuePolicy_TimeoutAction
- func (*ModelQueuePolicy) ProtoMessage()
- func (x *ModelQueuePolicy) ProtoReflect() protoreflect.Message
- func (x *ModelQueuePolicy) Reset()
- func (x *ModelQueuePolicy) String() string
- type ModelQueuePolicy_TimeoutAction
- func (ModelQueuePolicy_TimeoutAction) Descriptor() protoreflect.EnumDescriptor
- func (x ModelQueuePolicy_TimeoutAction) Enum() *ModelQueuePolicy_TimeoutAction
- func (ModelQueuePolicy_TimeoutAction) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelQueuePolicy_TimeoutAction) Number() protoreflect.EnumNumber
- func (x ModelQueuePolicy_TimeoutAction) String() string
- func (ModelQueuePolicy_TimeoutAction) Type() protoreflect.EnumType
- type ModelRateLimiter
- func (*ModelRateLimiter) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelRateLimiter) GetPriority() uint32
- func (x *ModelRateLimiter) GetResources() []*ModelRateLimiter_Resource
- func (*ModelRateLimiter) ProtoMessage()
- func (x *ModelRateLimiter) ProtoReflect() protoreflect.Message
- func (x *ModelRateLimiter) Reset()
- func (x *ModelRateLimiter) String() string
- type ModelRateLimiter_Resource
- func (*ModelRateLimiter_Resource) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelRateLimiter_Resource) GetCount() uint32
- func (x *ModelRateLimiter_Resource) GetGlobal() bool
- func (x *ModelRateLimiter_Resource) GetName() string
- func (*ModelRateLimiter_Resource) ProtoMessage()
- func (x *ModelRateLimiter_Resource) ProtoReflect() protoreflect.Message
- func (x *ModelRateLimiter_Resource) Reset()
- func (x *ModelRateLimiter_Resource) String() string
- type ModelReadyRequest
- func (*ModelReadyRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelReadyRequest) GetName() string
- func (x *ModelReadyRequest) GetVersion() string
- func (*ModelReadyRequest) ProtoMessage()
- func (x *ModelReadyRequest) ProtoReflect() protoreflect.Message
- func (x *ModelReadyRequest) Reset()
- func (x *ModelReadyRequest) String() string
- type ModelReadyResponse
- type ModelRepositoryAgents
- func (*ModelRepositoryAgents) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelRepositoryAgents) GetAgents() []*ModelRepositoryAgents_Agent
- func (*ModelRepositoryAgents) ProtoMessage()
- func (x *ModelRepositoryAgents) ProtoReflect() protoreflect.Message
- func (x *ModelRepositoryAgents) Reset()
- func (x *ModelRepositoryAgents) String() string
- type ModelRepositoryAgents_Agent
- func (*ModelRepositoryAgents_Agent) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelRepositoryAgents_Agent) GetName() string
- func (x *ModelRepositoryAgents_Agent) GetParameters() map[string]string
- func (*ModelRepositoryAgents_Agent) ProtoMessage()
- func (x *ModelRepositoryAgents_Agent) ProtoReflect() protoreflect.Message
- func (x *ModelRepositoryAgents_Agent) Reset()
- func (x *ModelRepositoryAgents_Agent) String() string
- type ModelRepositoryParameter
- func (*ModelRepositoryParameter) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelRepositoryParameter) GetBoolParam() bool
- func (x *ModelRepositoryParameter) GetBytesParam() []byte
- func (x *ModelRepositoryParameter) GetInt64Param() int64
- func (m *ModelRepositoryParameter) GetParameterChoice() isModelRepositoryParameter_ParameterChoice
- func (x *ModelRepositoryParameter) GetStringParam() string
- func (*ModelRepositoryParameter) ProtoMessage()
- func (x *ModelRepositoryParameter) ProtoReflect() protoreflect.Message
- func (x *ModelRepositoryParameter) Reset()
- func (x *ModelRepositoryParameter) String() string
- type ModelRepositoryParameter_BoolParam
- type ModelRepositoryParameter_BytesParam
- type ModelRepositoryParameter_Int64Param
- type ModelRepositoryParameter_StringParam
- type ModelResponseCache
- func (*ModelResponseCache) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelResponseCache) GetEnable() bool
- func (*ModelResponseCache) ProtoMessage()
- func (x *ModelResponseCache) ProtoReflect() protoreflect.Message
- func (x *ModelResponseCache) Reset()
- func (x *ModelResponseCache) String() string
- type ModelSequenceBatching
- func (*ModelSequenceBatching) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching) GetControlInput() []*ModelSequenceBatching_ControlInput
- func (x *ModelSequenceBatching) GetDirect() *ModelSequenceBatching_StrategyDirect
- func (x *ModelSequenceBatching) GetIterativeSequence() bool
- func (x *ModelSequenceBatching) GetMaxSequenceIdleMicroseconds() uint64
- func (x *ModelSequenceBatching) GetOldest() *ModelSequenceBatching_StrategyOldest
- func (x *ModelSequenceBatching) GetState() []*ModelSequenceBatching_State
- func (m *ModelSequenceBatching) GetStrategyChoice() isModelSequenceBatching_StrategyChoice
- func (*ModelSequenceBatching) ProtoMessage()
- func (x *ModelSequenceBatching) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching) Reset()
- func (x *ModelSequenceBatching) String() string
- type ModelSequenceBatching_Control
- func (*ModelSequenceBatching_Control) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching_Control) GetBoolFalseTrue() []bool
- func (x *ModelSequenceBatching_Control) GetDataType() DataType
- func (x *ModelSequenceBatching_Control) GetFp32FalseTrue() []float32
- func (x *ModelSequenceBatching_Control) GetInt32FalseTrue() []int32
- func (x *ModelSequenceBatching_Control) GetKind() ModelSequenceBatching_Control_Kind
- func (*ModelSequenceBatching_Control) ProtoMessage()
- func (x *ModelSequenceBatching_Control) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_Control) Reset()
- func (x *ModelSequenceBatching_Control) String() string
- type ModelSequenceBatching_ControlInput
- func (*ModelSequenceBatching_ControlInput) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching_ControlInput) GetControl() []*ModelSequenceBatching_Control
- func (x *ModelSequenceBatching_ControlInput) GetName() string
- func (*ModelSequenceBatching_ControlInput) ProtoMessage()
- func (x *ModelSequenceBatching_ControlInput) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_ControlInput) Reset()
- func (x *ModelSequenceBatching_ControlInput) String() string
- type ModelSequenceBatching_Control_Kind
- func (ModelSequenceBatching_Control_Kind) Descriptor() protoreflect.EnumDescriptor
- func (x ModelSequenceBatching_Control_Kind) Enum() *ModelSequenceBatching_Control_Kind
- func (ModelSequenceBatching_Control_Kind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelSequenceBatching_Control_Kind) Number() protoreflect.EnumNumber
- func (x ModelSequenceBatching_Control_Kind) String() string
- func (ModelSequenceBatching_Control_Kind) Type() protoreflect.EnumType
- type ModelSequenceBatching_Direct
- type ModelSequenceBatching_InitialState
- func (*ModelSequenceBatching_InitialState) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching_InitialState) GetDataFile() string
- func (x *ModelSequenceBatching_InitialState) GetDataType() DataType
- func (x *ModelSequenceBatching_InitialState) GetDims() []int64
- func (x *ModelSequenceBatching_InitialState) GetName() string
- func (m *ModelSequenceBatching_InitialState) GetStateData() isModelSequenceBatching_InitialState_StateData
- func (x *ModelSequenceBatching_InitialState) GetZeroData() bool
- func (*ModelSequenceBatching_InitialState) ProtoMessage()
- func (x *ModelSequenceBatching_InitialState) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_InitialState) Reset()
- func (x *ModelSequenceBatching_InitialState) String() string
- type ModelSequenceBatching_InitialState_DataFile
- type ModelSequenceBatching_InitialState_ZeroData
- type ModelSequenceBatching_Oldest
- type ModelSequenceBatching_State
- func (*ModelSequenceBatching_State) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching_State) GetDataType() DataType
- func (x *ModelSequenceBatching_State) GetDims() []int64
- func (x *ModelSequenceBatching_State) GetInitialState() []*ModelSequenceBatching_InitialState
- func (x *ModelSequenceBatching_State) GetInputName() string
- func (x *ModelSequenceBatching_State) GetOutputName() string
- func (x *ModelSequenceBatching_State) GetUseGrowableMemory() bool
- func (x *ModelSequenceBatching_State) GetUseSameBufferForInputOutput() bool
- func (*ModelSequenceBatching_State) ProtoMessage()
- func (x *ModelSequenceBatching_State) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_State) Reset()
- func (x *ModelSequenceBatching_State) String() string
- type ModelSequenceBatching_StrategyDirect
- func (*ModelSequenceBatching_StrategyDirect) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching_StrategyDirect) GetMaxQueueDelayMicroseconds() uint64
- func (x *ModelSequenceBatching_StrategyDirect) GetMinimumSlotUtilization() float32
- func (*ModelSequenceBatching_StrategyDirect) ProtoMessage()
- func (x *ModelSequenceBatching_StrategyDirect) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_StrategyDirect) Reset()
- func (x *ModelSequenceBatching_StrategyDirect) String() string
- type ModelSequenceBatching_StrategyOldest
- func (*ModelSequenceBatching_StrategyOldest) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching_StrategyOldest) GetMaxCandidateSequences() int32
- func (x *ModelSequenceBatching_StrategyOldest) GetMaxQueueDelayMicroseconds() uint64
- func (x *ModelSequenceBatching_StrategyOldest) GetPreferredBatchSize() []int32
- func (x *ModelSequenceBatching_StrategyOldest) GetPreserveOrdering() bool
- func (*ModelSequenceBatching_StrategyOldest) ProtoMessage()
- func (x *ModelSequenceBatching_StrategyOldest) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_StrategyOldest) Reset()
- func (x *ModelSequenceBatching_StrategyOldest) String() string
- type ModelStatistics
- func (*ModelStatistics) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelStatistics) GetBatchStats() []*InferBatchStatistics
- func (x *ModelStatistics) GetExecutionCount() uint64
- func (x *ModelStatistics) GetInferenceCount() uint64
- func (x *ModelStatistics) GetInferenceStats() *InferStatistics
- func (x *ModelStatistics) GetLastInference() uint64
- func (x *ModelStatistics) GetMemoryUsage() []*MemoryUsage
- func (x *ModelStatistics) GetName() string
- func (x *ModelStatistics) GetResponseStats() map[string]*InferResponseStatistics
- func (x *ModelStatistics) GetVersion() string
- func (*ModelStatistics) ProtoMessage()
- func (x *ModelStatistics) ProtoReflect() protoreflect.Message
- func (x *ModelStatistics) Reset()
- func (x *ModelStatistics) String() string
- type ModelStatisticsRequest
- func (*ModelStatisticsRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelStatisticsRequest) GetName() string
- func (x *ModelStatisticsRequest) GetVersion() string
- func (*ModelStatisticsRequest) ProtoMessage()
- func (x *ModelStatisticsRequest) ProtoReflect() protoreflect.Message
- func (x *ModelStatisticsRequest) Reset()
- func (x *ModelStatisticsRequest) String() string
- type ModelStatisticsResponse
- func (*ModelStatisticsResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelStatisticsResponse) GetModelStats() []*ModelStatistics
- func (*ModelStatisticsResponse) ProtoMessage()
- func (x *ModelStatisticsResponse) ProtoReflect() protoreflect.Message
- func (x *ModelStatisticsResponse) Reset()
- func (x *ModelStatisticsResponse) String() string
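
The statistics messages report cumulative counters: each `StatisticDuration` pairs a total in nanoseconds (`Ns`) with a `Count`, so averages are computed by the caller. A hedged sketch; the model name is a placeholder.

```go
package example

import (
	"context"
	"fmt"

	pb "example.com/your/module/inference" // placeholder path
)

// queueLatency prints the average queue time per model version.
func queueLatency(ctx context.Context, client pb.GRPCInferenceServiceClient) error {
	resp, err := client.ModelStatistics(ctx, &pb.ModelStatisticsRequest{Name: "simple"})
	if err != nil {
		return err
	}
	for _, m := range resp.GetModelStats() {
		q := m.GetInferenceStats().GetQueue()
		if q.GetCount() == 0 {
			continue
		}
		avgMs := float64(q.GetNs()) / float64(q.GetCount()) / 1e6
		fmt.Printf("%s v%s: %d inferences, avg queue %.3f ms\n",
			m.GetName(), m.GetVersion(), m.GetInferenceCount(), avgMs)
	}
	return nil
}
```
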
- type ModelStreamInferResponse
- func (*ModelStreamInferResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelStreamInferResponse) GetErrorMessage() string
- func (x *ModelStreamInferResponse) GetInferResponse() *ModelInferResponse
- func (*ModelStreamInferResponse) ProtoMessage()
- func (x *ModelStreamInferResponse) ProtoReflect() protoreflect.Message
- func (x *ModelStreamInferResponse) Reset()
- func (x *ModelStreamInferResponse) String() string
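
`ModelStreamInferResponse` wraps each streamed result so that per-request failures arrive as `ErrorMessage` while transport failures surface from `Recv`. A hedged sketch of the bidirectional stream; the `Send`/`Recv`/`CloseSend` methods follow the standard protoc-gen-go-grpc stream interface implied by `GRPCInferenceService_ModelStreamInferClient`.

```go
package example

import (
	"context"
	"fmt"
	"io"

	pb "example.com/your/module/inference" // placeholder path
)

// streamInfer sends one request on the stream and drains all responses.
func streamInfer(ctx context.Context, client pb.GRPCInferenceServiceClient, req *pb.ModelInferRequest) error {
	stream, err := client.ModelStreamInfer(ctx)
	if err != nil {
		return err
	}
	if err := stream.Send(req); err != nil {
		return err
	}
	if err := stream.CloseSend(); err != nil {
		return err
	}
	for {
		resp, err := stream.Recv()
		if err == io.EOF {
			return nil // server finished the stream
		}
		if err != nil {
			return err // transport-level error
		}
		if msg := resp.GetErrorMessage(); msg != "" {
			return fmt.Errorf("stream error: %s", msg) // per-request error
		}
		fmt.Println("got response for id", resp.GetInferResponse().GetId())
	}
}
```
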
- type ModelTensorReshape
- func (*ModelTensorReshape) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelTensorReshape) GetShape() []int64
- func (*ModelTensorReshape) ProtoMessage()
- func (x *ModelTensorReshape) ProtoReflect() protoreflect.Message
- func (x *ModelTensorReshape) Reset()
- func (x *ModelTensorReshape) String() string
- type ModelTransactionPolicy
- func (*ModelTransactionPolicy) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelTransactionPolicy) GetDecoupled() bool
- func (*ModelTransactionPolicy) ProtoMessage()
- func (x *ModelTransactionPolicy) ProtoReflect() protoreflect.Message
- func (x *ModelTransactionPolicy) Reset()
- func (x *ModelTransactionPolicy) String() string
- type ModelVersionPolicy
- func (*ModelVersionPolicy) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelVersionPolicy) GetAll() *ModelVersionPolicy_All
- func (x *ModelVersionPolicy) GetLatest() *ModelVersionPolicy_Latest
- func (m *ModelVersionPolicy) GetPolicyChoice() isModelVersionPolicy_PolicyChoice
- func (x *ModelVersionPolicy) GetSpecific() *ModelVersionPolicy_Specific
- func (*ModelVersionPolicy) ProtoMessage()
- func (x *ModelVersionPolicy) ProtoReflect() protoreflect.Message
- func (x *ModelVersionPolicy) Reset()
- func (x *ModelVersionPolicy) String() string
- type ModelVersionPolicy_All
- type ModelVersionPolicy_All_
- type ModelVersionPolicy_Latest
- func (*ModelVersionPolicy_Latest) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelVersionPolicy_Latest) GetNumVersions() uint32
- func (*ModelVersionPolicy_Latest) ProtoMessage()
- func (x *ModelVersionPolicy_Latest) ProtoReflect() protoreflect.Message
- func (x *ModelVersionPolicy_Latest) Reset()
- func (x *ModelVersionPolicy_Latest) String() string
- type ModelVersionPolicy_Latest_
- type ModelVersionPolicy_Specific
- func (*ModelVersionPolicy_Specific) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelVersionPolicy_Specific) GetVersions() []int64
- func (*ModelVersionPolicy_Specific) ProtoMessage()
- func (x *ModelVersionPolicy_Specific) ProtoReflect() protoreflect.Message
- func (x *ModelVersionPolicy_Specific) Reset()
- func (x *ModelVersionPolicy_Specific) String() string
- type ModelVersionPolicy_Specific_
- type ModelWarmup
- func (*ModelWarmup) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelWarmup) GetBatchSize() uint32
- func (x *ModelWarmup) GetCount() uint32
- func (x *ModelWarmup) GetInputs() map[string]*ModelWarmup_Input
- func (x *ModelWarmup) GetName() string
- func (*ModelWarmup) ProtoMessage()
- func (x *ModelWarmup) ProtoReflect() protoreflect.Message
- func (x *ModelWarmup) Reset()
- func (x *ModelWarmup) String() string
- type ModelWarmup_Input
- func (*ModelWarmup_Input) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelWarmup_Input) GetDataType() DataType
- func (x *ModelWarmup_Input) GetDims() []int64
- func (x *ModelWarmup_Input) GetInputDataFile() string
- func (m *ModelWarmup_Input) GetInputDataType() isModelWarmup_Input_InputDataType
- func (x *ModelWarmup_Input) GetRandomData() bool
- func (x *ModelWarmup_Input) GetZeroData() bool
- func (*ModelWarmup_Input) ProtoMessage()
- func (x *ModelWarmup_Input) ProtoReflect() protoreflect.Message
- func (x *ModelWarmup_Input) Reset()
- func (x *ModelWarmup_Input) String() string
- type ModelWarmup_Input_InputDataFile
- type ModelWarmup_Input_RandomData
- type ModelWarmup_Input_ZeroData
- type RepositoryIndexRequest
- func (*RepositoryIndexRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *RepositoryIndexRequest) GetReady() bool
- func (x *RepositoryIndexRequest) GetRepositoryName() string
- func (*RepositoryIndexRequest) ProtoMessage()
- func (x *RepositoryIndexRequest) ProtoReflect() protoreflect.Message
- func (x *RepositoryIndexRequest) Reset()
- func (x *RepositoryIndexRequest) String() string
- type RepositoryIndexResponse
- func (*RepositoryIndexResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *RepositoryIndexResponse) GetModels() []*RepositoryIndexResponse_ModelIndex
- func (*RepositoryIndexResponse) ProtoMessage()
- func (x *RepositoryIndexResponse) ProtoReflect() protoreflect.Message
- func (x *RepositoryIndexResponse) Reset()
- func (x *RepositoryIndexResponse) String() string
- type RepositoryIndexResponse_ModelIndex
- func (*RepositoryIndexResponse_ModelIndex) Descriptor() ([]byte, []int) (deprecated)
- func (x *RepositoryIndexResponse_ModelIndex) GetName() string
- func (x *RepositoryIndexResponse_ModelIndex) GetReason() string
- func (x *RepositoryIndexResponse_ModelIndex) GetState() string
- func (x *RepositoryIndexResponse_ModelIndex) GetVersion() string
- func (*RepositoryIndexResponse_ModelIndex) ProtoMessage()
- func (x *RepositoryIndexResponse_ModelIndex) ProtoReflect() protoreflect.Message
- func (x *RepositoryIndexResponse_ModelIndex) Reset()
- func (x *RepositoryIndexResponse_ModelIndex) String() string
- type RepositoryModelLoadRequest
- func (*RepositoryModelLoadRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *RepositoryModelLoadRequest) GetModelName() string
- func (x *RepositoryModelLoadRequest) GetParameters() map[string]*ModelRepositoryParameter
- func (x *RepositoryModelLoadRequest) GetRepositoryName() string
- func (*RepositoryModelLoadRequest) ProtoMessage()
- func (x *RepositoryModelLoadRequest) ProtoReflect() protoreflect.Message
- func (x *RepositoryModelLoadRequest) Reset()
- func (x *RepositoryModelLoadRequest) String() string
- type RepositoryModelLoadResponse
- type RepositoryModelUnloadRequest
- func (*RepositoryModelUnloadRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *RepositoryModelUnloadRequest) GetModelName() string
- func (x *RepositoryModelUnloadRequest) GetParameters() map[string]*ModelRepositoryParameter
- func (x *RepositoryModelUnloadRequest) GetRepositoryName() string
- func (*RepositoryModelUnloadRequest) ProtoMessage()
- func (x *RepositoryModelUnloadRequest) ProtoReflect() protoreflect.Message
- func (x *RepositoryModelUnloadRequest) Reset()
- func (x *RepositoryModelUnloadRequest) String() string
- type RepositoryModelUnloadResponse
- func (*RepositoryModelUnloadResponse) Descriptor() ([]byte, []int) (deprecated)
- func (*RepositoryModelUnloadResponse) ProtoMessage()
- func (x *RepositoryModelUnloadResponse) ProtoReflect() protoreflect.Message
- func (x *RepositoryModelUnloadResponse) Reset()
- func (x *RepositoryModelUnloadResponse) String() string
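
The repository messages above drive model lifecycle control: index the repository, then load or unload by name. A hedged sketch; the model name is a placeholder, and an empty `RepositoryName` conventionally targets all repositories.

```go
package example

import (
	"context"
	"fmt"

	pb "example.com/your/module/inference" // placeholder path
)

// cycleModel lists the repository, then loads and unloads one model.
func cycleModel(ctx context.Context, client pb.GRPCInferenceServiceClient) error {
	// Ready=false asks for all models, not just the ready ones.
	idx, err := client.RepositoryIndex(ctx, &pb.RepositoryIndexRequest{Ready: false})
	if err != nil {
		return err
	}
	for _, m := range idx.GetModels() {
		fmt.Println(m.GetName(), m.GetVersion(), m.GetState(), m.GetReason())
	}
	if _, err := client.RepositoryModelLoad(ctx, &pb.RepositoryModelLoadRequest{ModelName: "simple"}); err != nil {
		return err
	}
	_, err = client.RepositoryModelUnload(ctx, &pb.RepositoryModelUnloadRequest{ModelName: "simple"})
	return err
}
```
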
- type ServerLiveRequest
- type ServerLiveResponse
- type ServerMetadataRequest
- type ServerMetadataResponse
- func (*ServerMetadataResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *ServerMetadataResponse) GetExtensions() []string
- func (x *ServerMetadataResponse) GetName() string
- func (x *ServerMetadataResponse) GetVersion() string
- func (*ServerMetadataResponse) ProtoMessage()
- func (x *ServerMetadataResponse) ProtoReflect() protoreflect.Message
- func (x *ServerMetadataResponse) Reset()
- func (x *ServerMetadataResponse) String() string
- type ServerReadyRequest
- type ServerReadyResponse
- func (*ServerReadyResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *ServerReadyResponse) GetReady() bool
- func (*ServerReadyResponse) ProtoMessage()
- func (x *ServerReadyResponse) ProtoReflect() protoreflect.Message
- func (x *ServerReadyResponse) Reset()
- func (x *ServerReadyResponse) String() string
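
The live/ready/metadata messages above are the lightweight server probes. A hedged sketch running them in the usual order:

```go
package example

import (
	"context"
	"fmt"

	pb "example.com/your/module/inference" // placeholder path
)

// probe checks liveness, then readiness, then prints server metadata.
func probe(ctx context.Context, client pb.GRPCInferenceServiceClient) error {
	if _, err := client.ServerLive(ctx, &pb.ServerLiveRequest{}); err != nil {
		return err
	}
	ready, err := client.ServerReady(ctx, &pb.ServerReadyRequest{})
	if err != nil || !ready.GetReady() {
		return fmt.Errorf("server not ready: %v", err)
	}
	meta, err := client.ServerMetadata(ctx, &pb.ServerMetadataRequest{})
	if err != nil {
		return err
	}
	fmt.Println(meta.GetName(), meta.GetVersion(), meta.GetExtensions())
	return nil
}
```
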
- type StatisticDuration
- func (*StatisticDuration) Descriptor() ([]byte, []int) (deprecated)
- func (x *StatisticDuration) GetCount() uint64
- func (x *StatisticDuration) GetNs() uint64
- func (*StatisticDuration) ProtoMessage()
- func (x *StatisticDuration) ProtoReflect() protoreflect.Message
- func (x *StatisticDuration) Reset()
- func (x *StatisticDuration) String() string
- type SystemSharedMemoryRegisterRequest
- func (*SystemSharedMemoryRegisterRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *SystemSharedMemoryRegisterRequest) GetByteSize() uint64
- func (x *SystemSharedMemoryRegisterRequest) GetKey() string
- func (x *SystemSharedMemoryRegisterRequest) GetName() string
- func (x *SystemSharedMemoryRegisterRequest) GetOffset() uint64
- func (*SystemSharedMemoryRegisterRequest) ProtoMessage()
- func (x *SystemSharedMemoryRegisterRequest) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryRegisterRequest) Reset()
- func (x *SystemSharedMemoryRegisterRequest) String() string
- type SystemSharedMemoryRegisterResponse
- func (*SystemSharedMemoryRegisterResponse) Descriptor() ([]byte, []int) (deprecated)
- func (*SystemSharedMemoryRegisterResponse) ProtoMessage()
- func (x *SystemSharedMemoryRegisterResponse) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryRegisterResponse) Reset()
- func (x *SystemSharedMemoryRegisterResponse) String() string
- type SystemSharedMemoryStatusRequest
- func (*SystemSharedMemoryStatusRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *SystemSharedMemoryStatusRequest) GetName() string
- func (*SystemSharedMemoryStatusRequest) ProtoMessage()
- func (x *SystemSharedMemoryStatusRequest) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryStatusRequest) Reset()
- func (x *SystemSharedMemoryStatusRequest) String() string
- type SystemSharedMemoryStatusResponse
- func (*SystemSharedMemoryStatusResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *SystemSharedMemoryStatusResponse) GetRegions() map[string]*SystemSharedMemoryStatusResponse_RegionStatus
- func (*SystemSharedMemoryStatusResponse) ProtoMessage()
- func (x *SystemSharedMemoryStatusResponse) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryStatusResponse) Reset()
- func (x *SystemSharedMemoryStatusResponse) String() string
- type SystemSharedMemoryStatusResponse_RegionStatus
- func (*SystemSharedMemoryStatusResponse_RegionStatus) Descriptor() ([]byte, []int) (deprecated)
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetByteSize() uint64
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetKey() string
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetName() string
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetOffset() uint64
- func (*SystemSharedMemoryStatusResponse_RegionStatus) ProtoMessage()
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) Reset()
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) String() string
- type SystemSharedMemoryUnregisterRequest
- func (*SystemSharedMemoryUnregisterRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *SystemSharedMemoryUnregisterRequest) GetName() string
- func (*SystemSharedMemoryUnregisterRequest) ProtoMessage()
- func (x *SystemSharedMemoryUnregisterRequest) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryUnregisterRequest) Reset()
- func (x *SystemSharedMemoryUnregisterRequest) String() string
- type SystemSharedMemoryUnregisterResponse
- func (*SystemSharedMemoryUnregisterResponse) Descriptor() ([]byte, []int) (deprecated)
- func (*SystemSharedMemoryUnregisterResponse) ProtoMessage()
- func (x *SystemSharedMemoryUnregisterResponse) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryUnregisterResponse) Reset()
- func (x *SystemSharedMemoryUnregisterResponse) String() string
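
System shared memory parallels the CUDA variant above but maps a POSIX shared-memory segment by key. A hedged sketch; the region name, key, and size are placeholders, and the segment must already exist on the host (for example created via shm_open) before registering it.

```go
package example

import (
	"context"

	pb "example.com/your/module/inference" // placeholder path
)

// registerSystemRegion maps a pre-created POSIX shared-memory segment
// for zero-copy tensor I/O.
func registerSystemRegion(ctx context.Context, client pb.GRPCInferenceServiceClient) error {
	_, err := client.SystemSharedMemoryRegister(ctx, &pb.SystemSharedMemoryRegisterRequest{
		Name:     "shm_region_0",  // caller-chosen region name
		Key:      "/triton_shm_0", // shared-memory key; placeholder
		Offset:   0,
		ByteSize: 4096,
	})
	return err
}
```
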
- type TraceSettingRequest
- func (*TraceSettingRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *TraceSettingRequest) GetModelName() string
- func (x *TraceSettingRequest) GetSettings() map[string]*TraceSettingRequest_SettingValue
- func (*TraceSettingRequest) ProtoMessage()
- func (x *TraceSettingRequest) ProtoReflect() protoreflect.Message
- func (x *TraceSettingRequest) Reset()
- func (x *TraceSettingRequest) String() string
- type TraceSettingRequest_SettingValue
- func (*TraceSettingRequest_SettingValue) Descriptor() ([]byte, []int) (deprecated)
- func (x *TraceSettingRequest_SettingValue) GetValue() []string
- func (*TraceSettingRequest_SettingValue) ProtoMessage()
- func (x *TraceSettingRequest_SettingValue) ProtoReflect() protoreflect.Message
- func (x *TraceSettingRequest_SettingValue) Reset()
- func (x *TraceSettingRequest_SettingValue) String() string
- type TraceSettingResponse
- func (*TraceSettingResponse) Descriptor() ([]byte, []int)deprecated
- func (x *TraceSettingResponse) GetSettings() map[string]*TraceSettingResponse_SettingValue
- func (*TraceSettingResponse) ProtoMessage()
- func (x *TraceSettingResponse) ProtoReflect() protoreflect.Message
- func (x *TraceSettingResponse) Reset()
- func (x *TraceSettingResponse) String() string
- type TraceSettingResponse_SettingValue
- func (*TraceSettingResponse_SettingValue) Descriptor() ([]byte, []int)deprecated
- func (x *TraceSettingResponse_SettingValue) GetValue() []string
- func (*TraceSettingResponse_SettingValue) ProtoMessage()
- func (x *TraceSettingResponse_SettingValue) ProtoReflect() protoreflect.Message
- func (x *TraceSettingResponse_SettingValue) Reset()
- func (x *TraceSettingResponse_SettingValue) String() string
- type UnimplementedGRPCInferenceServiceServer
- func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryRegister(context.Context, *CudaSharedMemoryRegisterRequest) (*CudaSharedMemoryRegisterResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryStatus(context.Context, *CudaSharedMemoryStatusRequest) (*CudaSharedMemoryStatusResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryUnregister(context.Context, *CudaSharedMemoryUnregisterRequest) (*CudaSharedMemoryUnregisterResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) LogSettings(context.Context, *LogSettingsRequest) (*LogSettingsResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ModelConfig(context.Context, *ModelConfigRequest) (*ModelConfigResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ModelInfer(context.Context, *ModelInferRequest) (*ModelInferResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ModelMetadata(context.Context, *ModelMetadataRequest) (*ModelMetadataResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ModelReady(context.Context, *ModelReadyRequest) (*ModelReadyResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ModelStatistics(context.Context, *ModelStatisticsRequest) (*ModelStatisticsResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ModelStreamInfer(GRPCInferenceService_ModelStreamInferServer) error
- func (UnimplementedGRPCInferenceServiceServer) RepositoryIndex(context.Context, *RepositoryIndexRequest) (*RepositoryIndexResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) RepositoryModelLoad(context.Context, *RepositoryModelLoadRequest) (*RepositoryModelLoadResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) RepositoryModelUnload(context.Context, *RepositoryModelUnloadRequest) (*RepositoryModelUnloadResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ServerLive(context.Context, *ServerLiveRequest) (*ServerLiveResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ServerMetadata(context.Context, *ServerMetadataRequest) (*ServerMetadataResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ServerReady(context.Context, *ServerReadyRequest) (*ServerReadyResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryRegister(context.Context, *SystemSharedMemoryRegisterRequest) (*SystemSharedMemoryRegisterResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryStatus(context.Context, *SystemSharedMemoryStatusRequest) (*SystemSharedMemoryStatusResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryUnregister(context.Context, *SystemSharedMemoryUnregisterRequest) (*SystemSharedMemoryUnregisterResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) TraceSetting(context.Context, *TraceSettingRequest) (*TraceSettingResponse, error)
- type UnimplementedHealthServer
- type UnsafeGRPCInferenceServiceServer
- type UnsafeHealthServer
Constants ¶
const ( GRPCInferenceService_ServerLive_FullMethodName = "/inference.GRPCInferenceService/ServerLive" GRPCInferenceService_ServerReady_FullMethodName = "/inference.GRPCInferenceService/ServerReady" GRPCInferenceService_ModelReady_FullMethodName = "/inference.GRPCInferenceService/ModelReady" GRPCInferenceService_ServerMetadata_FullMethodName = "/inference.GRPCInferenceService/ServerMetadata" GRPCInferenceService_ModelMetadata_FullMethodName = "/inference.GRPCInferenceService/ModelMetadata" GRPCInferenceService_ModelInfer_FullMethodName = "/inference.GRPCInferenceService/ModelInfer" GRPCInferenceService_ModelStreamInfer_FullMethodName = "/inference.GRPCInferenceService/ModelStreamInfer" GRPCInferenceService_ModelConfig_FullMethodName = "/inference.GRPCInferenceService/ModelConfig" GRPCInferenceService_ModelStatistics_FullMethodName = "/inference.GRPCInferenceService/ModelStatistics" GRPCInferenceService_RepositoryIndex_FullMethodName = "/inference.GRPCInferenceService/RepositoryIndex" GRPCInferenceService_RepositoryModelLoad_FullMethodName = "/inference.GRPCInferenceService/RepositoryModelLoad" GRPCInferenceService_RepositoryModelUnload_FullMethodName = "/inference.GRPCInferenceService/RepositoryModelUnload" GRPCInferenceService_TraceSetting_FullMethodName = "/inference.GRPCInferenceService/TraceSetting" GRPCInferenceService_LogSettings_FullMethodName = "/inference.GRPCInferenceService/LogSettings" )
const (
Health_Check_FullMethodName = "/grpc.health.v1.Health/Check"
)
Variables ¶
var ( HealthCheckResponse_ServingStatus_name = map[int32]string{ 0: "UNKNOWN", 1: "SERVING", 2: "NOT_SERVING", 3: "SERVICE_UNKNOWN", } HealthCheckResponse_ServingStatus_value = map[string]int32{ "UNKNOWN": 0, "SERVING": 1, "NOT_SERVING": 2, "SERVICE_UNKNOWN": 3, } )
Enum value maps for HealthCheckResponse_ServingStatus.
var ( DataType_name = map[int32]string{ 0: "TYPE_INVALID", 1: "TYPE_BOOL", 2: "TYPE_UINT8", 3: "TYPE_UINT16", 4: "TYPE_UINT32", 5: "TYPE_UINT64", 6: "TYPE_INT8", 7: "TYPE_INT16", 8: "TYPE_INT32", 9: "TYPE_INT64", 10: "TYPE_FP16", 11: "TYPE_FP32", 12: "TYPE_FP64", 13: "TYPE_STRING", 14: "TYPE_BF16", } DataType_value = map[string]int32{ "TYPE_INVALID": 0, "TYPE_BOOL": 1, "TYPE_UINT8": 2, "TYPE_UINT16": 3, "TYPE_UINT32": 4, "TYPE_UINT64": 5, "TYPE_INT8": 6, "TYPE_INT16": 7, "TYPE_INT32": 8, "TYPE_INT64": 9, "TYPE_FP16": 10, "TYPE_FP32": 11, "TYPE_FP64": 12, "TYPE_STRING": 13, "TYPE_BF16": 14, } )
Enum value maps for DataType.
var ( ModelInstanceGroup_Kind_name = map[int32]string{ 0: "KIND_AUTO", 1: "KIND_GPU", 2: "KIND_CPU", 3: "KIND_MODEL", } ModelInstanceGroup_Kind_value = map[string]int32{ "KIND_AUTO": 0, "KIND_GPU": 1, "KIND_CPU": 2, "KIND_MODEL": 3, } )
Enum value maps for ModelInstanceGroup_Kind.
var ( ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind_name = map[int32]string{ 0: "KIND_NVDLA", } ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind_value = map[string]int32{ "KIND_NVDLA": 0, } )
Enum value maps for ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind.
var ( ModelInput_Format_name = map[int32]string{ 0: "FORMAT_NONE", 1: "FORMAT_NHWC", 2: "FORMAT_NCHW", } ModelInput_Format_value = map[string]int32{ "FORMAT_NONE": 0, "FORMAT_NHWC": 1, "FORMAT_NCHW": 2, } )
Enum value maps for ModelInput_Format.
var ( BatchInput_Kind_name = map[int32]string{ 0: "BATCH_ELEMENT_COUNT", 1: "BATCH_ACCUMULATED_ELEMENT_COUNT", 2: "BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO", 3: "BATCH_MAX_ELEMENT_COUNT_AS_SHAPE", 4: "BATCH_ITEM_SHAPE", 5: "BATCH_ITEM_SHAPE_FLATTEN", } BatchInput_Kind_value = map[string]int32{ "BATCH_ELEMENT_COUNT": 0, "BATCH_ACCUMULATED_ELEMENT_COUNT": 1, "BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO": 2, "BATCH_MAX_ELEMENT_COUNT_AS_SHAPE": 3, "BATCH_ITEM_SHAPE": 4, "BATCH_ITEM_SHAPE_FLATTEN": 5, } )
Enum value maps for BatchInput_Kind.
var ( BatchOutput_Kind_name = map[int32]string{ 0: "BATCH_SCATTER_WITH_INPUT_SHAPE", } BatchOutput_Kind_value = map[string]int32{ "BATCH_SCATTER_WITH_INPUT_SHAPE": 0, } )
Enum value maps for BatchOutput_Kind.
var ( ModelOptimizationPolicy_ModelPriority_name = map[int32]string{ 0: "PRIORITY_DEFAULT", 1: "PRIORITY_MAX", 2: "PRIORITY_MIN", } ModelOptimizationPolicy_ModelPriority_value = map[string]int32{ "PRIORITY_DEFAULT": 0, "PRIORITY_MAX": 1, "PRIORITY_MIN": 2, } )
Enum value maps for ModelOptimizationPolicy_ModelPriority.
var ( ModelQueuePolicy_TimeoutAction_name = map[int32]string{ 0: "REJECT", 1: "DELAY", } ModelQueuePolicy_TimeoutAction_value = map[string]int32{ "REJECT": 0, "DELAY": 1, } )
Enum value maps for ModelQueuePolicy_TimeoutAction.
var ( ModelSequenceBatching_Control_Kind_name = map[int32]string{ 0: "CONTROL_SEQUENCE_START", 1: "CONTROL_SEQUENCE_READY", 2: "CONTROL_SEQUENCE_END", 3: "CONTROL_SEQUENCE_CORRID", } ModelSequenceBatching_Control_Kind_value = map[string]int32{ "CONTROL_SEQUENCE_START": 0, "CONTROL_SEQUENCE_READY": 1, "CONTROL_SEQUENCE_END": 2, "CONTROL_SEQUENCE_CORRID": 3, } )
Enum value maps for ModelSequenceBatching_Control_Kind.
var File_grpc_service_proto protoreflect.FileDescriptor
var File_health_proto protoreflect.FileDescriptor
var File_model_config_proto protoreflect.FileDescriptor
var GRPCInferenceService_ServiceDesc = grpc.ServiceDesc{ ServiceName: "inference.GRPCInferenceService", HandlerType: (*GRPCInferenceServiceServer)(nil), Methods: []grpc.MethodDesc{ { MethodName: "ServerLive", Handler: _GRPCInferenceService_ServerLive_Handler, }, { MethodName: "ServerReady", Handler: _GRPCInferenceService_ServerReady_Handler, }, { MethodName: "ModelReady", Handler: _GRPCInferenceService_ModelReady_Handler, }, { MethodName: "ServerMetadata", Handler: _GRPCInferenceService_ServerMetadata_Handler, }, { MethodName: "ModelMetadata", Handler: _GRPCInferenceService_ModelMetadata_Handler, }, { MethodName: "ModelInfer", Handler: _GRPCInferenceService_ModelInfer_Handler, }, { MethodName: "ModelConfig", Handler: _GRPCInferenceService_ModelConfig_Handler, }, { MethodName: "ModelStatistics", Handler: _GRPCInferenceService_ModelStatistics_Handler, }, { MethodName: "RepositoryIndex", Handler: _GRPCInferenceService_RepositoryIndex_Handler, }, { MethodName: "RepositoryModelLoad", Handler: _GRPCInferenceService_RepositoryModelLoad_Handler, }, { MethodName: "RepositoryModelUnload", Handler: _GRPCInferenceService_RepositoryModelUnload_Handler, }, { MethodName: "SystemSharedMemoryStatus", Handler: _GRPCInferenceService_SystemSharedMemoryStatus_Handler, }, { MethodName: "SystemSharedMemoryRegister", Handler: _GRPCInferenceService_SystemSharedMemoryRegister_Handler, }, { MethodName: "SystemSharedMemoryUnregister", Handler: _GRPCInferenceService_SystemSharedMemoryUnregister_Handler, }, { MethodName: "CudaSharedMemoryStatus", Handler: _GRPCInferenceService_CudaSharedMemoryStatus_Handler, }, { MethodName: "CudaSharedMemoryRegister", Handler: _GRPCInferenceService_CudaSharedMemoryRegister_Handler, }, { MethodName: "CudaSharedMemoryUnregister", Handler: _GRPCInferenceService_CudaSharedMemoryUnregister_Handler, }, { MethodName: "TraceSetting", Handler: _GRPCInferenceService_TraceSetting_Handler, }, { MethodName: "LogSettings", Handler: _GRPCInferenceService_LogSettings_Handler, }, }, Streams: []grpc.StreamDesc{ { StreamName: "ModelStreamInfer", Handler: _GRPCInferenceService_ModelStreamInfer_Handler, ServerStreams: true, ClientStreams: true, }, }, Metadata: "grpc_service.proto", }
GRPCInferenceService_ServiceDesc is the grpc.ServiceDesc for GRPCInferenceService service. It's only intended for direct use with grpc.RegisterService, and not to be introspected or modified (even as a copy).
var Health_ServiceDesc = grpc.ServiceDesc{ ServiceName: "grpc.health.v1.Health", HandlerType: (*HealthServer)(nil), Methods: []grpc.MethodDesc{ { MethodName: "Check", Handler: _Health_Check_Handler, }, }, Streams: []grpc.StreamDesc{}, Metadata: "health.proto", }
Health_ServiceDesc is the grpc.ServiceDesc for Health service. It's only intended for direct use with grpc.RegisterService, and not to be introspected or modified (even as a copy).
Functions ¶
func RegisterGRPCInferenceServiceServer ¶
func RegisterGRPCInferenceServiceServer(s grpc.ServiceRegistrar, srv GRPCInferenceServiceServer)
func RegisterHealthServer ¶
func RegisterHealthServer(s grpc.ServiceRegistrar, srv HealthServer)
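For orientation, a minimal wiring sketch for both registration functions. The import path below is a placeholder for wherever this generated package lives in your module; the embedded Unimplemented types make every RPC return an Unimplemented error until a real handler overrides it:

package main

import (
    "log"
    "net"

    "google.golang.org/grpc"

    pb "example.com/your/module/inference" // placeholder import path for this package
)

// server embeds both Unimplemented types so it satisfies the
// GRPCInferenceServiceServer and HealthServer interfaces.
type server struct {
    pb.UnimplementedGRPCInferenceServiceServer
    pb.UnimplementedHealthServer
}

func main() {
    lis, err := net.Listen("tcp", ":8001") // 8001 is Triton's conventional gRPC port
    if err != nil {
        log.Fatalf("listen: %v", err)
    }
    s := grpc.NewServer()
    pb.RegisterGRPCInferenceServiceServer(s, &server{})
    pb.RegisterHealthServer(s, &server{})
    log.Fatal(s.Serve(lis))
}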
Types ¶
type BatchInput ¶
type BatchInput struct { // @@ .. cpp:var:: Kind kind // @@ // @@ The kind of this batch input. // @@ Kind BatchInput_Kind `protobuf:"varint,1,opt,name=kind,proto3,enum=inference.BatchInput_Kind" json:"kind,omitempty"` // @@ .. cpp:var:: string target_name (repeated) // @@ // @@ The name of the model inputs that the backend will create // @@ for this batch input. // @@ TargetName []string `protobuf:"bytes,2,rep,name=target_name,json=targetName,proto3" json:"target_name,omitempty"` // @@ .. cpp:var:: DataType data_type // @@ // @@ The input's datatype. The data type can be TYPE_INT32 or // @@ TYPE_FP32. // @@ DataType DataType `protobuf:"varint,3,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"` // @@ .. cpp:var:: string source_input (repeated) // @@ // @@ The backend derives the value for each batch input from one or // @@ more other inputs. 'source_input' gives the names of those // @@ inputs. // @@ SourceInput []string `protobuf:"bytes,4,rep,name=source_input,json=sourceInput,proto3" json:"source_input,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message BatchInput @@ @@ A batch input is an additional input that must be added by @@ the backend based on all the requests in a batch. @@
func (*BatchInput) Descriptor
deprecated
func (*BatchInput) Descriptor() ([]byte, []int)
Deprecated: Use BatchInput.ProtoReflect.Descriptor instead.
func (*BatchInput) GetDataType ¶
func (x *BatchInput) GetDataType() DataType
func (*BatchInput) GetKind ¶
func (x *BatchInput) GetKind() BatchInput_Kind
func (*BatchInput) GetSourceInput ¶
func (x *BatchInput) GetSourceInput() []string
func (*BatchInput) GetTargetName ¶
func (x *BatchInput) GetTargetName() []string
func (*BatchInput) ProtoMessage ¶
func (*BatchInput) ProtoMessage()
func (*BatchInput) ProtoReflect ¶
func (x *BatchInput) ProtoReflect() protoreflect.Message
func (*BatchInput) Reset ¶
func (x *BatchInput) Reset()
func (*BatchInput) String ¶
func (x *BatchInput) String() string
type BatchInput_Kind ¶
type BatchInput_Kind int32
@@ @@ .. cpp:enum:: Kind @@ @@ The kind of the batch input. @@
const ( // @@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0 // @@ // @@ The element count of the 'source_input' will be added as // @@ input with shape [1]. // @@ BatchInput_BATCH_ELEMENT_COUNT BatchInput_Kind = 0 // @@ .. cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1 // @@ // @@ The accumulated element count of the 'source_input' will be // @@ added as input with shape [1]. For example, if there is a // @@ batch of two requests, each with 2 elements, an input of value // @@ 2 will be added to the first request, and an input of value // @@ 4 will be added to the second request. // @@ BatchInput_BATCH_ACCUMULATED_ELEMENT_COUNT BatchInput_Kind = 1 // @@ .. cpp:enumerator:: // @@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2 // @@ // @@ The accumulated element count of the 'source_input' will be // @@ added as input with shape [1], except for the first request // @@ in the batch. For the first request in the batch, the input // @@ will have shape [2] where the first element is value 0. // @@ BatchInput_BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO BatchInput_Kind = 2 // @@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3 // @@ // @@ Among the requests in the batch, the max element count of the // @@ 'source_input' will be added as input with shape // @@ [max_element_count] for the first request in the batch. // @@ For other requests, the input will have shape [0]. // @@ The data of the tensor will be uninitialized. // @@ BatchInput_BATCH_MAX_ELEMENT_COUNT_AS_SHAPE BatchInput_Kind = 3 // @@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE = 4 // @@ // @@ Among the requests in the batch, the shape of the // @@ 'source_input' will be added as input with shape // @@ [batch_size, len(input_dim)]. For example, if one // @@ batch-2 input with shape [3, 1] and one batch-1 input // @@ with shape [2, 2] are batched, the batch input will // @@ have shape [3, 2] and value [ [3, 1], [3, 1], [2, 2]]. // @@ BatchInput_BATCH_ITEM_SHAPE BatchInput_Kind = 4 // @@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE_FLATTEN = 5 // @@ // @@ Among the requests in the batch, the shape of the // @@ 'source_input' will be added as input with single dimensional // @@ shape [batch_size * len(input_dim)]. For example, if one // @@ batch-2 input with shape [3, 1] and one batch-1 input // @@ with shape [2, 2] are batched, the batch input will // @@ have shape [6] and value [3, 1, 3, 1, 2, 2]. // @@ BatchInput_BATCH_ITEM_SHAPE_FLATTEN BatchInput_Kind = 5 )
func (BatchInput_Kind) Descriptor ¶
func (BatchInput_Kind) Descriptor() protoreflect.EnumDescriptor
func (BatchInput_Kind) Enum ¶
func (x BatchInput_Kind) Enum() *BatchInput_Kind
func (BatchInput_Kind) EnumDescriptor
deprecated
func (BatchInput_Kind) EnumDescriptor() ([]byte, []int)
Deprecated: Use BatchInput_Kind.Descriptor instead.
func (BatchInput_Kind) Number ¶
func (x BatchInput_Kind) Number() protoreflect.EnumNumber
func (BatchInput_Kind) String ¶
func (x BatchInput_Kind) String() string
func (BatchInput_Kind) Type ¶
func (BatchInput_Kind) Type() protoreflect.EnumType
type BatchOutput ¶
type BatchOutput struct { // @@ .. cpp:var:: string target_name (repeated) // @@ // @@ The name of the outputs to be produced by this batch output // @@ specification. // @@ TargetName []string `protobuf:"bytes,1,rep,name=target_name,json=targetName,proto3" json:"target_name,omitempty"` // @@ .. cpp:var:: Kind kind // @@ // @@ The kind of this batch output. // @@ Kind BatchOutput_Kind `protobuf:"varint,2,opt,name=kind,proto3,enum=inference.BatchOutput_Kind" json:"kind,omitempty"` // @@ .. cpp:var:: string source_input (repeated) // @@ // @@ The backend derives each batch output from one or more inputs. // @@ 'source_input' gives the names of those inputs. // @@ SourceInput []string `protobuf:"bytes,3,rep,name=source_input,json=sourceInput,proto3" json:"source_input,omitempty"` // contains filtered or unexported fields }
@@.. cpp:var:: message BatchOutput @@ @@ A batch output is an output produced by the model that must be handled @@ differently by the backend based on all the requests in a batch. @@
func (*BatchOutput) Descriptor
deprecated
func (*BatchOutput) Descriptor() ([]byte, []int)
Deprecated: Use BatchOutput.ProtoReflect.Descriptor instead.
func (*BatchOutput) GetKind ¶
func (x *BatchOutput) GetKind() BatchOutput_Kind
func (*BatchOutput) GetSourceInput ¶
func (x *BatchOutput) GetSourceInput() []string
func (*BatchOutput) GetTargetName ¶
func (x *BatchOutput) GetTargetName() []string
func (*BatchOutput) ProtoMessage ¶
func (*BatchOutput) ProtoMessage()
func (*BatchOutput) ProtoReflect ¶
func (x *BatchOutput) ProtoReflect() protoreflect.Message
func (*BatchOutput) Reset ¶
func (x *BatchOutput) Reset()
func (*BatchOutput) String ¶
func (x *BatchOutput) String() string
type BatchOutput_Kind ¶
type BatchOutput_Kind int32
@@ @@ .. cpp:enum:: Kind @@ @@ The kind of the batch output. @@
const ( // @@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0 // @@ // @@ The output should be scattered according to the shape of // @@ 'source_input'. The dynamic dimension of the output will // @@ be set to the value of the same dimension in the input. // @@ BatchOutput_BATCH_SCATTER_WITH_INPUT_SHAPE BatchOutput_Kind = 0 )
func (BatchOutput_Kind) Descriptor ¶
func (BatchOutput_Kind) Descriptor() protoreflect.EnumDescriptor
func (BatchOutput_Kind) Enum ¶
func (x BatchOutput_Kind) Enum() *BatchOutput_Kind
func (BatchOutput_Kind) EnumDescriptor
deprecated
func (BatchOutput_Kind) EnumDescriptor() ([]byte, []int)
Deprecated: Use BatchOutput_Kind.Descriptor instead.
func (BatchOutput_Kind) Number ¶
func (x BatchOutput_Kind) Number() protoreflect.EnumNumber
func (BatchOutput_Kind) String ¶
func (x BatchOutput_Kind) String() string
func (BatchOutput_Kind) Type ¶
func (BatchOutput_Kind) Type() protoreflect.EnumType
type CudaSharedMemoryRegisterRequest ¶
type CudaSharedMemoryRegisterRequest struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the region to register. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ // @@ The raw serialized cudaIPC handle. // @@ RawHandle []byte `protobuf:"bytes,2,opt,name=raw_handle,json=rawHandle,proto3" json:"raw_handle,omitempty"` // @@ // @@ The GPU device ID on which the cudaIPC handle was created. // @@ DeviceId int64 `protobuf:"varint,3,opt,name=device_id,json=deviceId,proto3" json:"device_id,omitempty"` // @@ // @@ Size of the shared memory block, in bytes. // @@ ByteSize uint64 `protobuf:"varint,4,opt,name=byte_size,json=byteSize,proto3" json:"byte_size,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message CudaSharedMemoryRegisterRequest @@ @@ Request message for CudaSharedMemoryRegister. @@
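As an illustration of the fields above, a hedged sketch of building this request; rawHandle stands in for a serialized cudaIPC handle produced by the process that owns the GPU buffer (e.g. via cudaIpcGetMemHandle), and the region name and size are caller-chosen:

package main

import (
    pb "example.com/your/module/inference" // placeholder import path
)

// newCudaRegisterRequest is a hypothetical helper; the name must be
// unique among the regions registered with the server.
func newCudaRegisterRequest(rawHandle []byte) *pb.CudaSharedMemoryRegisterRequest {
    return &pb.CudaSharedMemoryRegisterRequest{
        Name:      "output0_gpu", // caller-chosen region name
        RawHandle: rawHandle,     // raw serialized cudaIPC handle
        DeviceId:  0,             // GPU on which the handle was created
        ByteSize:  1 << 20,       // region size in bytes (1 MiB here)
    }
}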
func (*CudaSharedMemoryRegisterRequest) Descriptor
deprecated
func (*CudaSharedMemoryRegisterRequest) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryRegisterRequest.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryRegisterRequest) GetByteSize ¶
func (x *CudaSharedMemoryRegisterRequest) GetByteSize() uint64
func (*CudaSharedMemoryRegisterRequest) GetDeviceId ¶
func (x *CudaSharedMemoryRegisterRequest) GetDeviceId() int64
func (*CudaSharedMemoryRegisterRequest) GetName ¶
func (x *CudaSharedMemoryRegisterRequest) GetName() string
func (*CudaSharedMemoryRegisterRequest) GetRawHandle ¶
func (x *CudaSharedMemoryRegisterRequest) GetRawHandle() []byte
func (*CudaSharedMemoryRegisterRequest) ProtoMessage ¶
func (*CudaSharedMemoryRegisterRequest) ProtoMessage()
func (*CudaSharedMemoryRegisterRequest) ProtoReflect ¶
func (x *CudaSharedMemoryRegisterRequest) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryRegisterRequest) Reset ¶
func (x *CudaSharedMemoryRegisterRequest) Reset()
func (*CudaSharedMemoryRegisterRequest) String ¶
func (x *CudaSharedMemoryRegisterRequest) String() string
type CudaSharedMemoryRegisterResponse ¶
type CudaSharedMemoryRegisterResponse struct {
// contains filtered or unexported fields
}
@@ @@.. cpp:var:: message CudaSharedMemoryRegisterResponse @@ @@ Response message for CudaSharedMemoryRegister. @@
func (*CudaSharedMemoryRegisterResponse) Descriptor
deprecated
func (*CudaSharedMemoryRegisterResponse) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryRegisterResponse.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryRegisterResponse) ProtoMessage ¶
func (*CudaSharedMemoryRegisterResponse) ProtoMessage()
func (*CudaSharedMemoryRegisterResponse) ProtoReflect ¶
func (x *CudaSharedMemoryRegisterResponse) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryRegisterResponse) Reset ¶
func (x *CudaSharedMemoryRegisterResponse) Reset()
func (*CudaSharedMemoryRegisterResponse) String ¶
func (x *CudaSharedMemoryRegisterResponse) String() string
type CudaSharedMemoryStatusRequest ¶
type CudaSharedMemoryStatusRequest struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the region to get status for. If empty, the // @@ status is returned for all registered regions. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message CudaSharedMemoryStatusRequest @@ @@ Request message for CudaSharedMemoryStatus. @@
func (*CudaSharedMemoryStatusRequest) Descriptor
deprecated
func (*CudaSharedMemoryStatusRequest) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryStatusRequest.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryStatusRequest) GetName ¶
func (x *CudaSharedMemoryStatusRequest) GetName() string
func (*CudaSharedMemoryStatusRequest) ProtoMessage ¶
func (*CudaSharedMemoryStatusRequest) ProtoMessage()
func (*CudaSharedMemoryStatusRequest) ProtoReflect ¶
func (x *CudaSharedMemoryStatusRequest) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryStatusRequest) Reset ¶
func (x *CudaSharedMemoryStatusRequest) Reset()
func (*CudaSharedMemoryStatusRequest) String ¶
func (x *CudaSharedMemoryStatusRequest) String() string
type CudaSharedMemoryStatusResponse ¶
type CudaSharedMemoryStatusResponse struct { // @@ .. cpp:var:: map<string,RegionStatus> regions // @@ // @@ Status for each of the registered regions, indexed by // @@ region name. // @@ Regions map[string]*CudaSharedMemoryStatusResponse_RegionStatus `` /* 155-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message CudaSharedMemoryStatusResponse @@ @@ Response message for CudaSharedMemoryStatus. @@
func (*CudaSharedMemoryStatusResponse) Descriptor
deprecated
func (*CudaSharedMemoryStatusResponse) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryStatusResponse.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryStatusResponse) GetRegions ¶
func (x *CudaSharedMemoryStatusResponse) GetRegions() map[string]*CudaSharedMemoryStatusResponse_RegionStatus
func (*CudaSharedMemoryStatusResponse) ProtoMessage ¶
func (*CudaSharedMemoryStatusResponse) ProtoMessage()
func (*CudaSharedMemoryStatusResponse) ProtoReflect ¶
func (x *CudaSharedMemoryStatusResponse) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryStatusResponse) Reset ¶
func (x *CudaSharedMemoryStatusResponse) Reset()
func (*CudaSharedMemoryStatusResponse) String ¶
func (x *CudaSharedMemoryStatusResponse) String() string
type CudaSharedMemoryStatusResponse_RegionStatus ¶
type CudaSharedMemoryStatusResponse_RegionStatus struct { // @@ .. cpp:var:: string name // @@ // @@ The name for the shared memory region. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ // @@ The GPU device ID where the cudaIPC handle was created. // @@ DeviceId uint64 `protobuf:"varint,2,opt,name=device_id,json=deviceId,proto3" json:"device_id,omitempty"` // @@ // @@ Size of the shared memory region, in bytes. // @@ ByteSize uint64 `protobuf:"varint,3,opt,name=byte_size,json=byteSize,proto3" json:"byte_size,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message RegionStatus @@ @@ Status for a shared memory region. @@
func (*CudaSharedMemoryStatusResponse_RegionStatus) Descriptor
deprecated
func (*CudaSharedMemoryStatusResponse_RegionStatus) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryStatusResponse_RegionStatus.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryStatusResponse_RegionStatus) GetByteSize ¶
func (x *CudaSharedMemoryStatusResponse_RegionStatus) GetByteSize() uint64
func (*CudaSharedMemoryStatusResponse_RegionStatus) GetDeviceId ¶
func (x *CudaSharedMemoryStatusResponse_RegionStatus) GetDeviceId() uint64
func (*CudaSharedMemoryStatusResponse_RegionStatus) GetName ¶
func (x *CudaSharedMemoryStatusResponse_RegionStatus) GetName() string
func (*CudaSharedMemoryStatusResponse_RegionStatus) ProtoMessage ¶
func (*CudaSharedMemoryStatusResponse_RegionStatus) ProtoMessage()
func (*CudaSharedMemoryStatusResponse_RegionStatus) ProtoReflect ¶
func (x *CudaSharedMemoryStatusResponse_RegionStatus) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryStatusResponse_RegionStatus) Reset ¶
func (x *CudaSharedMemoryStatusResponse_RegionStatus) Reset()
func (*CudaSharedMemoryStatusResponse_RegionStatus) String ¶
func (x *CudaSharedMemoryStatusResponse_RegionStatus) String() string
type CudaSharedMemoryUnregisterRequest ¶
type CudaSharedMemoryUnregisterRequest struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the CUDA region to unregister. If empty, // @@ all CUDA shared-memory regions are unregistered. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message CudaSharedMemoryUnregisterRequest @@ @@ Request message for CudaSharedMemoryUnregister. @@
func (*CudaSharedMemoryUnregisterRequest) Descriptor
deprecated
func (*CudaSharedMemoryUnregisterRequest) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryUnregisterRequest.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryUnregisterRequest) GetName ¶
func (x *CudaSharedMemoryUnregisterRequest) GetName() string
func (*CudaSharedMemoryUnregisterRequest) ProtoMessage ¶
func (*CudaSharedMemoryUnregisterRequest) ProtoMessage()
func (*CudaSharedMemoryUnregisterRequest) ProtoReflect ¶
func (x *CudaSharedMemoryUnregisterRequest) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryUnregisterRequest) Reset ¶
func (x *CudaSharedMemoryUnregisterRequest) Reset()
func (*CudaSharedMemoryUnregisterRequest) String ¶
func (x *CudaSharedMemoryUnregisterRequest) String() string
type CudaSharedMemoryUnregisterResponse ¶
type CudaSharedMemoryUnregisterResponse struct {
// contains filtered or unexported fields
}
@@ @@.. cpp:var:: message CudaSharedMemoryUnregisterResponse @@ @@ Response message for CudaSharedMemoryUnregister. @@
func (*CudaSharedMemoryUnregisterResponse) Descriptor
deprecated
func (*CudaSharedMemoryUnregisterResponse) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryUnregisterResponse.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryUnregisterResponse) ProtoMessage ¶
func (*CudaSharedMemoryUnregisterResponse) ProtoMessage()
func (*CudaSharedMemoryUnregisterResponse) ProtoReflect ¶
func (x *CudaSharedMemoryUnregisterResponse) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryUnregisterResponse) Reset ¶
func (x *CudaSharedMemoryUnregisterResponse) Reset()
func (*CudaSharedMemoryUnregisterResponse) String ¶
func (x *CudaSharedMemoryUnregisterResponse) String() string
type DataType ¶
type DataType int32
@@ @@.. cpp:enum:: DataType @@ @@ Data types supported for input and output tensors. @@
const ( // @@ .. cpp:enumerator:: DataType::INVALID = 0 DataType_TYPE_INVALID DataType = 0 // @@ .. cpp:enumerator:: DataType::BOOL = 1 DataType_TYPE_BOOL DataType = 1 // @@ .. cpp:enumerator:: DataType::UINT8 = 2 DataType_TYPE_UINT8 DataType = 2 // @@ .. cpp:enumerator:: DataType::UINT16 = 3 DataType_TYPE_UINT16 DataType = 3 // @@ .. cpp:enumerator:: DataType::UINT32 = 4 DataType_TYPE_UINT32 DataType = 4 // @@ .. cpp:enumerator:: DataType::UINT64 = 5 DataType_TYPE_UINT64 DataType = 5 // @@ .. cpp:enumerator:: DataType::INT8 = 6 DataType_TYPE_INT8 DataType = 6 // @@ .. cpp:enumerator:: DataType::INT16 = 7 DataType_TYPE_INT16 DataType = 7 // @@ .. cpp:enumerator:: DataType::INT32 = 8 DataType_TYPE_INT32 DataType = 8 // @@ .. cpp:enumerator:: DataType::INT64 = 9 DataType_TYPE_INT64 DataType = 9 // @@ .. cpp:enumerator:: DataType::FP16 = 10 DataType_TYPE_FP16 DataType = 10 // @@ .. cpp:enumerator:: DataType::FP32 = 11 DataType_TYPE_FP32 DataType = 11 // @@ .. cpp:enumerator:: DataType::FP64 = 12 DataType_TYPE_FP64 DataType = 12 // @@ .. cpp:enumerator:: DataType::STRING = 13 DataType_TYPE_STRING DataType = 13 // @@ .. cpp:enumerator:: DataType::BF16 = 14 DataType_TYPE_BF16 DataType = 14 )
func (DataType) Descriptor ¶
func (DataType) Descriptor() protoreflect.EnumDescriptor
func (DataType) EnumDescriptor
deprecated
func (DataType) Number ¶
func (x DataType) Number() protoreflect.EnumNumber
func (DataType) Type ¶
func (DataType) Type() protoreflect.EnumType
type GRPCInferenceServiceClient ¶
type GRPCInferenceServiceClient interface { // @@ .. cpp:var:: rpc ServerLive(ServerLiveRequest) returns // @@ (ServerLiveResponse) // @@ // @@ Check liveness of the inference server. // @@ ServerLive(ctx context.Context, in *ServerLiveRequest, opts ...grpc.CallOption) (*ServerLiveResponse, error) // @@ .. cpp:var:: rpc ServerReady(ServerReadyRequest) returns // @@ (ServerReadyResponse) // @@ // @@ Check readiness of the inference server. // @@ ServerReady(ctx context.Context, in *ServerReadyRequest, opts ...grpc.CallOption) (*ServerReadyResponse, error) // @@ .. cpp:var:: rpc ModelReady(ModelReadyRequest) returns // @@ (ModelReadyResponse) // @@ // @@ Check readiness of a model in the inference server. // @@ ModelReady(ctx context.Context, in *ModelReadyRequest, opts ...grpc.CallOption) (*ModelReadyResponse, error) // @@ .. cpp:var:: rpc ServerMetadata(ServerMetadataRequest) returns // @@ (ServerMetadataResponse) // @@ // @@ Get server metadata. // @@ ServerMetadata(ctx context.Context, in *ServerMetadataRequest, opts ...grpc.CallOption) (*ServerMetadataResponse, error) // @@ .. cpp:var:: rpc ModelMetadata(ModelMetadataRequest) returns // @@ (ModelMetadataResponse) // @@ // @@ Get model metadata. // @@ ModelMetadata(ctx context.Context, in *ModelMetadataRequest, opts ...grpc.CallOption) (*ModelMetadataResponse, error) // @@ .. cpp:var:: rpc ModelInfer(ModelInferRequest) returns // @@ (ModelInferResponse) // @@ // @@ Perform inference using a specific model. // @@ ModelInfer(ctx context.Context, in *ModelInferRequest, opts ...grpc.CallOption) (*ModelInferResponse, error) // @@ .. cpp:var:: rpc ModelStreamInfer(stream ModelInferRequest) returns // @@ (stream ModelStreamInferResponse) // @@ // @@ Perform streaming inference. // @@ ModelStreamInfer(ctx context.Context, opts ...grpc.CallOption) (GRPCInferenceService_ModelStreamInferClient, error) // @@ .. cpp:var:: rpc ModelConfig(ModelConfigRequest) returns // @@ (ModelConfigResponse) // @@ // @@ Get model configuration. // @@ ModelConfig(ctx context.Context, in *ModelConfigRequest, opts ...grpc.CallOption) (*ModelConfigResponse, error) // @@ .. cpp:var:: rpc ModelStatistics( // @@ ModelStatisticsRequest) // @@ returns (ModelStatisticsResponse) // @@ // @@ Get the cumulative inference statistics for a model. // @@ ModelStatistics(ctx context.Context, in *ModelStatisticsRequest, opts ...grpc.CallOption) (*ModelStatisticsResponse, error) // @@ .. cpp:var:: rpc RepositoryIndex(RepositoryIndexRequest) returns // @@ (RepositoryIndexResponse) // @@ // @@ Get the index of model repository contents. // @@ RepositoryIndex(ctx context.Context, in *RepositoryIndexRequest, opts ...grpc.CallOption) (*RepositoryIndexResponse, error) // @@ .. cpp:var:: rpc RepositoryModelLoad(RepositoryModelLoadRequest) returns // @@ (RepositoryModelLoadResponse) // @@ // @@ Load or reload a model from a repository. // @@ RepositoryModelLoad(ctx context.Context, in *RepositoryModelLoadRequest, opts ...grpc.CallOption) (*RepositoryModelLoadResponse, error) // @@ .. cpp:var:: rpc RepositoryModelUnload(RepositoryModelUnloadRequest) // @@ returns (RepositoryModelUnloadResponse) // @@ // @@ Unload a model. // @@ RepositoryModelUnload(ctx context.Context, in *RepositoryModelUnloadRequest, opts ...grpc.CallOption) (*RepositoryModelUnloadResponse, error) // @@ .. cpp:var:: rpc SystemSharedMemoryStatus( // @@ SystemSharedMemoryStatusRequest) // @@ returns (SystemSharedMemoryStatusResponse) // @@ // @@ Get the status of all registered system-shared-memory regions. // @@ SystemSharedMemoryStatus(ctx context.Context, in *SystemSharedMemoryStatusRequest, opts ...grpc.CallOption) (*SystemSharedMemoryStatusResponse, error) // @@ .. cpp:var:: rpc SystemSharedMemoryRegister( // @@ SystemSharedMemoryRegisterRequest) // @@ returns (SystemSharedMemoryRegisterResponse) // @@ // @@ Register a system-shared-memory region. // @@ SystemSharedMemoryRegister(ctx context.Context, in *SystemSharedMemoryRegisterRequest, opts ...grpc.CallOption) (*SystemSharedMemoryRegisterResponse, error) // @@ .. cpp:var:: rpc SystemSharedMemoryUnregister( // @@ SystemSharedMemoryUnregisterRequest) // @@ returns (SystemSharedMemoryUnregisterResponse) // @@ // @@ Unregister a system-shared-memory region. // @@ SystemSharedMemoryUnregister(ctx context.Context, in *SystemSharedMemoryUnregisterRequest, opts ...grpc.CallOption) (*SystemSharedMemoryUnregisterResponse, error) // @@ .. cpp:var:: rpc CudaSharedMemoryStatus( // @@ CudaSharedMemoryStatusRequest) // @@ returns (CudaSharedMemoryStatusResponse) // @@ // @@ Get the status of all registered CUDA-shared-memory regions. // @@ CudaSharedMemoryStatus(ctx context.Context, in *CudaSharedMemoryStatusRequest, opts ...grpc.CallOption) (*CudaSharedMemoryStatusResponse, error) // @@ .. cpp:var:: rpc CudaSharedMemoryRegister( // @@ CudaSharedMemoryRegisterRequest) // @@ returns (CudaSharedMemoryRegisterResponse) // @@ // @@ Register a CUDA-shared-memory region. // @@ CudaSharedMemoryRegister(ctx context.Context, in *CudaSharedMemoryRegisterRequest, opts ...grpc.CallOption) (*CudaSharedMemoryRegisterResponse, error) // @@ .. cpp:var:: rpc CudaSharedMemoryUnregister( // @@ CudaSharedMemoryUnregisterRequest) // @@ returns (CudaSharedMemoryUnregisterResponse) // @@ // @@ Unregister a CUDA-shared-memory region. // @@ CudaSharedMemoryUnregister(ctx context.Context, in *CudaSharedMemoryUnregisterRequest, opts ...grpc.CallOption) (*CudaSharedMemoryUnregisterResponse, error) // @@ .. cpp:var:: rpc TraceSetting(TraceSettingRequest) // @@ returns (TraceSettingResponse) // @@ // @@ Update and get the trace setting of the Triton server. // @@ TraceSetting(ctx context.Context, in *TraceSettingRequest, opts ...grpc.CallOption) (*TraceSettingResponse, error) // @@ .. cpp:var:: rpc LogSettings(LogSettingsRequest) // @@ returns (LogSettingsResponse) // @@ // @@ Update and get the log settings of the Triton server. // @@ LogSettings(ctx context.Context, in *LogSettingsRequest, opts ...grpc.CallOption) (*LogSettingsResponse, error) }
GRPCInferenceServiceClient is the client API for GRPCInferenceService service.
For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
@@ @@.. cpp:var:: service InferenceService @@ @@ Inference Server GRPC endpoints. @@
func NewGRPCInferenceServiceClient ¶
func NewGRPCInferenceServiceClient(cc grpc.ClientConnInterface) GRPCInferenceServiceClient
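A minimal client sketch, assuming Triton's conventional gRPC endpoint localhost:8001, a placeholder import path, and the generated GetLive accessor on ServerLiveResponse:

package main

import (
    "context"
    "log"
    "time"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"

    pb "example.com/your/module/inference" // placeholder import path
)

func main() {
    conn, err := grpc.Dial("localhost:8001",
        grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        log.Fatalf("dial: %v", err)
    }
    defer conn.Close()

    client := pb.NewGRPCInferenceServiceClient(conn)
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()

    // ServerLive is the lightest-weight call; ModelInfer and the other
    // unary RPCs follow the same request/response pattern.
    live, err := client.ServerLive(ctx, &pb.ServerLiveRequest{})
    if err != nil {
        log.Fatalf("ServerLive: %v", err)
    }
    log.Printf("server live: %v", live.GetLive())
}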
type GRPCInferenceServiceServer ¶
type GRPCInferenceServiceServer interface { // @@ .. cpp:var:: rpc ServerLive(ServerLiveRequest) returns // @@ (ServerLiveResponse) // @@ // @@ Check liveness of the inference server. // @@ ServerLive(context.Context, *ServerLiveRequest) (*ServerLiveResponse, error) // @@ .. cpp:var:: rpc ServerReady(ServerReadyRequest) returns // @@ (ServerReadyResponse) // @@ // @@ Check readiness of the inference server. // @@ ServerReady(context.Context, *ServerReadyRequest) (*ServerReadyResponse, error) // @@ .. cpp:var:: rpc ModelReady(ModelReadyRequest) returns // @@ (ModelReadyResponse) // @@ // @@ Check readiness of a model in the inference server. // @@ ModelReady(context.Context, *ModelReadyRequest) (*ModelReadyResponse, error) // @@ .. cpp:var:: rpc ServerMetadata(ServerMetadataRequest) returns // @@ (ServerMetadataResponse) // @@ // @@ Get server metadata. // @@ ServerMetadata(context.Context, *ServerMetadataRequest) (*ServerMetadataResponse, error) // @@ .. cpp:var:: rpc ModelMetadata(ModelMetadataRequest) returns // @@ (ModelMetadataResponse) // @@ // @@ Get model metadata. // @@ ModelMetadata(context.Context, *ModelMetadataRequest) (*ModelMetadataResponse, error) // @@ .. cpp:var:: rpc ModelInfer(ModelInferRequest) returns // @@ (ModelInferResponse) // @@ // @@ Perform inference using a specific model. // @@ ModelInfer(context.Context, *ModelInferRequest) (*ModelInferResponse, error) // @@ .. cpp:var:: rpc ModelStreamInfer(stream ModelInferRequest) returns // @@ (stream ModelStreamInferResponse) // @@ // @@ Perform streaming inference. // @@ ModelStreamInfer(GRPCInferenceService_ModelStreamInferServer) error // @@ .. cpp:var:: rpc ModelConfig(ModelConfigRequest) returns // @@ (ModelConfigResponse) // @@ // @@ Get model configuration. // @@ ModelConfig(context.Context, *ModelConfigRequest) (*ModelConfigResponse, error) // @@ .. cpp:var:: rpc ModelStatistics( // @@ ModelStatisticsRequest) // @@ returns (ModelStatisticsResponse) // @@ // @@ Get the cumulative inference statistics for a model. // @@ ModelStatistics(context.Context, *ModelStatisticsRequest) (*ModelStatisticsResponse, error) // @@ .. cpp:var:: rpc RepositoryIndex(RepositoryIndexRequest) returns // @@ (RepositoryIndexResponse) // @@ // @@ Get the index of model repository contents. // @@ RepositoryIndex(context.Context, *RepositoryIndexRequest) (*RepositoryIndexResponse, error) // @@ .. cpp:var:: rpc RepositoryModelLoad(RepositoryModelLoadRequest) returns // @@ (RepositoryModelLoadResponse) // @@ // @@ Load or reload a model from a repository. // @@ RepositoryModelLoad(context.Context, *RepositoryModelLoadRequest) (*RepositoryModelLoadResponse, error) // @@ .. cpp:var:: rpc RepositoryModelUnload(RepositoryModelUnloadRequest) // @@ returns (RepositoryModelUnloadResponse) // @@ // @@ Unload a model. // @@ RepositoryModelUnload(context.Context, *RepositoryModelUnloadRequest) (*RepositoryModelUnloadResponse, error) // @@ .. cpp:var:: rpc SystemSharedMemoryStatus( // @@ SystemSharedMemoryStatusRequest) // @@ returns (SystemSharedMemoryStatusResponse) // @@ // @@ Get the status of all registered system-shared-memory regions. // @@ SystemSharedMemoryStatus(context.Context, *SystemSharedMemoryStatusRequest) (*SystemSharedMemoryStatusResponse, error) // @@ .. cpp:var:: rpc SystemSharedMemoryRegister( // @@ SystemSharedMemoryRegisterRequest) // @@ returns (SystemSharedMemoryRegisterResponse) // @@ // @@ Register a system-shared-memory region. // @@ SystemSharedMemoryRegister(context.Context, *SystemSharedMemoryRegisterRequest) (*SystemSharedMemoryRegisterResponse, error) // @@ .. cpp:var:: rpc SystemSharedMemoryUnregister( // @@ SystemSharedMemoryUnregisterRequest) // @@ returns (SystemSharedMemoryUnregisterResponse) // @@ // @@ Unregister a system-shared-memory region. // @@ SystemSharedMemoryUnregister(context.Context, *SystemSharedMemoryUnregisterRequest) (*SystemSharedMemoryUnregisterResponse, error) // @@ .. cpp:var:: rpc CudaSharedMemoryStatus( // @@ CudaSharedMemoryStatusRequest) // @@ returns (CudaSharedMemoryStatusResponse) // @@ // @@ Get the status of all registered CUDA-shared-memory regions. // @@ CudaSharedMemoryStatus(context.Context, *CudaSharedMemoryStatusRequest) (*CudaSharedMemoryStatusResponse, error) // @@ .. cpp:var:: rpc CudaSharedMemoryRegister( // @@ CudaSharedMemoryRegisterRequest) // @@ returns (CudaSharedMemoryRegisterResponse) // @@ // @@ Register a CUDA-shared-memory region. // @@ CudaSharedMemoryRegister(context.Context, *CudaSharedMemoryRegisterRequest) (*CudaSharedMemoryRegisterResponse, error) // @@ .. cpp:var:: rpc CudaSharedMemoryUnregister( // @@ CudaSharedMemoryUnregisterRequest) // @@ returns (CudaSharedMemoryUnregisterResponse) // @@ // @@ Unregister a CUDA-shared-memory region. // @@ CudaSharedMemoryUnregister(context.Context, *CudaSharedMemoryUnregisterRequest) (*CudaSharedMemoryUnregisterResponse, error) // @@ .. cpp:var:: rpc TraceSetting(TraceSettingRequest) // @@ returns (TraceSettingResponse) // @@ // @@ Update and get the trace setting of the Triton server. // @@ TraceSetting(context.Context, *TraceSettingRequest) (*TraceSettingResponse, error) // @@ .. cpp:var:: rpc LogSettings(LogSettingsRequest) // @@ returns (LogSettingsResponse) // @@ // @@ Update and get the log settings of the Triton server. // @@ LogSettings(context.Context, *LogSettingsRequest) (*LogSettingsResponse, error) // contains filtered or unexported methods }
GRPCInferenceServiceServer is the server API for GRPCInferenceService service. All implementations must embed UnimplementedGRPCInferenceServiceServer for forward compatibility.
@@ @@.. cpp:var:: service InferenceService @@ @@ Inference Server GRPC endpoints. @@
type GRPCInferenceService_ModelStreamInferClient ¶
type GRPCInferenceService_ModelStreamInferClient interface { Send(*ModelInferRequest) error Recv() (*ModelStreamInferResponse, error) grpc.ClientStream }
type GRPCInferenceService_ModelStreamInferServer ¶
type GRPCInferenceService_ModelStreamInferServer interface { Send(*ModelStreamInferResponse) error Recv() (*ModelInferRequest, error) grpc.ServerStream }
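A sketch of one round trip on the bidirectional stream; client and req are assumed to come from elsewhere (see the client sketch above), and production callers typically run the Send and Recv sides in separate goroutines:

package main

import (
    "context"
    "fmt"

    pb "example.com/your/module/inference" // placeholder import path
)

// streamOnce opens the stream, sends a single request, waits for the
// matching response, then closes the send side.
func streamOnce(ctx context.Context, client pb.GRPCInferenceServiceClient, req *pb.ModelInferRequest) (*pb.ModelStreamInferResponse, error) {
    stream, err := client.ModelStreamInfer(ctx)
    if err != nil {
        return nil, fmt.Errorf("open stream: %w", err)
    }
    if err := stream.Send(req); err != nil {
        return nil, fmt.Errorf("send: %w", err)
    }
    resp, err := stream.Recv()
    if err != nil {
        return nil, fmt.Errorf("recv: %w", err)
    }
    return resp, stream.CloseSend() // CloseSend comes from the embedded grpc.ClientStream
}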
type HealthCheckRequest ¶
type HealthCheckRequest struct { Service string `protobuf:"bytes,1,opt,name=service,proto3" json:"service,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message HealthCheckRequest @@ @@ Request message for HealthCheck. @@
func (*HealthCheckRequest) Descriptor
deprecated
func (*HealthCheckRequest) Descriptor() ([]byte, []int)
Deprecated: Use HealthCheckRequest.ProtoReflect.Descriptor instead.
func (*HealthCheckRequest) GetService ¶
func (x *HealthCheckRequest) GetService() string
func (*HealthCheckRequest) ProtoMessage ¶
func (*HealthCheckRequest) ProtoMessage()
func (*HealthCheckRequest) ProtoReflect ¶
func (x *HealthCheckRequest) ProtoReflect() protoreflect.Message
func (*HealthCheckRequest) Reset ¶
func (x *HealthCheckRequest) Reset()
func (*HealthCheckRequest) String ¶
func (x *HealthCheckRequest) String() string
type HealthCheckResponse ¶
type HealthCheckResponse struct { Status HealthCheckResponse_ServingStatus `protobuf:"varint,1,opt,name=status,proto3,enum=grpc.health.v1.HealthCheckResponse_ServingStatus" json:"status,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message HealthCheckResponse @@ @@ Response message for HealthCheck. @@
func (*HealthCheckResponse) Descriptor
deprecated
func (*HealthCheckResponse) Descriptor() ([]byte, []int)
Deprecated: Use HealthCheckResponse.ProtoReflect.Descriptor instead.
func (*HealthCheckResponse) GetStatus ¶
func (x *HealthCheckResponse) GetStatus() HealthCheckResponse_ServingStatus
func (*HealthCheckResponse) ProtoMessage ¶
func (*HealthCheckResponse) ProtoMessage()
func (*HealthCheckResponse) ProtoReflect ¶
func (x *HealthCheckResponse) ProtoReflect() protoreflect.Message
func (*HealthCheckResponse) Reset ¶
func (x *HealthCheckResponse) Reset()
func (*HealthCheckResponse) String ¶
func (x *HealthCheckResponse) String() string
type HealthCheckResponse_ServingStatus ¶
type HealthCheckResponse_ServingStatus int32
@@ @@.. cpp:enum:: ServingStatus @@ @@ Statuses supported by GRPC's health check. @@
const ( HealthCheckResponse_UNKNOWN HealthCheckResponse_ServingStatus = 0 HealthCheckResponse_SERVING HealthCheckResponse_ServingStatus = 1 HealthCheckResponse_NOT_SERVING HealthCheckResponse_ServingStatus = 2 HealthCheckResponse_SERVICE_UNKNOWN HealthCheckResponse_ServingStatus = 3 )
func (HealthCheckResponse_ServingStatus) Descriptor ¶
func (HealthCheckResponse_ServingStatus) Descriptor() protoreflect.EnumDescriptor
func (HealthCheckResponse_ServingStatus) Enum ¶
func (x HealthCheckResponse_ServingStatus) Enum() *HealthCheckResponse_ServingStatus
func (HealthCheckResponse_ServingStatus) EnumDescriptor
deprecated
func (HealthCheckResponse_ServingStatus) EnumDescriptor() ([]byte, []int)
Deprecated: Use HealthCheckResponse_ServingStatus.Descriptor instead.
func (HealthCheckResponse_ServingStatus) Number ¶
func (x HealthCheckResponse_ServingStatus) Number() protoreflect.EnumNumber
func (HealthCheckResponse_ServingStatus) String ¶
func (x HealthCheckResponse_ServingStatus) String() string
func (HealthCheckResponse_ServingStatus) Type ¶
func (HealthCheckResponse_ServingStatus) Type() protoreflect.EnumType
type HealthClient ¶
type HealthClient interface { // @@ .. cpp:var:: rpc Check(HealthCheckRequest) returns // @@ (HealthCheckResponse) // @@ // @@ Get serving status of the inference server. // @@ Check(ctx context.Context, in *HealthCheckRequest, opts ...grpc.CallOption) (*HealthCheckResponse, error) }
HealthClient is the client API for Health service.
For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
@@ @@.. cpp:var:: service Health @@ @@ Health service for GRPC endpoints. @@
func NewHealthClient ¶
func NewHealthClient(cc grpc.ClientConnInterface) HealthClient
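A sketch of a readiness probe built on this client; under the standard gRPC health protocol an empty Service name asks about the server as a whole:

package main

import (
    "context"

    pb "example.com/your/module/inference" // placeholder import path
)

// isServing reports whether the queried service is in the SERVING state.
func isServing(ctx context.Context, hc pb.HealthClient, service string) (bool, error) {
    resp, err := hc.Check(ctx, &pb.HealthCheckRequest{Service: service})
    if err != nil {
        return false, err
    }
    return resp.GetStatus() == pb.HealthCheckResponse_SERVING, nil
}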
type HealthServer ¶
type HealthServer interface { // @@ .. cpp:var:: rpc Check(HealthCheckRequest) returns // @@ (HealthCheckResponse) // @@ // @@ Get serving status of the inference server. // @@ Check(context.Context, *HealthCheckRequest) (*HealthCheckResponse, error) // contains filtered or unexported methods }
HealthServer is the server API for Health service. All implementations must embed UnimplementedHealthServer for forward compatibility.
@@ @@.. cpp:var:: service Health @@ @@ Health service for GRPC endpoints. @@
type InferBatchStatistics ¶
type InferBatchStatistics struct { // @@ .. cpp:var:: uint64 batch_size // @@ // @@ The size of the batch. // @@ BatchSize uint64 `protobuf:"varint,1,opt,name=batch_size,json=batchSize,proto3" json:"batch_size,omitempty"` // @@ .. cpp:var:: StatisticDuration compute_input // @@ // @@ The count and cumulative duration to prepare input tensor data as // @@ required by the model framework / backend with the given batch size. // @@ For example, this duration should include the time to copy input // @@ tensor data to the GPU. // @@ ComputeInput *StatisticDuration `protobuf:"bytes,2,opt,name=compute_input,json=computeInput,proto3" json:"compute_input,omitempty"` // @@ .. cpp:var:: StatisticDuration compute_infer // @@ // @@ The count and cumulative duration to execute the model with the given // @@ batch size. // @@ ComputeInfer *StatisticDuration `protobuf:"bytes,3,opt,name=compute_infer,json=computeInfer,proto3" json:"compute_infer,omitempty"` // @@ .. cpp:var:: StatisticDuration compute_output // @@ // @@ The count and cumulative duration to extract output tensor data // @@ produced by the model framework / backend with the given batch size. // @@ For example, this duration should include the time to copy output // @@ tensor data from the GPU. // @@ ComputeOutput *StatisticDuration `protobuf:"bytes,4,opt,name=compute_output,json=computeOutput,proto3" json:"compute_output,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message InferBatchStatistics @@ @@ Inference batch statistics. @@
func (*InferBatchStatistics) Descriptor
deprecated
func (*InferBatchStatistics) Descriptor() ([]byte, []int)
Deprecated: Use InferBatchStatistics.ProtoReflect.Descriptor instead.
func (*InferBatchStatistics) GetBatchSize ¶
func (x *InferBatchStatistics) GetBatchSize() uint64
func (*InferBatchStatistics) GetComputeInfer ¶
func (x *InferBatchStatistics) GetComputeInfer() *StatisticDuration
func (*InferBatchStatistics) GetComputeInput ¶
func (x *InferBatchStatistics) GetComputeInput() *StatisticDuration
func (*InferBatchStatistics) GetComputeOutput ¶
func (x *InferBatchStatistics) GetComputeOutput() *StatisticDuration
func (*InferBatchStatistics) ProtoMessage ¶
func (*InferBatchStatistics) ProtoMessage()
func (*InferBatchStatistics) ProtoReflect ¶
func (x *InferBatchStatistics) ProtoReflect() protoreflect.Message
func (*InferBatchStatistics) Reset ¶
func (x *InferBatchStatistics) Reset()
func (*InferBatchStatistics) String ¶
func (x *InferBatchStatistics) String() string
type InferParameter ¶
type InferParameter struct { // @@ .. cpp:var:: oneof parameter_choice // @@ // @@ The parameter value can be a string, an int64, // @@ a uint64, a double, or a boolean. // @@ // @@ Note: double and uint64 are currently // @@ placeholders for future use and // @@ are not supported for custom parameters. // @@ // // Types that are assignable to ParameterChoice: // // *InferParameter_BoolParam // *InferParameter_Int64Param // *InferParameter_StringParam // *InferParameter_DoubleParam // *InferParameter_Uint64Param ParameterChoice isInferParameter_ParameterChoice `protobuf_oneof:"parameter_choice"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message InferParameter @@ @@ An inference parameter value. @@
func (*InferParameter) Descriptor
deprecated
func (*InferParameter) Descriptor() ([]byte, []int)
Deprecated: Use InferParameter.ProtoReflect.Descriptor instead.
func (*InferParameter) GetBoolParam ¶
func (x *InferParameter) GetBoolParam() bool
func (*InferParameter) GetDoubleParam ¶
func (x *InferParameter) GetDoubleParam() float64
func (*InferParameter) GetInt64Param ¶
func (x *InferParameter) GetInt64Param() int64
func (*InferParameter) GetParameterChoice ¶
func (m *InferParameter) GetParameterChoice() isInferParameter_ParameterChoice
func (*InferParameter) GetStringParam ¶
func (x *InferParameter) GetStringParam() string
func (*InferParameter) GetUint64Param ¶
func (x *InferParameter) GetUint64Param() uint64
func (*InferParameter) ProtoMessage ¶
func (*InferParameter) ProtoMessage()
func (*InferParameter) ProtoReflect ¶
func (x *InferParameter) ProtoReflect() protoreflect.Message
func (*InferParameter) Reset ¶
func (x *InferParameter) Reset()
func (*InferParameter) String ¶
func (x *InferParameter) String() string
type InferParameter_BoolParam ¶
type InferParameter_BoolParam struct { // @@ .. cpp:var:: bool bool_param // @@ // @@ A boolean parameter value. // @@ BoolParam bool `protobuf:"varint,1,opt,name=bool_param,json=boolParam,proto3,oneof"` }
type InferParameter_DoubleParam ¶
type InferParameter_DoubleParam struct { // @@ .. cpp:var:: double double_param // @@ // @@ A double parameter value. // @@ DoubleParam float64 `protobuf:"fixed64,4,opt,name=double_param,json=doubleParam,proto3,oneof"` }
type InferParameter_Int64Param ¶
type InferParameter_Int64Param struct { // @@ .. cpp:var:: int64 int64_param // @@ // @@ An int64 parameter value. // @@ Int64Param int64 `protobuf:"varint,2,opt,name=int64_param,json=int64Param,proto3,oneof"` }
type InferParameter_StringParam ¶
type InferParameter_StringParam struct { // @@ .. cpp:var:: string string_param // @@ // @@ A string parameter value. // @@ StringParam string `protobuf:"bytes,3,opt,name=string_param,json=stringParam,proto3,oneof"` }
type InferParameter_Uint64Param ¶
type InferParameter_Uint64Param struct { // @@ .. cpp:var:: uint64 uint64_param // @@ // @@ A uint64 parameter value. // @@ // @@ Not supported for custom parameters // @@ Uint64Param uint64 `protobuf:"varint,5,opt,name=uint64_param,json=uint64Param,proto3,oneof"` }
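To set a value, wrap it in the matching oneof type shown above; the parameter names here are purely illustrative:

package main

import (
    pb "example.com/your/module/inference" // placeholder import path
)

// exampleParameters builds one parameter of each supported custom kind;
// exactly one wrapper may be set per InferParameter.
func exampleParameters() map[string]*pb.InferParameter {
    return map[string]*pb.InferParameter{
        "flag":  {ParameterChoice: &pb.InferParameter_BoolParam{BoolParam: true}},
        "count": {ParameterChoice: &pb.InferParameter_Int64Param{Int64Param: 42}},
        "label": {ParameterChoice: &pb.InferParameter_StringParam{StringParam: "demo"}},
    }
}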
type InferResponseStatistics ¶
type InferResponseStatistics struct { // @@ .. cpp:var:: StatisticDuration compute_infer // @@ // @@ The count and cumulative duration to compute a response. // @@ ComputeInfer *StatisticDuration `protobuf:"bytes,1,opt,name=compute_infer,json=computeInfer,proto3" json:"compute_infer,omitempty"` // @@ .. cpp:var:: StatisticDuration compute_output // @@ // @@ The count and cumulative duration to extract the output tensors of a // @@ response. // @@ ComputeOutput *StatisticDuration `protobuf:"bytes,2,opt,name=compute_output,json=computeOutput,proto3" json:"compute_output,omitempty"` // @@ .. cpp:var:: StatisticDuration success // @@ // @@ The count and cumulative duration for successful responses. // @@ Success *StatisticDuration `protobuf:"bytes,3,opt,name=success,proto3" json:"success,omitempty"` // @@ .. cpp:var:: StatisticDuration fail // @@ // @@ The count and cumulative duration for failed responses. // @@ Fail *StatisticDuration `protobuf:"bytes,4,opt,name=fail,proto3" json:"fail,omitempty"` // @@ .. cpp:var:: StatisticDuration empty_response // @@ // @@ The count and cumulative duration for empty responses. // @@ EmptyResponse *StatisticDuration `protobuf:"bytes,5,opt,name=empty_response,json=emptyResponse,proto3" json:"empty_response,omitempty"` // @@ .. cpp:var:: StatisticDuration cancel // @@ // @@ The count and cumulative duration, for cleaning up resources held by // @@ a cancelled request, for cancelled responses. // @@ Cancel *StatisticDuration `protobuf:"bytes,6,opt,name=cancel,proto3" json:"cancel,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message InferResponseStatistics @@ @@ Statistics per response. @@
func (*InferResponseStatistics) Descriptor
deprecated
func (*InferResponseStatistics) Descriptor() ([]byte, []int)
Deprecated: Use InferResponseStatistics.ProtoReflect.Descriptor instead.
func (*InferResponseStatistics) GetCancel ¶
func (x *InferResponseStatistics) GetCancel() *StatisticDuration
func (*InferResponseStatistics) GetComputeInfer ¶
func (x *InferResponseStatistics) GetComputeInfer() *StatisticDuration
func (*InferResponseStatistics) GetComputeOutput ¶
func (x *InferResponseStatistics) GetComputeOutput() *StatisticDuration
func (*InferResponseStatistics) GetEmptyResponse ¶
func (x *InferResponseStatistics) GetEmptyResponse() *StatisticDuration
func (*InferResponseStatistics) GetFail ¶
func (x *InferResponseStatistics) GetFail() *StatisticDuration
func (*InferResponseStatistics) GetSuccess ¶
func (x *InferResponseStatistics) GetSuccess() *StatisticDuration
func (*InferResponseStatistics) ProtoMessage ¶
func (*InferResponseStatistics) ProtoMessage()
func (*InferResponseStatistics) ProtoReflect ¶
func (x *InferResponseStatistics) ProtoReflect() protoreflect.Message
func (*InferResponseStatistics) Reset ¶
func (x *InferResponseStatistics) Reset()
func (*InferResponseStatistics) String ¶
func (x *InferResponseStatistics) String() string
type InferStatistics ¶
type InferStatistics struct { // @@ .. cpp:var:: StatisticDuration success // @@ // @@ Cumulative count and duration for successful inference // @@ requests. The "success" count and cumulative duration include // @@ cache hits. // @@ Success *StatisticDuration `protobuf:"bytes,1,opt,name=success,proto3" json:"success,omitempty"` // @@ .. cpp:var:: StatisticDuration fail // @@ // @@ Cumulative count and duration for failed inference // @@ requests. // @@ Fail *StatisticDuration `protobuf:"bytes,2,opt,name=fail,proto3" json:"fail,omitempty"` // @@ .. cpp:var:: StatisticDuration queue // @@ // @@ The count and cumulative duration that inference requests wait in // @@ scheduling or other queues. The "queue" count and cumulative // @@ duration include cache hits. // @@ Queue *StatisticDuration `protobuf:"bytes,3,opt,name=queue,proto3" json:"queue,omitempty"` // @@ .. cpp:var:: StatisticDuration compute_input // @@ // @@ The count and cumulative duration to prepare input tensor data as // @@ required by the model framework / backend. For example, this duration // @@ should include the time to copy input tensor data to the GPU. // @@ The "compute_input" count and cumulative duration do not account for // @@ requests that were a cache hit. See the "cache_hit" field for more // @@ info. // @@ ComputeInput *StatisticDuration `protobuf:"bytes,4,opt,name=compute_input,json=computeInput,proto3" json:"compute_input,omitempty"` // @@ .. cpp:var:: StatisticDuration compute_infer // @@ // @@ The count and cumulative duration to execute the model. // @@ The "compute_infer" count and cumulative duration do not account for // @@ requests that were a cache hit. See the "cache_hit" field for more // @@ info. // @@ ComputeInfer *StatisticDuration `protobuf:"bytes,5,opt,name=compute_infer,json=computeInfer,proto3" json:"compute_infer,omitempty"` // @@ .. cpp:var:: StatisticDuration compute_output // @@ // @@ The count and cumulative duration to extract output tensor data // @@ produced by the model framework / backend. For example, this duration // @@ should include the time to copy output tensor data from the GPU. // @@ The "compute_output" count and cumulative duration do not account for // @@ requests that were a cache hit. See the "cache_hit" field for more // @@ info. // @@ ComputeOutput *StatisticDuration `protobuf:"bytes,6,opt,name=compute_output,json=computeOutput,proto3" json:"compute_output,omitempty"` // @@ .. cpp:var:: StatisticDuration cache_hit // @@ // @@ The count of response cache hits and cumulative duration to look up // @@ and extract output tensor data from the Response Cache on a cache // @@ hit. For example, this duration should include the time to copy // @@ output tensor data from the Response Cache to the response object. // @@ On cache hits, Triton does not need to go to the model/backend // @@ for the output tensor data, so the "compute_input", "compute_infer", // @@ and "compute_output" fields are not updated. Assuming the response // @@ cache is enabled for a given model, a cache hit occurs for a // @@ request to that model when the request metadata (model name, // @@ model version, model inputs) hashes to an existing entry in the // @@ cache. On a cache miss, the request hash and response output tensor // @@ data is added to the cache. // @@ See the response cache docs for more info: // @@ https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md // @@ CacheHit *StatisticDuration `protobuf:"bytes,7,opt,name=cache_hit,json=cacheHit,proto3" json:"cache_hit,omitempty"` // @@ .. cpp:var:: StatisticDuration cache_miss // @@ // @@ The count of response cache misses and cumulative duration to look up // @@ and insert output tensor data from the computed response to the // @@ cache. For example, this duration should include the time to copy // @@ output tensor data from the response object to the Response Cache. // @@ Assuming the response cache is enabled for a given model, a cache // @@ miss occurs for a request to that model when the request metadata // @@ does NOT hash to an existing entry in the cache. See the response // @@ cache docs for more info: // @@ https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md // @@ CacheMiss *StatisticDuration `protobuf:"bytes,8,opt,name=cache_miss,json=cacheMiss,proto3" json:"cache_miss,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message InferStatistics @@ @@ Inference statistics. @@
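As a usage sketch, the per-phase fields can be turned into average latencies. This assumes StatisticDuration exposes GetCount and GetNs accessors (cumulative count and total nanoseconds), which are not shown in this section, and that this generated package is imported as pb:

    // Average queue time in microseconds for a model, derived from the
    // cumulative "queue" statistic. A sketch only; StatisticDuration's
    // GetCount/GetNs accessors are assumed, not documented here.
    func avgQueueMicros(s *pb.InferStatistics) float64 {
        q := s.GetQueue()
        if q == nil || q.GetCount() == 0 {
            return 0
        }
        return float64(q.GetNs()) / float64(q.GetCount()) / 1e3
    }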
func (*InferStatistics) Descriptor
deprecated
func (*InferStatistics) Descriptor() ([]byte, []int)
Deprecated: Use InferStatistics.ProtoReflect.Descriptor instead.
func (*InferStatistics) GetCacheHit ¶
func (x *InferStatistics) GetCacheHit() *StatisticDuration
func (*InferStatistics) GetCacheMiss ¶
func (x *InferStatistics) GetCacheMiss() *StatisticDuration
func (*InferStatistics) GetComputeInfer ¶
func (x *InferStatistics) GetComputeInfer() *StatisticDuration
func (*InferStatistics) GetComputeInput ¶
func (x *InferStatistics) GetComputeInput() *StatisticDuration
func (*InferStatistics) GetComputeOutput ¶
func (x *InferStatistics) GetComputeOutput() *StatisticDuration
func (*InferStatistics) GetFail ¶
func (x *InferStatistics) GetFail() *StatisticDuration
func (*InferStatistics) GetQueue ¶
func (x *InferStatistics) GetQueue() *StatisticDuration
func (*InferStatistics) GetSuccess ¶
func (x *InferStatistics) GetSuccess() *StatisticDuration
func (*InferStatistics) ProtoMessage ¶
func (*InferStatistics) ProtoMessage()
func (*InferStatistics) ProtoReflect ¶
func (x *InferStatistics) ProtoReflect() protoreflect.Message
func (*InferStatistics) Reset ¶
func (x *InferStatistics) Reset()
func (*InferStatistics) String ¶
func (x *InferStatistics) String() string
type InferTensorContents ¶
type InferTensorContents struct { // @@ // @@ .. cpp:var:: bool bool_contents (repeated) // @@ // @@ Representation for BOOL data type. The size must match what is // @@ expected by the tensor's shape. The contents must be the flattened, // @@ one-dimensional, row-major order of the tensor elements. // @@ BoolContents []bool `protobuf:"varint,1,rep,packed,name=bool_contents,json=boolContents,proto3" json:"bool_contents,omitempty"` // @@ // @@ .. cpp:var:: int32 int_contents (repeated) // @@ // @@ Representation for INT8, INT16, and INT32 data types. The size // @@ must match what is expected by the tensor's shape. The contents // @@ must be the flattened, one-dimensional, row-major order of the // @@ tensor elements. // @@ IntContents []int32 `protobuf:"varint,2,rep,packed,name=int_contents,json=intContents,proto3" json:"int_contents,omitempty"` // @@ // @@ .. cpp:var:: int64 int64_contents (repeated) // @@ // @@ Representation for INT64 data types. The size must match what // @@ is expected by the tensor's shape. The contents must be the // @@ flattened, one-dimensional, row-major order of the tensor elements. // @@ Int64Contents []int64 `protobuf:"varint,3,rep,packed,name=int64_contents,json=int64Contents,proto3" json:"int64_contents,omitempty"` // @@ // @@ .. cpp:var:: uint32 uint_contents (repeated) // @@ // @@ Representation for UINT8, UINT16, and UINT32 data types. The size // @@ must match what is expected by the tensor's shape. The contents // @@ must be the flattened, one-dimensional, row-major order of the // @@ tensor elements. // @@ UintContents []uint32 `protobuf:"varint,4,rep,packed,name=uint_contents,json=uintContents,proto3" json:"uint_contents,omitempty"` // @@ // @@ .. cpp:var:: uint64 uint64_contents (repeated) // @@ // @@ Representation for UINT64 data types. The size must match what // @@ is expected by the tensor's shape. The contents must be the // @@ flattened, one-dimensional, row-major order of the tensor elements. // @@ Uint64Contents []uint64 `protobuf:"varint,5,rep,packed,name=uint64_contents,json=uint64Contents,proto3" json:"uint64_contents,omitempty"` // @@ // @@ .. cpp:var:: float fp32_contents (repeated) // @@ // @@ Representation for FP32 data type. The size must match what is // @@ expected by the tensor's shape. The contents must be the flattened, // @@ one-dimensional, row-major order of the tensor elements. // @@ Fp32Contents []float32 `protobuf:"fixed32,6,rep,packed,name=fp32_contents,json=fp32Contents,proto3" json:"fp32_contents,omitempty"` // @@ // @@ .. cpp:var:: double fp64_contents (repeated) // @@ // @@ Representation for FP64 data type. The size must match what is // @@ expected by the tensor's shape. The contents must be the flattened, // @@ one-dimensional, row-major order of the tensor elements. // @@ Fp64Contents []float64 `protobuf:"fixed64,7,rep,packed,name=fp64_contents,json=fp64Contents,proto3" json:"fp64_contents,omitempty"` // @@ // @@ .. cpp:var:: bytes bytes_contents (repeated) // @@ // @@ Representation for BYTES data type. The size must match what is // @@ expected by the tensor's shape. The contents must be the flattened, // @@ one-dimensional, row-major order of the tensor elements. // @@ BytesContents [][]byte `protobuf:"bytes,8,rep,name=bytes_contents,json=bytesContents,proto3" json:"bytes_contents,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message InferTensorContents @@ @@ The data contained in a tensor represented by the repeated type @@ that matches the tensor's data type. Protobuf oneof is not used @@ because oneofs cannot contain repeated fields. @@
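For example, a 2x3 FP32 tensor (shape [2, 3]) is carried in Fp32Contents as six values in flattened, row-major order. A minimal sketch, assuming the package is imported as pb:

    // Six elements for shape [2, 3], flattened row-major as required above.
    contents := &pb.InferTensorContents{
        Fp32Contents: []float32{
            1, 2, 3, // row 0
            4, 5, 6, // row 1
        },
    }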
func (*InferTensorContents) Descriptor
deprecated
func (*InferTensorContents) Descriptor() ([]byte, []int)
Deprecated: Use InferTensorContents.ProtoReflect.Descriptor instead.
func (*InferTensorContents) GetBoolContents ¶
func (x *InferTensorContents) GetBoolContents() []bool
func (*InferTensorContents) GetBytesContents ¶
func (x *InferTensorContents) GetBytesContents() [][]byte
func (*InferTensorContents) GetFp32Contents ¶
func (x *InferTensorContents) GetFp32Contents() []float32
func (*InferTensorContents) GetFp64Contents ¶
func (x *InferTensorContents) GetFp64Contents() []float64
func (*InferTensorContents) GetInt64Contents ¶
func (x *InferTensorContents) GetInt64Contents() []int64
func (*InferTensorContents) GetIntContents ¶
func (x *InferTensorContents) GetIntContents() []int32
func (*InferTensorContents) GetUint64Contents ¶
func (x *InferTensorContents) GetUint64Contents() []uint64
func (*InferTensorContents) GetUintContents ¶
func (x *InferTensorContents) GetUintContents() []uint32
func (*InferTensorContents) ProtoMessage ¶
func (*InferTensorContents) ProtoMessage()
func (*InferTensorContents) ProtoReflect ¶
func (x *InferTensorContents) ProtoReflect() protoreflect.Message
func (*InferTensorContents) Reset ¶
func (x *InferTensorContents) Reset()
func (*InferTensorContents) String ¶
func (x *InferTensorContents) String() string
type LogSettingsRequest ¶
type LogSettingsRequest struct { // @@ .. cpp:var:: map<string,SettingValue> settings // @@ // @@ The current log settings. // @@ Settings map[string]*LogSettingsRequest_SettingValue `` /* 157-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message LogSettingsRequest @@ @@ Request message for LogSettings. @@
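A sketch of building a request, using the oneof wrapper types defined below (LogSettingsRequest_SettingValue_BoolParam and friends). The setting names are illustrative assumptions, not taken from this page:

    req := &pb.LogSettingsRequest{
        Settings: map[string]*pb.LogSettingsRequest_SettingValue{
            // "log_info" and "log_verbose_level" are assumed setting names.
            "log_info": {
                ParameterChoice: &pb.LogSettingsRequest_SettingValue_BoolParam{BoolParam: true},
            },
            "log_verbose_level": {
                ParameterChoice: &pb.LogSettingsRequest_SettingValue_Uint32Param{Uint32Param: 1},
            },
        },
    }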
func (*LogSettingsRequest) Descriptor
deprecated
func (*LogSettingsRequest) Descriptor() ([]byte, []int)
Deprecated: Use LogSettingsRequest.ProtoReflect.Descriptor instead.
func (*LogSettingsRequest) GetSettings ¶
func (x *LogSettingsRequest) GetSettings() map[string]*LogSettingsRequest_SettingValue
func (*LogSettingsRequest) ProtoMessage ¶
func (*LogSettingsRequest) ProtoMessage()
func (*LogSettingsRequest) ProtoReflect ¶
func (x *LogSettingsRequest) ProtoReflect() protoreflect.Message
func (*LogSettingsRequest) Reset ¶
func (x *LogSettingsRequest) Reset()
func (*LogSettingsRequest) String ¶
func (x *LogSettingsRequest) String() string
type LogSettingsRequest_SettingValue ¶
type LogSettingsRequest_SettingValue struct { // Types that are assignable to ParameterChoice: // // *LogSettingsRequest_SettingValue_BoolParam // *LogSettingsRequest_SettingValue_Uint32Param // *LogSettingsRequest_SettingValue_StringParam ParameterChoice isLogSettingsRequest_SettingValue_ParameterChoice `protobuf_oneof:"parameter_choice"` // contains filtered or unexported fields }
func (*LogSettingsRequest_SettingValue) Descriptor
deprecated
func (*LogSettingsRequest_SettingValue) Descriptor() ([]byte, []int)
Deprecated: Use LogSettingsRequest_SettingValue.ProtoReflect.Descriptor instead.
func (*LogSettingsRequest_SettingValue) GetBoolParam ¶
func (x *LogSettingsRequest_SettingValue) GetBoolParam() bool
func (*LogSettingsRequest_SettingValue) GetParameterChoice ¶
func (m *LogSettingsRequest_SettingValue) GetParameterChoice() isLogSettingsRequest_SettingValue_ParameterChoice
func (*LogSettingsRequest_SettingValue) GetStringParam ¶
func (x *LogSettingsRequest_SettingValue) GetStringParam() string
func (*LogSettingsRequest_SettingValue) GetUint32Param ¶
func (x *LogSettingsRequest_SettingValue) GetUint32Param() uint32
func (*LogSettingsRequest_SettingValue) ProtoMessage ¶
func (*LogSettingsRequest_SettingValue) ProtoMessage()
func (*LogSettingsRequest_SettingValue) ProtoReflect ¶
func (x *LogSettingsRequest_SettingValue) ProtoReflect() protoreflect.Message
func (*LogSettingsRequest_SettingValue) Reset ¶
func (x *LogSettingsRequest_SettingValue) Reset()
func (*LogSettingsRequest_SettingValue) String ¶
func (x *LogSettingsRequest_SettingValue) String() string
type LogSettingsRequest_SettingValue_BoolParam ¶
type LogSettingsRequest_SettingValue_BoolParam struct { // @@ .. cpp:var:: bool bool_param // @@ // @@ A boolean parameter value. // @@ BoolParam bool `protobuf:"varint,1,opt,name=bool_param,json=boolParam,proto3,oneof"` }
type LogSettingsRequest_SettingValue_StringParam ¶
type LogSettingsRequest_SettingValue_StringParam struct { // @@ .. cpp:var:: string string_param // @@ // @@ A string parameter value. // @@ StringParam string `protobuf:"bytes,3,opt,name=string_param,json=stringParam,proto3,oneof"` }
type LogSettingsRequest_SettingValue_Uint32Param ¶
type LogSettingsRequest_SettingValue_Uint32Param struct { // @@ .. cpp:var:: uint32 uint32_param // @@ // @@ A uint32 parameter value. // @@ Uint32Param uint32 `protobuf:"varint,2,opt,name=uint32_param,json=uint32Param,proto3,oneof"` }
type LogSettingsResponse ¶
type LogSettingsResponse struct { // @@ .. cpp:var:: map<string,SettingValue> settings // @@ // @@ The current log settings. // @@ Settings map[string]*LogSettingsResponse_SettingValue `` /* 157-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message LogSettingsResponse @@ @@ Response message for LogSettings. @@
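Reading the response back is the mirror image: a type switch over the oneof wrappers. A sketch, assuming fmt is imported and resp is a *pb.LogSettingsResponse obtained from the RPC:

    for name, v := range resp.GetSettings() {
        switch p := v.GetParameterChoice().(type) {
        case *pb.LogSettingsResponse_SettingValue_BoolParam:
            fmt.Printf("%s = %v\n", name, p.BoolParam)
        case *pb.LogSettingsResponse_SettingValue_Uint32Param:
            fmt.Printf("%s = %d\n", name, p.Uint32Param)
        case *pb.LogSettingsResponse_SettingValue_StringParam:
            fmt.Printf("%s = %q\n", name, p.StringParam)
        }
    }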
func (*LogSettingsResponse) Descriptor
deprecated
func (*LogSettingsResponse) Descriptor() ([]byte, []int)
Deprecated: Use LogSettingsResponse.ProtoReflect.Descriptor instead.
func (*LogSettingsResponse) GetSettings ¶
func (x *LogSettingsResponse) GetSettings() map[string]*LogSettingsResponse_SettingValue
func (*LogSettingsResponse) ProtoMessage ¶
func (*LogSettingsResponse) ProtoMessage()
func (*LogSettingsResponse) ProtoReflect ¶
func (x *LogSettingsResponse) ProtoReflect() protoreflect.Message
func (*LogSettingsResponse) Reset ¶
func (x *LogSettingsResponse) Reset()
func (*LogSettingsResponse) String ¶
func (x *LogSettingsResponse) String() string
type LogSettingsResponse_SettingValue ¶
type LogSettingsResponse_SettingValue struct { // Types that are assignable to ParameterChoice: // // *LogSettingsResponse_SettingValue_BoolParam // *LogSettingsResponse_SettingValue_Uint32Param // *LogSettingsResponse_SettingValue_StringParam ParameterChoice isLogSettingsResponse_SettingValue_ParameterChoice `protobuf_oneof:"parameter_choice"` // contains filtered or unexported fields }
func (*LogSettingsResponse_SettingValue) Descriptor
deprecated
func (*LogSettingsResponse_SettingValue) Descriptor() ([]byte, []int)
Deprecated: Use LogSettingsResponse_SettingValue.ProtoReflect.Descriptor instead.
func (*LogSettingsResponse_SettingValue) GetBoolParam ¶
func (x *LogSettingsResponse_SettingValue) GetBoolParam() bool
func (*LogSettingsResponse_SettingValue) GetParameterChoice ¶
func (m *LogSettingsResponse_SettingValue) GetParameterChoice() isLogSettingsResponse_SettingValue_ParameterChoice
func (*LogSettingsResponse_SettingValue) GetStringParam ¶
func (x *LogSettingsResponse_SettingValue) GetStringParam() string
func (*LogSettingsResponse_SettingValue) GetUint32Param ¶
func (x *LogSettingsResponse_SettingValue) GetUint32Param() uint32
func (*LogSettingsResponse_SettingValue) ProtoMessage ¶
func (*LogSettingsResponse_SettingValue) ProtoMessage()
func (*LogSettingsResponse_SettingValue) ProtoReflect ¶
func (x *LogSettingsResponse_SettingValue) ProtoReflect() protoreflect.Message
func (*LogSettingsResponse_SettingValue) Reset ¶
func (x *LogSettingsResponse_SettingValue) Reset()
func (*LogSettingsResponse_SettingValue) String ¶
func (x *LogSettingsResponse_SettingValue) String() string
type LogSettingsResponse_SettingValue_BoolParam ¶
type LogSettingsResponse_SettingValue_BoolParam struct { // @@ .. cpp:var:: bool bool_param // @@ // @@ A boolean parameter value. // @@ BoolParam bool `protobuf:"varint,1,opt,name=bool_param,json=boolParam,proto3,oneof"` }
type LogSettingsResponse_SettingValue_StringParam ¶
type LogSettingsResponse_SettingValue_StringParam struct { // @@ .. cpp:var:: string string_param // @@ // @@ A string parameter value. // @@ StringParam string `protobuf:"bytes,3,opt,name=string_param,json=stringParam,proto3,oneof"` }
type LogSettingsResponse_SettingValue_Uint32Param ¶
type LogSettingsResponse_SettingValue_Uint32Param struct { // @@ .. cpp:var:: uint32 uint32_param // @@ // @@ A uint32 parameter value. // @@ Uint32Param uint32 `protobuf:"varint,2,opt,name=uint32_param,json=uint32Param,proto3,oneof"` }
type MemoryUsage ¶
type MemoryUsage struct { // @@ .. cpp:var:: string type // @@ // @@ The type of memory, the value can be "CPU", "CPU_PINNED", "GPU". // @@ Type string `protobuf:"bytes,1,opt,name=type,proto3" json:"type,omitempty"` // @@ .. cpp:var:: int64 id // @@ // @@ The id of the memory, typically used with "type" to identify // @@ a device that hosts the memory. // @@ Id int64 `protobuf:"varint,2,opt,name=id,proto3" json:"id,omitempty"` // @@ .. cpp:var:: uint64 byte_size // @@ // @@ The byte size of the memory. // @@ ByteSize uint64 `protobuf:"varint,3,opt,name=byte_size,json=byteSize,proto3" json:"byte_size,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message MemoryUsage @@ @@ Memory usage. @@
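A small sketch of aggregating a slice of MemoryUsage entries by memory type; the usages slice would come from a status or statistics response elsewhere in the API:

    // Total reported bytes per memory type ("CPU", "CPU_PINNED", "GPU").
    totals := map[string]uint64{}
    for _, m := range usages { // usages []*pb.MemoryUsage, obtained elsewhere
        totals[m.GetType()] += m.GetByteSize()
    }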
func (*MemoryUsage) Descriptor
deprecated
func (*MemoryUsage) Descriptor() ([]byte, []int)
Deprecated: Use MemoryUsage.ProtoReflect.Descriptor instead.
func (*MemoryUsage) GetByteSize ¶
func (x *MemoryUsage) GetByteSize() uint64
func (*MemoryUsage) GetId ¶
func (x *MemoryUsage) GetId() int64
func (*MemoryUsage) GetType ¶
func (x *MemoryUsage) GetType() string
func (*MemoryUsage) ProtoMessage ¶
func (*MemoryUsage) ProtoMessage()
func (*MemoryUsage) ProtoReflect ¶
func (x *MemoryUsage) ProtoReflect() protoreflect.Message
func (*MemoryUsage) Reset ¶
func (x *MemoryUsage) Reset()
func (*MemoryUsage) String ¶
func (x *MemoryUsage) String() string
type ModelConfig ¶
type ModelConfig struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the model. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: string platform // @@ // @@ Additional backend-specific configuration for the model. // @@ Please refer to the backend documentation on whether this field // @@ should be specified. // @@ Platform string `protobuf:"bytes,2,opt,name=platform,proto3" json:"platform,omitempty"` // @@ .. cpp:var:: string backend // @@ // @@ The backend used by the model. // @@ Backend string `protobuf:"bytes,17,opt,name=backend,proto3" json:"backend,omitempty"` // @@ .. cpp:var:: string runtime // @@ // @@ The name of the backend library file used by the model. // @@ Runtime string `protobuf:"bytes,25,opt,name=runtime,proto3" json:"runtime,omitempty"` // @@ .. cpp:var:: ModelVersionPolicy version_policy // @@ // @@ Policy indicating which version(s) of the model will be served. // @@ VersionPolicy *ModelVersionPolicy `protobuf:"bytes,3,opt,name=version_policy,json=versionPolicy,proto3" json:"version_policy,omitempty"` // @@ .. cpp:var:: int32 max_batch_size // @@ // @@ Maximum batch size allowed for inference. This can only decrease // @@ what is allowed by the model itself. A max_batch_size value of 0 // @@ indicates that batching is not allowed for the model and the // @@ dimension/shape of the input and output tensors must exactly // @@ match what is specified in the input and output configuration. A // @@ max_batch_size value > 0 indicates that batching is allowed and // @@ so the model expects the input tensors to have an additional // @@ initial dimension for the batching that is not specified in the // @@ input (for example, if the model supports batched inputs of // @@ 2-dimensional tensors then the model configuration will specify // @@ the input shape as [ X, Y ] but the model will expect the actual // @@ input tensors to have shape [ N, X, Y ]). For max_batch_size > 0 // @@ returned outputs will also have an additional initial dimension // @@ for the batch. // @@ MaxBatchSize int32 `protobuf:"varint,4,opt,name=max_batch_size,json=maxBatchSize,proto3" json:"max_batch_size,omitempty"` // @@ .. cpp:var:: ModelInput input (repeated) // @@ // @@ The inputs required by the model. // @@ Input []*ModelInput `protobuf:"bytes,5,rep,name=input,proto3" json:"input,omitempty"` // @@ .. cpp:var:: ModelOutput output (repeated) // @@ // @@ The outputs produced by the model. // @@ Output []*ModelOutput `protobuf:"bytes,6,rep,name=output,proto3" json:"output,omitempty"` // @@ .. cpp:var:: BatchInput batch_input (repeated) // @@ // @@ The model input(s) that the server should use to communicate // @@ batch related values to the model. // @@ BatchInput []*BatchInput `protobuf:"bytes,20,rep,name=batch_input,json=batchInput,proto3" json:"batch_input,omitempty"` // @@ .. cpp:var:: BatchOutput batch_output (repeated) // @@ // @@ The outputs produced by the model that require special handling // @@ by the model backend. // @@ BatchOutput []*BatchOutput `protobuf:"bytes,21,rep,name=batch_output,json=batchOutput,proto3" json:"batch_output,omitempty"` // @@ .. cpp:var:: ModelOptimizationPolicy optimization // @@ // @@ Optimization configuration for the model. If not specified // @@ then the default optimization policy is used. // @@ Optimization *ModelOptimizationPolicy `protobuf:"bytes,12,opt,name=optimization,proto3" json:"optimization,omitempty"` // @@ .. cpp:var:: oneof scheduling_choice // @@ // @@ The scheduling policy for the model. If not specified the // @@ default scheduling policy is used for the model. The default // @@ policy is to execute each inference request independently. // @@ // // Types that are assignable to SchedulingChoice: // // *ModelConfig_DynamicBatching // // *ModelConfig_SequenceBatching // // *ModelConfig_EnsembleScheduling SchedulingChoice isModelConfig_SchedulingChoice `protobuf_oneof:"scheduling_choice"` // @@ .. cpp:var:: ModelInstanceGroup instance_group (repeated) // @@ // @@ Instances of this model. If not specified, one instance // @@ of the model will be instantiated on each available GPU. // @@ InstanceGroup []*ModelInstanceGroup `protobuf:"bytes,7,rep,name=instance_group,json=instanceGroup,proto3" json:"instance_group,omitempty"` // @@ .. cpp:var:: string default_model_filename // @@ // @@ Optional filename of the model file to use if a // @@ compute-capability specific model is not specified in // @@ :cpp:var:`cc_model_filenames`. If not specified the default name // @@ is 'model.graphdef', 'model.savedmodel', 'model.plan' or // @@ 'model.pt' depending on the model type. // @@ DefaultModelFilename string `protobuf:"bytes,8,opt,name=default_model_filename,json=defaultModelFilename,proto3" json:"default_model_filename,omitempty"` // @@ .. cpp:var:: map<string,string> cc_model_filenames // @@ // @@ Optional map from CUDA compute capability to the filename of // @@ the model that supports that compute capability. The filename // @@ refers to a file within the model version directory. // @@ CcModelFilenames map[string]string `` /* 199-byte string literal not displayed */ // @@ .. cpp:var:: map<string,string> metric_tags // @@ // @@ Optional metric tags. User-specific key-value pairs for metrics // @@ reported for this model. These tags are applied to the metrics // @@ reported on the HTTP metrics port. // @@ MetricTags map[string]string `` /* 180-byte string literal not displayed */ // @@ .. cpp:var:: map<string,ModelParameter> parameters // @@ // @@ Optional model parameters. User-specified parameter values. // @@ Parameters map[string]*ModelParameter `` /* 162-byte string literal not displayed */ // @@ .. cpp:var:: ModelWarmup model_warmup (repeated) // @@ // @@ Warmup setting of this model. If specified, all instances // @@ will be run with the request samples in sequence before // @@ serving the model. // @@ This field can only be specified if the model is not an ensemble // @@ model. // @@ ModelWarmup []*ModelWarmup `protobuf:"bytes,16,rep,name=model_warmup,json=modelWarmup,proto3" json:"model_warmup,omitempty"` // @@ .. cpp:var:: ModelOperations model_operations // @@ // @@ Optional metadata of the libraries providing custom operations for // @@ this model. // @@ ModelOperations *ModelOperations `protobuf:"bytes,18,opt,name=model_operations,json=modelOperations,proto3" json:"model_operations,omitempty"` // @@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy // @@ // @@ Optional specification that describes the nature of transactions // @@ to be expected from the model. // @@ ModelTransactionPolicy *ModelTransactionPolicy `` /* 130-byte string literal not displayed */ // @@ .. cpp:var:: ModelRepositoryAgents model_repository_agents // @@ // @@ Optional specification of the agent(s) that should be invoked // @@ when repository actions are performed for this model. // @@ ModelRepositoryAgents *ModelRepositoryAgents `` /* 127-byte string literal not displayed */ // @@ .. cpp:var:: ModelResponseCache response_cache // @@ // @@ Optional setting for utilizing the response cache for this // @@ model. // @@ ResponseCache *ModelResponseCache `protobuf:"bytes,24,opt,name=response_cache,json=responseCache,proto3" json:"response_cache,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelConfig @@ @@ A model configuration. @@
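A minimal sketch of a ModelConfig for a batching model: one input, one output, and dynamic batching selected through the scheduling_choice oneof. The model name, backend string, the DataType_TYPE_FP32 enum value name, and the assumption that ModelOutput mirrors ModelInput's Name/DataType/Dims fields are all illustrative:

    cfg := &pb.ModelConfig{
        Name:         "my_model",    // hypothetical model name
        Backend:      "onnxruntime", // hypothetical backend
        MaxBatchSize: 8,             // inputs gain a leading batch dimension
        Input: []*pb.ModelInput{{
            Name:     "INPUT0",
            DataType: pb.DataType_TYPE_FP32, // assumed enum value name
            Dims:     []int64{3, 224, 224},
        }},
        Output: []*pb.ModelOutput{{ // ModelOutput fields assumed analogous to ModelInput
            Name:     "OUTPUT0",
            DataType: pb.DataType_TYPE_FP32,
            Dims:     []int64{1000},
        }},
        SchedulingChoice: &pb.ModelConfig_DynamicBatching{
            DynamicBatching: &pb.ModelDynamicBatching{},
        },
    }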
func (*ModelConfig) Descriptor
deprecated
func (*ModelConfig) Descriptor() ([]byte, []int)
Deprecated: Use ModelConfig.ProtoReflect.Descriptor instead.
func (*ModelConfig) GetBackend ¶
func (x *ModelConfig) GetBackend() string
func (*ModelConfig) GetBatchInput ¶
func (x *ModelConfig) GetBatchInput() []*BatchInput
func (*ModelConfig) GetBatchOutput ¶
func (x *ModelConfig) GetBatchOutput() []*BatchOutput
func (*ModelConfig) GetCcModelFilenames ¶
func (x *ModelConfig) GetCcModelFilenames() map[string]string
func (*ModelConfig) GetDefaultModelFilename ¶
func (x *ModelConfig) GetDefaultModelFilename() string
func (*ModelConfig) GetDynamicBatching ¶
func (x *ModelConfig) GetDynamicBatching() *ModelDynamicBatching
func (*ModelConfig) GetEnsembleScheduling ¶
func (x *ModelConfig) GetEnsembleScheduling() *ModelEnsembling
func (*ModelConfig) GetInput ¶
func (x *ModelConfig) GetInput() []*ModelInput
func (*ModelConfig) GetInstanceGroup ¶
func (x *ModelConfig) GetInstanceGroup() []*ModelInstanceGroup
func (*ModelConfig) GetMaxBatchSize ¶
func (x *ModelConfig) GetMaxBatchSize() int32
func (*ModelConfig) GetMetricTags ¶
func (x *ModelConfig) GetMetricTags() map[string]string
func (*ModelConfig) GetModelOperations ¶
func (x *ModelConfig) GetModelOperations() *ModelOperations
func (*ModelConfig) GetModelRepositoryAgents ¶
func (x *ModelConfig) GetModelRepositoryAgents() *ModelRepositoryAgents
func (*ModelConfig) GetModelTransactionPolicy ¶
func (x *ModelConfig) GetModelTransactionPolicy() *ModelTransactionPolicy
func (*ModelConfig) GetModelWarmup ¶
func (x *ModelConfig) GetModelWarmup() []*ModelWarmup
func (*ModelConfig) GetName ¶
func (x *ModelConfig) GetName() string
func (*ModelConfig) GetOptimization ¶
func (x *ModelConfig) GetOptimization() *ModelOptimizationPolicy
func (*ModelConfig) GetOutput ¶
func (x *ModelConfig) GetOutput() []*ModelOutput
func (*ModelConfig) GetParameters ¶
func (x *ModelConfig) GetParameters() map[string]*ModelParameter
func (*ModelConfig) GetPlatform ¶
func (x *ModelConfig) GetPlatform() string
func (*ModelConfig) GetResponseCache ¶
func (x *ModelConfig) GetResponseCache() *ModelResponseCache
func (*ModelConfig) GetRuntime ¶
func (x *ModelConfig) GetRuntime() string
func (*ModelConfig) GetSchedulingChoice ¶
func (m *ModelConfig) GetSchedulingChoice() isModelConfig_SchedulingChoice
func (*ModelConfig) GetSequenceBatching ¶
func (x *ModelConfig) GetSequenceBatching() *ModelSequenceBatching
func (*ModelConfig) GetVersionPolicy ¶
func (x *ModelConfig) GetVersionPolicy() *ModelVersionPolicy
func (*ModelConfig) ProtoMessage ¶
func (*ModelConfig) ProtoMessage()
func (*ModelConfig) ProtoReflect ¶
func (x *ModelConfig) ProtoReflect() protoreflect.Message
func (*ModelConfig) Reset ¶
func (x *ModelConfig) Reset()
func (*ModelConfig) String ¶
func (x *ModelConfig) String() string
type ModelConfigRequest ¶
type ModelConfigRequest struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The name of the model. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: string version // @@ // @@ The version of the model. If not given the model version // @@ is selected automatically based on the version policy. // @@ Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelConfigRequest @@ @@ Request message for ModelConfig. @@
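A sketch of issuing the ModelConfig RPC, assuming the generated client constructor NewGRPCInferenceServiceClient from this package (the server registrar is shown in the index above) and a hypothetical server address:

    import (
        "context"

        "google.golang.org/grpc"
        "google.golang.org/grpc/credentials/insecure"
    )

    func fetchConfig(model string) (*pb.ModelConfigResponse, error) {
        conn, err := grpc.Dial("localhost:8001", // hypothetical Triton gRPC address
            grpc.WithTransportCredentials(insecure.NewCredentials()))
        if err != nil {
            return nil, err
        }
        defer conn.Close()
        client := pb.NewGRPCInferenceServiceClient(conn)
        // Leaving Version empty lets the server pick per the model's version policy.
        return client.ModelConfig(context.Background(), &pb.ModelConfigRequest{Name: model})
    }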
func (*ModelConfigRequest) Descriptor
deprecated
func (*ModelConfigRequest) Descriptor() ([]byte, []int)
Deprecated: Use ModelConfigRequest.ProtoReflect.Descriptor instead.
func (*ModelConfigRequest) GetName ¶
func (x *ModelConfigRequest) GetName() string
func (*ModelConfigRequest) GetVersion ¶
func (x *ModelConfigRequest) GetVersion() string
func (*ModelConfigRequest) ProtoMessage ¶
func (*ModelConfigRequest) ProtoMessage()
func (*ModelConfigRequest) ProtoReflect ¶
func (x *ModelConfigRequest) ProtoReflect() protoreflect.Message
func (*ModelConfigRequest) Reset ¶
func (x *ModelConfigRequest) Reset()
func (*ModelConfigRequest) String ¶
func (x *ModelConfigRequest) String() string
type ModelConfigResponse ¶
type ModelConfigResponse struct { // @@ // @@ .. cpp:var:: ModelConfig config // @@ // @@ The model configuration. // @@ Config *ModelConfig `protobuf:"bytes,1,opt,name=config,proto3" json:"config,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelConfigResponse @@ @@ Response message for ModelConfig. @@
func (*ModelConfigResponse) Descriptor
deprecated
func (*ModelConfigResponse) Descriptor() ([]byte, []int)
Deprecated: Use ModelConfigResponse.ProtoReflect.Descriptor instead.
func (*ModelConfigResponse) GetConfig ¶
func (x *ModelConfigResponse) GetConfig() *ModelConfig
func (*ModelConfigResponse) ProtoMessage ¶
func (*ModelConfigResponse) ProtoMessage()
func (*ModelConfigResponse) ProtoReflect ¶
func (x *ModelConfigResponse) ProtoReflect() protoreflect.Message
func (*ModelConfigResponse) Reset ¶
func (x *ModelConfigResponse) Reset()
func (*ModelConfigResponse) String ¶
func (x *ModelConfigResponse) String() string
type ModelConfig_DynamicBatching ¶
type ModelConfig_DynamicBatching struct { // @@ .. cpp:var:: ModelDynamicBatching dynamic_batching // @@ // @@ If specified, enables the dynamic-batching scheduling // @@ policy. With dynamic-batching the scheduler may group // @@ together independent requests into a single batch to // @@ improve inference throughput. // @@ DynamicBatching *ModelDynamicBatching `protobuf:"bytes,11,opt,name=dynamic_batching,json=dynamicBatching,proto3,oneof"` }
type ModelConfig_EnsembleScheduling ¶
type ModelConfig_EnsembleScheduling struct { // @@ .. cpp:var:: ModelEnsembling ensemble_scheduling // @@ // @@ If specified, enables the model-ensembling scheduling // @@ policy. With model-ensembling, inference requests // @@ will be processed according to the specification, such as an // @@ execution sequence of models. The input specified in this model // @@ config will be the input for the ensemble, and the output // @@ specified will be the output of the ensemble. // @@ EnsembleScheduling *ModelEnsembling `protobuf:"bytes,15,opt,name=ensemble_scheduling,json=ensembleScheduling,proto3,oneof"` }
type ModelConfig_SequenceBatching ¶
type ModelConfig_SequenceBatching struct { // @@ .. cpp:var:: ModelSequenceBatching sequence_batching // @@ // @@ If specified, enables the sequence-batching scheduling // @@ policy. With sequence-batching, inference requests // @@ with the same correlation ID are routed to the same // @@ model instance. Multiple sequences of inference requests // @@ may be batched together into a single batch to // @@ improve inference throughput. // @@ SequenceBatching *ModelSequenceBatching `protobuf:"bytes,13,opt,name=sequence_batching,json=sequenceBatching,proto3,oneof"` }
type ModelDynamicBatching ¶
type ModelDynamicBatching struct { // @@ .. cpp:var:: int32 preferred_batch_size (repeated) // @@ // @@ Preferred batch sizes for dynamic batching. If a batch of one of // @@ these sizes can be formed it will be executed immediately. If // @@ not specified a preferred batch size will be chosen automatically // @@ based on model and GPU characteristics. // @@ PreferredBatchSize []int32 `protobuf:"varint,1,rep,packed,name=preferred_batch_size,json=preferredBatchSize,proto3" json:"preferred_batch_size,omitempty"` // @@ .. cpp:var:: uint64 max_queue_delay_microseconds // @@ // @@ The maximum time, in microseconds, a request will be delayed in // @@ the scheduling queue to wait for additional requests for // @@ batching. Default is 0. // @@ MaxQueueDelayMicroseconds uint64 `` /* 141-byte string literal not displayed */ // @@ .. cpp:var:: bool preserve_ordering // @@ // @@ Should the dynamic batcher preserve the ordering of responses to // @@ match the order of requests received by the scheduler. Default is // @@ false. If true, the responses will be returned in the same order as // @@ the order of requests sent to the scheduler. If false, the responses // @@ may be returned in arbitrary order. This option is specifically // @@ needed when a sequence of related inference requests (i.e. inference // @@ requests with the same correlation ID) are sent to the dynamic // @@ batcher to ensure that the sequence responses are in the correct // @@ order. // @@ PreserveOrdering bool `protobuf:"varint,3,opt,name=preserve_ordering,json=preserveOrdering,proto3" json:"preserve_ordering,omitempty"` // @@ .. cpp:var:: uint64 priority_levels // @@ // @@ The number of priority levels to be enabled for the model, // @@ the priority level starts from 1 and 1 is the highest priority. // @@ Requests are handled in priority order with all priority 1 requests // @@ processed before priority 2, all priority 2 requests processed before // @@ priority 3, etc. Requests with the same priority level will be // @@ handled in the order that they are received. // @@ PriorityLevels uint64 `protobuf:"varint,4,opt,name=priority_levels,json=priorityLevels,proto3" json:"priority_levels,omitempty"` // @@ .. cpp:var:: uint64 default_priority_level // @@ // @@ The priority level used for requests that don't specify their // @@ priority. The value must be in the range [ 1, 'priority_levels' ]. // @@ DefaultPriorityLevel uint64 `protobuf:"varint,5,opt,name=default_priority_level,json=defaultPriorityLevel,proto3" json:"default_priority_level,omitempty"` // @@ .. cpp:var:: ModelQueuePolicy default_queue_policy // @@ // @@ The default queue policy used for requests that don't require // @@ priority handling and requests that specify priority levels where // @@ there is no specific policy given. If not specified, a policy with // @@ default field values will be used. // @@ DefaultQueuePolicy *ModelQueuePolicy `protobuf:"bytes,6,opt,name=default_queue_policy,json=defaultQueuePolicy,proto3" json:"default_queue_policy,omitempty"` // @@ .. cpp:var:: map<uint64, ModelQueuePolicy> priority_queue_policy // @@ // @@ Specify the queue policy for the priority level. The default queue // @@ policy will be used if a priority level doesn't specify a queue // @@ policy. // @@ PriorityQueuePolicy map[uint64]*ModelQueuePolicy `` /* 209-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelDynamicBatching @@ @@ Dynamic batching configuration. These settings control how dynamic @@ batching operates for the model. @@
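A sketch of a dynamic-batching configuration using the fields above: prefer batches of 4 or 8, wait up to 100 microseconds to form one, and enable two priority levels with level 2 as the default:

    db := &pb.ModelDynamicBatching{
        PreferredBatchSize:        []int32{4, 8},
        MaxQueueDelayMicroseconds: 100,
        PriorityLevels:            2,
        DefaultPriorityLevel:      2, // must lie in [1, PriorityLevels]
    }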
func (*ModelDynamicBatching) Descriptor
deprecated
func (*ModelDynamicBatching) Descriptor() ([]byte, []int)
Deprecated: Use ModelDynamicBatching.ProtoReflect.Descriptor instead.
func (*ModelDynamicBatching) GetDefaultPriorityLevel ¶
func (x *ModelDynamicBatching) GetDefaultPriorityLevel() uint64
func (*ModelDynamicBatching) GetDefaultQueuePolicy ¶
func (x *ModelDynamicBatching) GetDefaultQueuePolicy() *ModelQueuePolicy
func (*ModelDynamicBatching) GetMaxQueueDelayMicroseconds ¶
func (x *ModelDynamicBatching) GetMaxQueueDelayMicroseconds() uint64
func (*ModelDynamicBatching) GetPreferredBatchSize ¶
func (x *ModelDynamicBatching) GetPreferredBatchSize() []int32
func (*ModelDynamicBatching) GetPreserveOrdering ¶
func (x *ModelDynamicBatching) GetPreserveOrdering() bool
func (*ModelDynamicBatching) GetPriorityLevels ¶
func (x *ModelDynamicBatching) GetPriorityLevels() uint64
func (*ModelDynamicBatching) GetPriorityQueuePolicy ¶
func (x *ModelDynamicBatching) GetPriorityQueuePolicy() map[uint64]*ModelQueuePolicy
func (*ModelDynamicBatching) ProtoMessage ¶
func (*ModelDynamicBatching) ProtoMessage()
func (*ModelDynamicBatching) ProtoReflect ¶
func (x *ModelDynamicBatching) ProtoReflect() protoreflect.Message
func (*ModelDynamicBatching) Reset ¶
func (x *ModelDynamicBatching) Reset()
func (*ModelDynamicBatching) String ¶
func (x *ModelDynamicBatching) String() string
type ModelEnsembling ¶
type ModelEnsembling struct { // @@ .. cpp:var:: Step step (repeated) // @@ // @@ The models and the input / output mappings used within the ensemble. // @@ Step []*ModelEnsembling_Step `protobuf:"bytes,1,rep,name=step,proto3" json:"step,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelEnsembling @@ @@ Model ensembling configuration. These settings specify the models that @@ compose the ensemble and how data flows between the models. @@
func (*ModelEnsembling) Descriptor
deprecated
func (*ModelEnsembling) Descriptor() ([]byte, []int)
Deprecated: Use ModelEnsembling.ProtoReflect.Descriptor instead.
func (*ModelEnsembling) GetStep ¶
func (x *ModelEnsembling) GetStep() []*ModelEnsembling_Step
func (*ModelEnsembling) ProtoMessage ¶
func (*ModelEnsembling) ProtoMessage()
func (*ModelEnsembling) ProtoReflect ¶
func (x *ModelEnsembling) ProtoReflect() protoreflect.Message
func (*ModelEnsembling) Reset ¶
func (x *ModelEnsembling) Reset()
func (*ModelEnsembling) String ¶
func (x *ModelEnsembling) String() string
type ModelEnsembling_Step ¶
type ModelEnsembling_Step struct { // @@ .. cpp:var:: string model_name // @@ // @@ The name of the model to execute for this step of the ensemble. // @@ ModelName string `protobuf:"bytes,1,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"` // @@ .. cpp:var:: int64 model_version // @@ // @@ The version of the model to use for inference. If -1 // @@ the latest/most-recent version of the model is used. // @@ ModelVersion int64 `protobuf:"varint,2,opt,name=model_version,json=modelVersion,proto3" json:"model_version,omitempty"` // @@ .. cpp:var:: map<string,string> input_map // @@ // @@ Map from name of an input tensor on this step's model to ensemble // @@ tensor name. The ensemble tensor must have the same data type and // @@ shape as the model input. Each model input must be assigned to // @@ one ensemble tensor, but the same ensemble tensor can be assigned // @@ to multiple model inputs. // @@ InputMap map[string]string `` /* 173-byte string literal not displayed */ // @@ .. cpp:var:: map<string,string> output_map // @@ // @@ Map from name of an output tensor on this step's model to ensemble // @@ tensor name. The data type and shape of the ensemble tensor will // @@ be inferred from the model output. It is optional to assign all // @@ model outputs to ensemble tensors. One ensemble tensor name // @@ can appear in an output map only once. // @@ OutputMap map[string]string `` /* 176-byte string literal not displayed */ // @@ .. cpp:var:: string model_namespace // @@ // @@ [RESERVED] currently this field is reserved for internal use, users // @@ must not set any value to this field to avoid unexpected behavior. // @@ ModelNamespace string `protobuf:"bytes,5,opt,name=model_namespace,json=modelNamespace,proto3" json:"model_namespace,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message Step @@ @@ Each step specifies a model included in the ensemble, @@ maps ensemble tensor names to the model input tensors, @@ and maps model output tensors to ensemble tensor names @@
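A sketch of a two-step ensemble in which a preprocessing model feeds a classifier; all model and tensor names are hypothetical. Note how the ensemble-level tensor "preprocessed" stitches step one's output map to step two's input map:

    ensemble := &pb.ModelEnsembling{
        Step: []*pb.ModelEnsembling_Step{
            {
                ModelName:    "preprocess",
                ModelVersion: -1, // latest version
                InputMap:     map[string]string{"RAW": "ENSEMBLE_INPUT"},
                OutputMap:    map[string]string{"IMAGE": "preprocessed"},
            },
            {
                ModelName:    "classifier",
                ModelVersion: -1,
                InputMap:     map[string]string{"INPUT0": "preprocessed"},
                OutputMap:    map[string]string{"SCORES": "ENSEMBLE_OUTPUT"},
            },
        },
    }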
func (*ModelEnsembling_Step) Descriptor
deprecated
func (*ModelEnsembling_Step) Descriptor() ([]byte, []int)
Deprecated: Use ModelEnsembling_Step.ProtoReflect.Descriptor instead.
func (*ModelEnsembling_Step) GetInputMap ¶
func (x *ModelEnsembling_Step) GetInputMap() map[string]string
func (*ModelEnsembling_Step) GetModelName ¶
func (x *ModelEnsembling_Step) GetModelName() string
func (*ModelEnsembling_Step) GetModelNamespace ¶
func (x *ModelEnsembling_Step) GetModelNamespace() string
func (*ModelEnsembling_Step) GetModelVersion ¶
func (x *ModelEnsembling_Step) GetModelVersion() int64
func (*ModelEnsembling_Step) GetOutputMap ¶
func (x *ModelEnsembling_Step) GetOutputMap() map[string]string
func (*ModelEnsembling_Step) ProtoMessage ¶
func (*ModelEnsembling_Step) ProtoMessage()
func (*ModelEnsembling_Step) ProtoReflect ¶
func (x *ModelEnsembling_Step) ProtoReflect() protoreflect.Message
func (*ModelEnsembling_Step) Reset ¶
func (x *ModelEnsembling_Step) Reset()
func (*ModelEnsembling_Step) String ¶
func (x *ModelEnsembling_Step) String() string
type ModelInferRequest ¶
type ModelInferRequest struct { // @@ .. cpp:var:: string model_name // @@ // @@ The name of the model to use for inferencing. // @@ ModelName string `protobuf:"bytes,1,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"` // @@ .. cpp:var:: string model_version // @@ // @@ The version of the model to use for inference. If not // @@ given the latest/most-recent version of the model is used. // @@ ModelVersion string `protobuf:"bytes,2,opt,name=model_version,json=modelVersion,proto3" json:"model_version,omitempty"` // @@ .. cpp:var:: string id // @@ // @@ Optional identifier for the request. If specified will be // @@ returned in the response. // @@ Id string `protobuf:"bytes,3,opt,name=id,proto3" json:"id,omitempty"` // @@ .. cpp:var:: map<string,InferParameter> parameters // @@ // @@ Optional inference parameters. // @@ Parameters map[string]*InferParameter `` /* 161-byte string literal not displayed */ // @@ // @@ .. cpp:var:: InferInputTensor inputs (repeated) // @@ // @@ The input tensors for the inference. // @@ Inputs []*ModelInferRequest_InferInputTensor `protobuf:"bytes,5,rep,name=inputs,proto3" json:"inputs,omitempty"` // @@ // @@ .. cpp:var:: InferRequestedOutputTensor outputs (repeated) // @@ // @@ The requested output tensors for the inference. Optional, if not // @@ specified all outputs specified in the model config will be // @@ returned. // @@ Outputs []*ModelInferRequest_InferRequestedOutputTensor `protobuf:"bytes,6,rep,name=outputs,proto3" json:"outputs,omitempty"` // @@ // @@ .. cpp:var:: bytes raw_input_contents // @@ // @@ The data contained in an input tensor can be represented in // @@ "raw" bytes form or in the repeated type that matches the // @@ tensor's data type. Using the "raw" bytes form will // @@ typically allow higher performance due to the way protobuf // @@ allocation and reuse interacts with GRPC. For example, see // @@ https://github.com/grpc/grpc/issues/23231. // @@ // @@ To use the raw representation 'raw_input_contents' must be // @@ initialized with data for each tensor in the same order as // @@ 'inputs'. For each tensor, the size of this content must // @@ match what is expected by the tensor's shape and data // @@ type. The raw data must be the flattened, one-dimensional, // @@ row-major order of the tensor elements without any stride // @@ or padding between the elements. Note that the FP16 and BF16 data // @@ types must be represented as raw content as there is no // @@ specific data type for a 16-bit float type. // @@ // @@ If this field is specified then InferInputTensor::contents // @@ must not be specified for any input tensor. // @@ RawInputContents [][]byte `protobuf:"bytes,7,rep,name=raw_input_contents,json=rawInputContents,proto3" json:"raw_input_contents,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelInferRequest @@ @@ Request message for ModelInfer. @@
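A sketch of building a request that carries one FP32 tensor in the "raw" form; the little-endian byte order is an assumption consistent with Triton's binary tensor representation, and the model and tensor names are hypothetical:

    import (
        "encoding/binary"
        "math"
    )

    func buildRequest(vals []float32) *pb.ModelInferRequest {
        raw := make([]byte, 4*len(vals))
        for i, v := range vals {
            binary.LittleEndian.PutUint32(raw[4*i:], math.Float32bits(v))
        }
        return &pb.ModelInferRequest{
            ModelName: "my_model",
            Inputs: []*pb.ModelInferRequest_InferInputTensor{{
                Name:     "INPUT0",
                Datatype: "FP32",
                Shape:    []int64{1, int64(len(vals))},
                // Contents deliberately unset: raw_input_contents is used instead.
            }},
            RawInputContents: [][]byte{raw},
        }
    }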
func (*ModelInferRequest) Descriptor
deprecated
func (*ModelInferRequest) Descriptor() ([]byte, []int)
Deprecated: Use ModelInferRequest.ProtoReflect.Descriptor instead.
func (*ModelInferRequest) GetId ¶
func (x *ModelInferRequest) GetId() string
func (*ModelInferRequest) GetInputs ¶
func (x *ModelInferRequest) GetInputs() []*ModelInferRequest_InferInputTensor
func (*ModelInferRequest) GetModelName ¶
func (x *ModelInferRequest) GetModelName() string
func (*ModelInferRequest) GetModelVersion ¶
func (x *ModelInferRequest) GetModelVersion() string
func (*ModelInferRequest) GetOutputs ¶
func (x *ModelInferRequest) GetOutputs() []*ModelInferRequest_InferRequestedOutputTensor
func (*ModelInferRequest) GetParameters ¶
func (x *ModelInferRequest) GetParameters() map[string]*InferParameter
func (*ModelInferRequest) GetRawInputContents ¶
func (x *ModelInferRequest) GetRawInputContents() [][]byte
func (*ModelInferRequest) ProtoMessage ¶
func (*ModelInferRequest) ProtoMessage()
func (*ModelInferRequest) ProtoReflect ¶
func (x *ModelInferRequest) ProtoReflect() protoreflect.Message
func (*ModelInferRequest) Reset ¶
func (x *ModelInferRequest) Reset()
func (*ModelInferRequest) String ¶
func (x *ModelInferRequest) String() string
type ModelInferRequest_InferInputTensor ¶
type ModelInferRequest_InferInputTensor struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The tensor name. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ // @@ .. cpp:var:: string datatype // @@ // @@ The tensor data type. // @@ Datatype string `protobuf:"bytes,2,opt,name=datatype,proto3" json:"datatype,omitempty"` // @@ // @@ .. cpp:var:: int64 shape (repeated) // @@ // @@ The tensor shape. // @@ Shape []int64 `protobuf:"varint,3,rep,packed,name=shape,proto3" json:"shape,omitempty"` // @@ .. cpp:var:: map<string,InferParameter> parameters // @@ // @@ Optional inference input tensor parameters. // @@ Parameters map[string]*InferParameter `` /* 161-byte string literal not displayed */ // @@ .. cpp:var:: InferTensorContents contents // @@ // @@ The tensor contents using a data-type format. This field // @@ must not be specified if tensor contents are being specified // @@ in ModelInferRequest.raw_input_contents. // @@ Contents *InferTensorContents `protobuf:"bytes,5,opt,name=contents,proto3" json:"contents,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message InferInputTensor @@ @@ An input tensor for an inference request. @@
func (*ModelInferRequest_InferInputTensor) Descriptor
deprecated
func (*ModelInferRequest_InferInputTensor) Descriptor() ([]byte, []int)
Deprecated: Use ModelInferRequest_InferInputTensor.ProtoReflect.Descriptor instead.
func (*ModelInferRequest_InferInputTensor) GetContents ¶
func (x *ModelInferRequest_InferInputTensor) GetContents() *InferTensorContents
func (*ModelInferRequest_InferInputTensor) GetDatatype ¶
func (x *ModelInferRequest_InferInputTensor) GetDatatype() string
func (*ModelInferRequest_InferInputTensor) GetName ¶
func (x *ModelInferRequest_InferInputTensor) GetName() string
func (*ModelInferRequest_InferInputTensor) GetParameters ¶
func (x *ModelInferRequest_InferInputTensor) GetParameters() map[string]*InferParameter
func (*ModelInferRequest_InferInputTensor) GetShape ¶
func (x *ModelInferRequest_InferInputTensor) GetShape() []int64
func (*ModelInferRequest_InferInputTensor) ProtoMessage ¶
func (*ModelInferRequest_InferInputTensor) ProtoMessage()
func (*ModelInferRequest_InferInputTensor) ProtoReflect ¶
func (x *ModelInferRequest_InferInputTensor) ProtoReflect() protoreflect.Message
func (*ModelInferRequest_InferInputTensor) Reset ¶
func (x *ModelInferRequest_InferInputTensor) Reset()
func (*ModelInferRequest_InferInputTensor) String ¶
func (x *ModelInferRequest_InferInputTensor) String() string
type ModelInferRequest_InferRequestedOutputTensor ¶
type ModelInferRequest_InferRequestedOutputTensor struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The tensor name. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: map<string,InferParameter> parameters // @@ // @@ Optional requested output tensor parameters. // @@ Parameters map[string]*InferParameter `` /* 161-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message InferRequestedOutputTensor @@ @@ An output tensor requested for an inference request. @@
func (*ModelInferRequest_InferRequestedOutputTensor) Descriptor
deprecated
func (*ModelInferRequest_InferRequestedOutputTensor) Descriptor() ([]byte, []int)
Deprecated: Use ModelInferRequest_InferRequestedOutputTensor.ProtoReflect.Descriptor instead.
func (*ModelInferRequest_InferRequestedOutputTensor) GetName ¶
func (x *ModelInferRequest_InferRequestedOutputTensor) GetName() string
func (*ModelInferRequest_InferRequestedOutputTensor) GetParameters ¶
func (x *ModelInferRequest_InferRequestedOutputTensor) GetParameters() map[string]*InferParameter
func (*ModelInferRequest_InferRequestedOutputTensor) ProtoMessage ¶
func (*ModelInferRequest_InferRequestedOutputTensor) ProtoMessage()
func (*ModelInferRequest_InferRequestedOutputTensor) ProtoReflect ¶
func (x *ModelInferRequest_InferRequestedOutputTensor) ProtoReflect() protoreflect.Message
func (*ModelInferRequest_InferRequestedOutputTensor) Reset ¶
func (x *ModelInferRequest_InferRequestedOutputTensor) Reset()
func (*ModelInferRequest_InferRequestedOutputTensor) String ¶
func (x *ModelInferRequest_InferRequestedOutputTensor) String() string
type ModelInferResponse ¶
type ModelInferResponse struct { // @@ .. cpp:var:: string model_name // @@ // @@ The name of the model used for inference. // @@ ModelName string `protobuf:"bytes,1,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"` // @@ .. cpp:var:: string model_version // @@ // @@ The version of the model used for inference. // @@ ModelVersion string `protobuf:"bytes,2,opt,name=model_version,json=modelVersion,proto3" json:"model_version,omitempty"` // @@ .. cpp:var:: string id // @@ // @@ The id of the inference request if one was specified. // @@ Id string `protobuf:"bytes,3,opt,name=id,proto3" json:"id,omitempty"` // @@ .. cpp:var:: map<string,InferParameter> parameters // @@ // @@ Optional inference response parameters. // @@ Parameters map[string]*InferParameter `` /* 161-byte string literal not displayed */ // @@ // @@ .. cpp:var:: InferOutputTensor outputs (repeated) // @@ // @@ The output tensors holding inference results. // @@ Outputs []*ModelInferResponse_InferOutputTensor `protobuf:"bytes,5,rep,name=outputs,proto3" json:"outputs,omitempty"` // @@ // @@ .. cpp:var:: bytes raw_output_contents // @@ // @@ The data contained in an output tensor can be represented in // @@ "raw" bytes form or in the repeated type that matches the // @@ tensor's data type. Using the "raw" bytes form will // @@ typically allow higher performance due to the way protobuf // @@ allocation and reuse interacts with GRPC. For example, see // @@ https://github.com/grpc/grpc/issues/23231. // @@ // @@ To use the raw representation 'raw_output_contents' must be // @@ initialized with data for each tensor in the same order as // @@ 'outputs'. For each tensor, the size of this content must // @@ match what is expected by the tensor's shape and data // @@ type. The raw data must be the flattened, one-dimensional, // @@ row-major order of the tensor elements without any stride // @@ or padding between the elements. Note that the FP16 and BF16 data // @@ types must be represented as raw content as there is no // @@ specific data type for a 16-bit float type. // @@ // @@ If this field is specified then InferOutputTensor::contents // @@ must not be specified for any output tensor. // @@ RawOutputContents [][]byte `protobuf:"bytes,6,rep,name=raw_output_contents,json=rawOutputContents,proto3" json:"raw_output_contents,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelInferResponse @@ @@ Response message for ModelInfer. @@
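The inverse operation on the response side, again assuming little-endian FP32 bytes in raw_output_contents; a sketch only, with encoding/binary and math imported:

    // Decode the i-th raw output back into float32 values.
    func fp32Output(resp *pb.ModelInferResponse, i int) []float32 {
        raw := resp.GetRawOutputContents()[i]
        out := make([]float32, len(raw)/4)
        for j := range out {
            out[j] = math.Float32frombits(binary.LittleEndian.Uint32(raw[4*j:]))
        }
        return out
    }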
func (*ModelInferResponse) Descriptor
deprecated
func (*ModelInferResponse) Descriptor() ([]byte, []int)
Deprecated: Use ModelInferResponse.ProtoReflect.Descriptor instead.
func (*ModelInferResponse) GetId ¶
func (x *ModelInferResponse) GetId() string
func (*ModelInferResponse) GetModelName ¶
func (x *ModelInferResponse) GetModelName() string
func (*ModelInferResponse) GetModelVersion ¶
func (x *ModelInferResponse) GetModelVersion() string
func (*ModelInferResponse) GetOutputs ¶
func (x *ModelInferResponse) GetOutputs() []*ModelInferResponse_InferOutputTensor
func (*ModelInferResponse) GetParameters ¶
func (x *ModelInferResponse) GetParameters() map[string]*InferParameter
func (*ModelInferResponse) GetRawOutputContents ¶
func (x *ModelInferResponse) GetRawOutputContents() [][]byte
func (*ModelInferResponse) ProtoMessage ¶
func (*ModelInferResponse) ProtoMessage()
func (*ModelInferResponse) ProtoReflect ¶
func (x *ModelInferResponse) ProtoReflect() protoreflect.Message
func (*ModelInferResponse) Reset ¶
func (x *ModelInferResponse) Reset()
func (*ModelInferResponse) String ¶
func (x *ModelInferResponse) String() string
type ModelInferResponse_InferOutputTensor ¶
type ModelInferResponse_InferOutputTensor struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The tensor name. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ // @@ .. cpp:var:: string datatype // @@ // @@ The tensor data type. // @@ Datatype string `protobuf:"bytes,2,opt,name=datatype,proto3" json:"datatype,omitempty"` // @@ // @@ .. cpp:var:: int64 shape (repeated) // @@ // @@ The tensor shape. // @@ Shape []int64 `protobuf:"varint,3,rep,packed,name=shape,proto3" json:"shape,omitempty"` // @@ .. cpp:var:: map<string,InferParameter> parameters // @@ // @@ Optional output tensor parameters. // @@ Parameters map[string]*InferParameter `` /* 161-byte string literal not displayed */ // @@ .. cpp:var:: InferTensorContents contents // @@ // @@ The tensor contents using a data-type format. This field // @@ must not be specified if tensor contents are being specified // @@ in ModelInferResponse.raw_output_contents. // @@ Contents *InferTensorContents `protobuf:"bytes,5,opt,name=contents,proto3" json:"contents,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message InferOutputTensor @@ @@ An output tensor returned for an inference request. @@
func (*ModelInferResponse_InferOutputTensor) Descriptor
deprecated
func (*ModelInferResponse_InferOutputTensor) Descriptor() ([]byte, []int)
Deprecated: Use ModelInferResponse_InferOutputTensor.ProtoReflect.Descriptor instead.
func (*ModelInferResponse_InferOutputTensor) GetContents ¶
func (x *ModelInferResponse_InferOutputTensor) GetContents() *InferTensorContents
func (*ModelInferResponse_InferOutputTensor) GetDatatype ¶
func (x *ModelInferResponse_InferOutputTensor) GetDatatype() string
func (*ModelInferResponse_InferOutputTensor) GetName ¶
func (x *ModelInferResponse_InferOutputTensor) GetName() string
func (*ModelInferResponse_InferOutputTensor) GetParameters ¶
func (x *ModelInferResponse_InferOutputTensor) GetParameters() map[string]*InferParameter
func (*ModelInferResponse_InferOutputTensor) GetShape ¶
func (x *ModelInferResponse_InferOutputTensor) GetShape() []int64
func (*ModelInferResponse_InferOutputTensor) ProtoMessage ¶
func (*ModelInferResponse_InferOutputTensor) ProtoMessage()
func (*ModelInferResponse_InferOutputTensor) ProtoReflect ¶
func (x *ModelInferResponse_InferOutputTensor) ProtoReflect() protoreflect.Message
func (*ModelInferResponse_InferOutputTensor) Reset ¶
func (x *ModelInferResponse_InferOutputTensor) Reset()
func (*ModelInferResponse_InferOutputTensor) String ¶
func (x *ModelInferResponse_InferOutputTensor) String() string
type ModelInput ¶
type ModelInput struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the input. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: DataType data_type // @@ // @@ The data-type of the input. // @@ DataType DataType `protobuf:"varint,2,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"` // @@ .. cpp:var:: Format format // @@ // @@ The format of the input. Optional. // @@ Format ModelInput_Format `protobuf:"varint,3,opt,name=format,proto3,enum=inference.ModelInput_Format" json:"format,omitempty"` // @@ .. cpp:var:: int64 dims (repeated) // @@ // @@ The dimensions/shape of the input tensor that must be provided // @@ when invoking the inference API for this model. // @@ Dims []int64 `protobuf:"varint,4,rep,packed,name=dims,proto3" json:"dims,omitempty"` // @@ .. cpp:var:: ModelTensorReshape reshape // @@ // @@ The shape expected for this input by the backend. The input will // @@ be reshaped to this before being presented to the backend. The // @@ reshape must have the same number of elements as the input shape // @@ specified by 'dims'. Optional. // @@ Reshape *ModelTensorReshape `protobuf:"bytes,5,opt,name=reshape,proto3" json:"reshape,omitempty"` // @@ .. cpp:var:: bool is_shape_tensor // @@ // @@ Whether or not the input is a shape tensor to the model. This field // @@ is currently supported only for the TensorRT model. An error will be // @@ generated if this specification does not comply with the underlying // @@ model. // @@ IsShapeTensor bool `protobuf:"varint,6,opt,name=is_shape_tensor,json=isShapeTensor,proto3" json:"is_shape_tensor,omitempty"` // @@ .. cpp:var:: bool allow_ragged_batch // @@ // @@ Whether or not the input is allowed to be "ragged" in a dynamically // @@ created batch. Default is false indicating that two requests will // @@ only be batched if this tensor has the same shape in both requests. // @@ True indicates that two requests can be batched even if this tensor // @@ has a different shape in each request. // @@ AllowRaggedBatch bool `protobuf:"varint,7,opt,name=allow_ragged_batch,json=allowRaggedBatch,proto3" json:"allow_ragged_batch,omitempty"` // @@ .. cpp:var:: bool optional // @@ // @@ Whether or not the input is optional for the model execution. // @@ If true, the input is not required in the inference request. // @@ Default value is false. // @@ Optional bool `protobuf:"varint,8,opt,name=optional,proto3" json:"optional,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelInput @@ @@ An input required by the model. @@
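A sketch of a ModelInput for an NHWC image tensor; with max_batch_size > 0 in the enclosing ModelConfig, Dims lists only the three non-batch dimensions, per the Format documentation below. The tensor name and the DataType_TYPE_FP32 enum value name are assumptions:

    in := &pb.ModelInput{
        Name:     "image",               // hypothetical tensor name
        DataType: pb.DataType_TYPE_FP32, // assumed enum value name
        Format:   pb.ModelInput_FORMAT_NHWC,
        Dims:     []int64{224, 224, 3},  // H, W, C; batch dimension implied
    }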
func (*ModelInput) Descriptor
deprecated
func (*ModelInput) Descriptor() ([]byte, []int)
Deprecated: Use ModelInput.ProtoReflect.Descriptor instead.
func (*ModelInput) GetAllowRaggedBatch ¶
func (x *ModelInput) GetAllowRaggedBatch() bool
func (*ModelInput) GetDataType ¶
func (x *ModelInput) GetDataType() DataType
func (*ModelInput) GetDims ¶
func (x *ModelInput) GetDims() []int64
func (*ModelInput) GetFormat ¶
func (x *ModelInput) GetFormat() ModelInput_Format
func (*ModelInput) GetIsShapeTensor ¶
func (x *ModelInput) GetIsShapeTensor() bool
func (*ModelInput) GetName ¶
func (x *ModelInput) GetName() string
func (*ModelInput) GetOptional ¶
func (x *ModelInput) GetOptional() bool
func (*ModelInput) GetReshape ¶
func (x *ModelInput) GetReshape() *ModelTensorReshape
func (*ModelInput) ProtoMessage ¶
func (*ModelInput) ProtoMessage()
func (*ModelInput) ProtoReflect ¶
func (x *ModelInput) ProtoReflect() protoreflect.Message
func (*ModelInput) Reset ¶
func (x *ModelInput) Reset()
func (*ModelInput) String ¶
func (x *ModelInput) String() string
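A configuration sketch: populating a ModelInput directly in Go. The name and dimensions are illustrative only, and DataType_TYPE_FP32 refers to the DataType constant defined earlier on this page.

    // Sketch: an FP32 image input in NHWC layout. Per the FORMAT_NHWC
    // note below, Dims lists only the three non-batch dimensions.
    input := &ModelInput{
        Name:     "input_0",
        DataType: DataType_TYPE_FP32,
        Format:   ModelInput_FORMAT_NHWC,
        Dims:     []int64{224, 224, 3},
        Optional: false, // a required input (the default)
    }
    _ = input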
type ModelInput_Format ¶
type ModelInput_Format int32
@@ @@ .. cpp:enum:: Format @@ @@ The format for the input. @@
const (
    // @@ .. cpp:enumerator:: Format::FORMAT_NONE = 0
    // @@
    // @@ The input has no specific format. This is the default.
    // @@
    ModelInput_FORMAT_NONE ModelInput_Format = 0
    // @@ .. cpp:enumerator:: Format::FORMAT_NHWC = 1
    // @@
    // @@ HWC image format. Tensors with this format require 3 dimensions
    // @@ if the model does not support batching (max_batch_size = 0) or 4
    // @@ dimensions if the model does support batching (max_batch_size
    // @@ >= 1). In either case the 'dims' below should only specify the
    // @@ 3 non-batch dimensions (i.e. HWC or CHW).
    // @@
    ModelInput_FORMAT_NHWC ModelInput_Format = 1
    // @@ .. cpp:enumerator:: Format::FORMAT_NCHW = 2
    // @@
    // @@ CHW image format. Tensors with this format require 3 dimensions
    // @@ if the model does not support batching (max_batch_size = 0) or 4
    // @@ dimensions if the model does support batching (max_batch_size
    // @@ >= 1). In either case the 'dims' below should only specify the
    // @@ 3 non-batch dimensions (i.e. HWC or CHW).
    // @@
    ModelInput_FORMAT_NCHW ModelInput_Format = 2
)
func (ModelInput_Format) Descriptor ¶
func (ModelInput_Format) Descriptor() protoreflect.EnumDescriptor
func (ModelInput_Format) Enum ¶
func (x ModelInput_Format) Enum() *ModelInput_Format
func (ModelInput_Format) EnumDescriptor
deprecated
func (ModelInput_Format) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelInput_Format.Descriptor instead.
func (ModelInput_Format) Number ¶
func (x ModelInput_Format) Number() protoreflect.EnumNumber
func (ModelInput_Format) String ¶
func (x ModelInput_Format) String() string
func (ModelInput_Format) Type ¶
func (ModelInput_Format) Type() protoreflect.EnumType
type ModelInstanceGroup ¶
type ModelInstanceGroup struct {
    // @@ .. cpp:var:: string name
    // @@
    // @@ Optional name of this group of instances. If not specified the
    // @@ name will be formed as <model name>_<group number>. The name of
    // @@ individual instances will be further formed by a unique instance
    // @@ number and GPU index:
    // @@
    Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
    // @@ .. cpp:var:: Kind kind
    // @@
    // @@ The kind of this instance group. Default is KIND_AUTO. If
    // @@ KIND_AUTO or KIND_GPU then both 'count' and 'gpu' are valid and
    // @@ may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid
    // @@ and 'gpu' cannot be specified.
    // @@
    Kind ModelInstanceGroup_Kind `protobuf:"varint,4,opt,name=kind,proto3,enum=inference.ModelInstanceGroup_Kind" json:"kind,omitempty"`
    // @@ .. cpp:var:: int32 count
    // @@
    // @@ For a group assigned to GPU, the number of instances created for
    // @@ each GPU listed in 'gpus'. For a group assigned to CPU the number
    // @@ of instances created. Default is 1.
    // @@
    Count int32 `protobuf:"varint,2,opt,name=count,proto3" json:"count,omitempty"`
    // @@ .. cpp:var:: ModelRateLimiter rate_limiter
    // @@
    // @@ The rate limiter specific settings to be associated with this
    // @@ instance group. Optional, if not specified no rate limiting
    // @@ will be applied to this instance group.
    // @@
    RateLimiter *ModelRateLimiter `protobuf:"bytes,6,opt,name=rate_limiter,json=rateLimiter,proto3" json:"rate_limiter,omitempty"`
    // @@ .. cpp:var:: int32 gpus (repeated)
    // @@
    // @@ GPU(s) where instances should be available. For each GPU listed,
    // @@ 'count' instances of the model will be available. Setting 'gpus'
    // @@ to empty (or not specifying at all) is equivalent to listing all
    // @@ available GPUs.
    // @@
    Gpus []int32 `protobuf:"varint,3,rep,packed,name=gpus,proto3" json:"gpus,omitempty"`
    // @@ .. cpp:var:: SecondaryDevice secondary_devices (repeated)
    // @@
    // @@ Secondary devices that are required by instances specified by this
    // @@ instance group. Optional.
    // @@
    SecondaryDevices []*ModelInstanceGroup_SecondaryDevice `protobuf:"bytes,8,rep,name=secondary_devices,json=secondaryDevices,proto3" json:"secondary_devices,omitempty"`
    // @@ .. cpp:var:: string profile (repeated)
    // @@
    // @@ For TensorRT models containing multiple optimization profiles, this
    // @@ parameter specifies a set of optimization profiles available to this
    // @@ instance group. The inference server will choose the optimal profile
    // @@ based on the shapes of the input tensors. This field should lie
    // @@ between 0 and <TotalNumberOfOptimizationProfilesInPlanModel> - 1
    // @@ and be specified only for the TensorRT backend, otherwise an error
    // @@ will be generated. If not specified, the server will select the first
    // @@ optimization profile by default.
    // @@
    Profile []string `protobuf:"bytes,5,rep,name=profile,proto3" json:"profile,omitempty"`
    // @@ .. cpp:var:: bool passive
    // @@
    // @@ Whether the instances within this instance group will be accepting
    // @@ inference requests from the scheduler. If true, the instances will
    // @@ not be added to the scheduler. Default value is false.
    // @@
    Passive bool `protobuf:"varint,7,opt,name=passive,proto3" json:"passive,omitempty"`
    // @@ .. cpp:var:: string host_policy
    // @@
    // @@ The host policy name that the instance is to be associated with.
    // @@ The default value is set to reflect the device kind of the instance,
    // @@ for instance, KIND_CPU is "cpu", KIND_MODEL is "model" and
    // @@ KIND_GPU is "gpu_<gpu_id>".
    // @@
    HostPolicy string `protobuf:"bytes,9,opt,name=host_policy,json=hostPolicy,proto3" json:"host_policy,omitempty"`
    // contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ModelInstanceGroup @@ @@ A group of one or more instances of a model and resources made @@ available for those instances. @@
func (*ModelInstanceGroup) Descriptor
deprecated
func (*ModelInstanceGroup) Descriptor() ([]byte, []int)
Deprecated: Use ModelInstanceGroup.ProtoReflect.Descriptor instead.
func (*ModelInstanceGroup) GetCount ¶
func (x *ModelInstanceGroup) GetCount() int32
func (*ModelInstanceGroup) GetGpus ¶
func (x *ModelInstanceGroup) GetGpus() []int32
func (*ModelInstanceGroup) GetHostPolicy ¶
func (x *ModelInstanceGroup) GetHostPolicy() string
func (*ModelInstanceGroup) GetKind ¶
func (x *ModelInstanceGroup) GetKind() ModelInstanceGroup_Kind
func (*ModelInstanceGroup) GetName ¶
func (x *ModelInstanceGroup) GetName() string
func (*ModelInstanceGroup) GetPassive ¶
func (x *ModelInstanceGroup) GetPassive() bool
func (*ModelInstanceGroup) GetProfile ¶
func (x *ModelInstanceGroup) GetProfile() []string
func (*ModelInstanceGroup) GetRateLimiter ¶
func (x *ModelInstanceGroup) GetRateLimiter() *ModelRateLimiter
func (*ModelInstanceGroup) GetSecondaryDevices ¶
func (x *ModelInstanceGroup) GetSecondaryDevices() []*ModelInstanceGroup_SecondaryDevice
func (*ModelInstanceGroup) ProtoMessage ¶
func (*ModelInstanceGroup) ProtoMessage()
func (*ModelInstanceGroup) ProtoReflect ¶
func (x *ModelInstanceGroup) ProtoReflect() protoreflect.Message
func (*ModelInstanceGroup) Reset ¶
func (x *ModelInstanceGroup) Reset()
func (*ModelInstanceGroup) String ¶
func (x *ModelInstanceGroup) String() string
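A sketch of a GPU instance group using the Kind enumerators documented below; the counts and GPU indices are illustrative.

    // Sketch: two instances of the model on each of GPUs 0 and 1.
    // Name is omitted, so it defaults to <model name>_<group number>.
    group := &ModelInstanceGroup{
        Kind:  ModelInstanceGroup_KIND_GPU,
        Count: 2,
        Gpus:  []int32{0, 1},
    }
    _ = group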
type ModelInstanceGroup_Kind ¶
type ModelInstanceGroup_Kind int32
@@ @@ .. cpp:enum:: Kind @@ @@ Kind of this instance group. @@
const (
    // @@ .. cpp:enumerator:: Kind::KIND_AUTO = 0
    // @@
    // @@ This instance group represents instances that can run on either
    // @@ CPU or GPU. If all GPUs listed in 'gpus' are available then
    // @@ instances will be created on GPU(s), otherwise instances will
    // @@ be created on CPU.
    // @@
    ModelInstanceGroup_KIND_AUTO ModelInstanceGroup_Kind = 0
    // @@ .. cpp:enumerator:: Kind::KIND_GPU = 1
    // @@
    // @@ This instance group represents instances that must run on the
    // @@ GPU.
    // @@
    ModelInstanceGroup_KIND_GPU ModelInstanceGroup_Kind = 1
    // @@ .. cpp:enumerator:: Kind::KIND_CPU = 2
    // @@
    // @@ This instance group represents instances that must run on the
    // @@ CPU.
    // @@
    ModelInstanceGroup_KIND_CPU ModelInstanceGroup_Kind = 2
    // @@ .. cpp:enumerator:: Kind::KIND_MODEL = 3
    // @@
    // @@ This instance group represents instances that should run on the
    // @@ CPU and/or GPU(s) as specified by the model or backend itself.
    // @@ The inference server will not override the model/backend
    // @@ settings.
    // @@
    ModelInstanceGroup_KIND_MODEL ModelInstanceGroup_Kind = 3
)
func (ModelInstanceGroup_Kind) Descriptor ¶
func (ModelInstanceGroup_Kind) Descriptor() protoreflect.EnumDescriptor
func (ModelInstanceGroup_Kind) Enum ¶
func (x ModelInstanceGroup_Kind) Enum() *ModelInstanceGroup_Kind
func (ModelInstanceGroup_Kind) EnumDescriptor
deprecated
func (ModelInstanceGroup_Kind) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelInstanceGroup_Kind.Descriptor instead.
func (ModelInstanceGroup_Kind) Number ¶
func (x ModelInstanceGroup_Kind) Number() protoreflect.EnumNumber
func (ModelInstanceGroup_Kind) String ¶
func (x ModelInstanceGroup_Kind) String() string
func (ModelInstanceGroup_Kind) Type ¶
func (ModelInstanceGroup_Kind) Type() protoreflect.EnumType
type ModelInstanceGroup_SecondaryDevice ¶
type ModelInstanceGroup_SecondaryDevice struct {
    // @@ .. cpp:var:: SecondaryDeviceKind kind
    // @@
    // @@ The secondary device kind.
    // @@
    Kind ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind `` /* 132-byte string literal not displayed */
    // @@ .. cpp:var:: int64 device_id
    // @@
    // @@ Identifier for the secondary device.
    // @@
    DeviceId int64 `protobuf:"varint,2,opt,name=device_id,json=deviceId,proto3" json:"device_id,omitempty"`
    // contains filtered or unexported fields
}
@@ @@ .. cpp:var:: message SecondaryDevice @@ @@ A secondary device required for a model instance. @@
func (*ModelInstanceGroup_SecondaryDevice) Descriptor
deprecated
func (*ModelInstanceGroup_SecondaryDevice) Descriptor() ([]byte, []int)
Deprecated: Use ModelInstanceGroup_SecondaryDevice.ProtoReflect.Descriptor instead.
func (*ModelInstanceGroup_SecondaryDevice) GetDeviceId ¶
func (x *ModelInstanceGroup_SecondaryDevice) GetDeviceId() int64
func (*ModelInstanceGroup_SecondaryDevice) ProtoMessage ¶
func (*ModelInstanceGroup_SecondaryDevice) ProtoMessage()
func (*ModelInstanceGroup_SecondaryDevice) ProtoReflect ¶
func (x *ModelInstanceGroup_SecondaryDevice) ProtoReflect() protoreflect.Message
func (*ModelInstanceGroup_SecondaryDevice) Reset ¶
func (x *ModelInstanceGroup_SecondaryDevice) Reset()
func (*ModelInstanceGroup_SecondaryDevice) String ¶
func (x *ModelInstanceGroup_SecondaryDevice) String() string
type ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind ¶
type ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind int32
@@ @@ .. cpp:enum:: SecondaryDeviceKind @@ @@ The kind of the secondary device. @@
const (
    // @@ .. cpp:enumerator:: SecondaryDeviceKind::KIND_NVDLA = 0
    // @@
    // @@ An NVDLA core. http://nvdla.org
    // @@ Currently KIND_NVDLA is only supported by the TensorRT backend.
    // @@
    ModelInstanceGroup_SecondaryDevice_KIND_NVDLA ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind = 0
)
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Descriptor ¶
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Descriptor() protoreflect.EnumDescriptor
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) EnumDescriptor
deprecated
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind.Descriptor instead.
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Number ¶
func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Number() protoreflect.EnumNumber
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) String ¶
func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) String() string
type ModelMetadataRequest ¶
type ModelMetadataRequest struct {
    // @@
    // @@ .. cpp:var:: string name
    // @@
    // @@ The name of the model.
    // @@
    Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
    // @@ .. cpp:var:: string version
    // @@
    // @@ The version of the model. If not given the server will choose
    // @@ a version based on the model and internal policy.
    // @@
    Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"`
    // contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ModelMetadataRequest @@ @@ Request message for ModelMetadata. @@
func (*ModelMetadataRequest) Descriptor
deprecated
func (*ModelMetadataRequest) Descriptor() ([]byte, []int)
Deprecated: Use ModelMetadataRequest.ProtoReflect.Descriptor instead.
func (*ModelMetadataRequest) GetName ¶
func (x *ModelMetadataRequest) GetName() string
func (*ModelMetadataRequest) GetVersion ¶
func (x *ModelMetadataRequest) GetVersion() string
func (*ModelMetadataRequest) ProtoMessage ¶
func (*ModelMetadataRequest) ProtoMessage()
func (*ModelMetadataRequest) ProtoReflect ¶
func (x *ModelMetadataRequest) ProtoReflect() protoreflect.Message
func (*ModelMetadataRequest) Reset ¶
func (x *ModelMetadataRequest) Reset()
func (*ModelMetadataRequest) String ¶
func (x *ModelMetadataRequest) String() string
type ModelMetadataResponse ¶
type ModelMetadataResponse struct {
    // @@
    // @@ .. cpp:var:: string name
    // @@
    // @@ The model name.
    // @@
    Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
    // @@
    // @@ .. cpp:var:: string versions (repeated)
    // @@
    // @@ The versions of the model.
    // @@
    Versions []string `protobuf:"bytes,2,rep,name=versions,proto3" json:"versions,omitempty"`
    // @@
    // @@ .. cpp:var:: string platform
    // @@
    // @@ The model's platform.
    // @@
    Platform string `protobuf:"bytes,3,opt,name=platform,proto3" json:"platform,omitempty"`
    // @@
    // @@ .. cpp:var:: TensorMetadata inputs (repeated)
    // @@
    // @@ The model's inputs.
    // @@
    Inputs []*ModelMetadataResponse_TensorMetadata `protobuf:"bytes,4,rep,name=inputs,proto3" json:"inputs,omitempty"`
    // @@
    // @@ .. cpp:var:: TensorMetadata outputs (repeated)
    // @@
    // @@ The model's outputs.
    // @@
    Outputs []*ModelMetadataResponse_TensorMetadata `protobuf:"bytes,5,rep,name=outputs,proto3" json:"outputs,omitempty"`
    // contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ModelMetadataResponse @@ @@ Response message for ModelMetadata. @@
func (*ModelMetadataResponse) Descriptor
deprecated
func (*ModelMetadataResponse) Descriptor() ([]byte, []int)
Deprecated: Use ModelMetadataResponse.ProtoReflect.Descriptor instead.
func (*ModelMetadataResponse) GetInputs ¶
func (x *ModelMetadataResponse) GetInputs() []*ModelMetadataResponse_TensorMetadata
func (*ModelMetadataResponse) GetName ¶
func (x *ModelMetadataResponse) GetName() string
func (*ModelMetadataResponse) GetOutputs ¶
func (x *ModelMetadataResponse) GetOutputs() []*ModelMetadataResponse_TensorMetadata
func (*ModelMetadataResponse) GetPlatform ¶
func (x *ModelMetadataResponse) GetPlatform() string
func (*ModelMetadataResponse) GetVersions ¶
func (x *ModelMetadataResponse) GetVersions() []string
func (*ModelMetadataResponse) ProtoMessage ¶
func (*ModelMetadataResponse) ProtoMessage()
func (*ModelMetadataResponse) ProtoReflect ¶
func (x *ModelMetadataResponse) ProtoReflect() protoreflect.Message
func (*ModelMetadataResponse) Reset ¶
func (x *ModelMetadataResponse) Reset()
func (*ModelMetadataResponse) String ¶
func (x *ModelMetadataResponse) String() string
type ModelMetadataResponse_TensorMetadata ¶
type ModelMetadataResponse_TensorMetadata struct {
    // @@
    // @@ .. cpp:var:: string name
    // @@
    // @@ The tensor name.
    // @@
    Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
    // @@
    // @@ .. cpp:var:: string datatype
    // @@
    // @@ The tensor data type.
    // @@
    Datatype string `protobuf:"bytes,2,opt,name=datatype,proto3" json:"datatype,omitempty"`
    // @@
    // @@ .. cpp:var:: int64 shape (repeated)
    // @@
    // @@ The tensor shape. A variable-size dimension is represented
    // @@ by a -1 value.
    // @@
    Shape []int64 `protobuf:"varint,3,rep,packed,name=shape,proto3" json:"shape,omitempty"`
    // contains filtered or unexported fields
}
@@ @@ .. cpp:var:: message TensorMetadata @@ @@ Metadata for a tensor. @@
func (*ModelMetadataResponse_TensorMetadata) Descriptor
deprecated
func (*ModelMetadataResponse_TensorMetadata) Descriptor() ([]byte, []int)
Deprecated: Use ModelMetadataResponse_TensorMetadata.ProtoReflect.Descriptor instead.
func (*ModelMetadataResponse_TensorMetadata) GetDatatype ¶
func (x *ModelMetadataResponse_TensorMetadata) GetDatatype() string
func (*ModelMetadataResponse_TensorMetadata) GetName ¶
func (x *ModelMetadataResponse_TensorMetadata) GetName() string
func (*ModelMetadataResponse_TensorMetadata) GetShape ¶
func (x *ModelMetadataResponse_TensorMetadata) GetShape() []int64
func (*ModelMetadataResponse_TensorMetadata) ProtoMessage ¶
func (*ModelMetadataResponse_TensorMetadata) ProtoMessage()
func (*ModelMetadataResponse_TensorMetadata) ProtoReflect ¶
func (x *ModelMetadataResponse_TensorMetadata) ProtoReflect() protoreflect.Message
func (*ModelMetadataResponse_TensorMetadata) Reset ¶
func (x *ModelMetadataResponse_TensorMetadata) Reset()
func (*ModelMetadataResponse_TensorMetadata) String ¶
func (x *ModelMetadataResponse_TensorMetadata) String() string
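Putting the request, response, and TensorMetadata types together, a sketch of a metadata query. It assumes client is a GRPCInferenceServiceClient (the generated client counterpart of the GRPCInferenceServiceServer registered above, created with NewGRPCInferenceServiceClient over a dialed gRPC connection) and uses the standard context and fmt packages; the model name is illustrative.

    // Sketch: fetch and print a model's metadata.
    func describeModel(ctx context.Context, client GRPCInferenceServiceClient) error {
        // Version is omitted, so the server chooses one per its policy.
        md, err := client.ModelMetadata(ctx, &ModelMetadataRequest{Name: "my_model"})
        if err != nil {
            return err
        }
        fmt.Printf("model %s platform %s versions %v\n",
            md.GetName(), md.GetPlatform(), md.GetVersions())
        for _, in := range md.GetInputs() {
            // A -1 dimension marks a variable-size axis.
            fmt.Printf("  input %s %s %v\n", in.GetName(), in.GetDatatype(), in.GetShape())
        }
        return nil
    }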
type ModelOperations ¶
type ModelOperations struct {
    // @@ .. cpp:var:: string op_library_filename (repeated)
    // @@
    // @@ Optional paths of the libraries providing custom operations for
    // @@ this model. Valid only for ONNX models.
    // @@
    OpLibraryFilename []string `protobuf:"bytes,1,rep,name=op_library_filename,json=opLibraryFilename,proto3" json:"op_library_filename,omitempty"`
    // contains filtered or unexported fields
}
@@ @@ .. cpp:var:: message ModelOperations @@ @@ The metadata of libraries providing custom operations for this model. @@
func (*ModelOperations) Descriptor
deprecated
func (*ModelOperations) Descriptor() ([]byte, []int)
Deprecated: Use ModelOperations.ProtoReflect.Descriptor instead.
func (*ModelOperations) GetOpLibraryFilename ¶
func (x *ModelOperations) GetOpLibraryFilename() []string
func (*ModelOperations) ProtoMessage ¶
func (*ModelOperations) ProtoMessage()
func (*ModelOperations) ProtoReflect ¶
func (x *ModelOperations) ProtoReflect() protoreflect.Message
func (*ModelOperations) Reset ¶
func (x *ModelOperations) Reset()
func (*ModelOperations) String ¶
func (x *ModelOperations) String() string
type ModelOptimizationPolicy ¶
type ModelOptimizationPolicy struct {
    // @@ .. cpp:var:: Graph graph
    // @@
    // @@ The graph optimization setting for the model. Optional.
    // @@
    Graph *ModelOptimizationPolicy_Graph `protobuf:"bytes,1,opt,name=graph,proto3" json:"graph,omitempty"`
    // @@ .. cpp:var:: ModelPriority priority
    // @@
    // @@ The priority setting for the model. Optional.
    // @@
    Priority ModelOptimizationPolicy_ModelPriority `protobuf:"varint,2,opt,name=priority,proto3,enum=inference.ModelOptimizationPolicy_ModelPriority" json:"priority,omitempty"`
    // @@ .. cpp:var:: Cuda cuda
    // @@
    // @@ CUDA-specific optimization settings. Optional.
    // @@
    Cuda *ModelOptimizationPolicy_Cuda `protobuf:"bytes,3,opt,name=cuda,proto3" json:"cuda,omitempty"`
    // @@ .. cpp:var:: ExecutionAccelerators execution_accelerators
    // @@
    // @@ The accelerators used for the model. Optional.
    // @@
    ExecutionAccelerators *ModelOptimizationPolicy_ExecutionAccelerators `protobuf:"bytes,4,opt,name=execution_accelerators,json=executionAccelerators,proto3" json:"execution_accelerators,omitempty"`
    // @@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory
    // @@
    // @@ Use pinned memory buffer when the data transfer for inputs
    // @@ is between GPU memory and non-pinned system memory.
    // @@ Default is true.
    // @@
    InputPinnedMemory *ModelOptimizationPolicy_PinnedMemoryBuffer `protobuf:"bytes,5,opt,name=input_pinned_memory,json=inputPinnedMemory,proto3" json:"input_pinned_memory,omitempty"`
    // @@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory
    // @@
    // @@ Use pinned memory buffer when the data transfer for outputs
    // @@ is between GPU memory and non-pinned system memory.
    // @@ Default is true.
    // @@
    OutputPinnedMemory *ModelOptimizationPolicy_PinnedMemoryBuffer `protobuf:"bytes,6,opt,name=output_pinned_memory,json=outputPinnedMemory,proto3" json:"output_pinned_memory,omitempty"`
    // @@ .. cpp:var:: uint32 gather_kernel_buffer_threshold
    // @@
    // @@ The backend may use a gather kernel to gather input data if the
    // @@ device has direct access to the source buffer and the destination
    // @@ buffer. In such case, the gather kernel will be used only if the
    // @@ number of buffers to be gathered is greater or equal to
    // @@ the specified value. If 0, the gather kernel will be disabled.
    // @@ Default value is 0.
    // @@ Currently only recognized by TensorRT backend.
    // @@
    GatherKernelBufferThreshold uint32 `` /* 147-byte string literal not displayed */
    // @@ .. cpp:var:: bool eager_batching
    // @@
    // @@ Start preparing the next batch before the model instance is ready
    // @@ for the next inference. This option can be used to overlap the
    // @@ batch preparation with model execution, with the trade-off that
    // @@ the next batch might be smaller than what it could have been.
    // @@ Default value is false.
    // @@ Currently only recognized by TensorRT backend.
    // @@
    EagerBatching bool `protobuf:"varint,8,opt,name=eager_batching,json=eagerBatching,proto3" json:"eager_batching,omitempty"`
    // contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ModelOptimizationPolicy @@ @@ Optimization settings for a model. These settings control if/how a @@ model is optimized and prioritized by the backend framework when @@ it is loaded. @@
func (*ModelOptimizationPolicy) Descriptor
deprecated
func (*ModelOptimizationPolicy) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy) GetCuda ¶
func (x *ModelOptimizationPolicy) GetCuda() *ModelOptimizationPolicy_Cuda
func (*ModelOptimizationPolicy) GetEagerBatching ¶
func (x *ModelOptimizationPolicy) GetEagerBatching() bool
func (*ModelOptimizationPolicy) GetExecutionAccelerators ¶
func (x *ModelOptimizationPolicy) GetExecutionAccelerators() *ModelOptimizationPolicy_ExecutionAccelerators
func (*ModelOptimizationPolicy) GetGatherKernelBufferThreshold ¶
func (x *ModelOptimizationPolicy) GetGatherKernelBufferThreshold() uint32
func (*ModelOptimizationPolicy) GetGraph ¶
func (x *ModelOptimizationPolicy) GetGraph() *ModelOptimizationPolicy_Graph
func (*ModelOptimizationPolicy) GetInputPinnedMemory ¶
func (x *ModelOptimizationPolicy) GetInputPinnedMemory() *ModelOptimizationPolicy_PinnedMemoryBuffer
func (*ModelOptimizationPolicy) GetOutputPinnedMemory ¶
func (x *ModelOptimizationPolicy) GetOutputPinnedMemory() *ModelOptimizationPolicy_PinnedMemoryBuffer
func (*ModelOptimizationPolicy) GetPriority ¶
func (x *ModelOptimizationPolicy) GetPriority() ModelOptimizationPolicy_ModelPriority
func (*ModelOptimizationPolicy) ProtoMessage ¶
func (*ModelOptimizationPolicy) ProtoMessage()
func (*ModelOptimizationPolicy) ProtoReflect ¶
func (x *ModelOptimizationPolicy) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy) Reset ¶
func (x *ModelOptimizationPolicy) Reset()
func (*ModelOptimizationPolicy) String ¶
func (x *ModelOptimizationPolicy) String() string
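A sketch combining the nested messages documented below; the settings shown are illustrative, not recommendations.

    // Sketch: enable CUDA graphs (TensorRT backend only, per the field
    // notes), raise the model's scheduling priority, and keep pinned-
    // memory staging for inputs while disabling it for outputs.
    policy := &ModelOptimizationPolicy{
        Cuda:               &ModelOptimizationPolicy_Cuda{Graphs: true},
        Priority:           ModelOptimizationPolicy_PRIORITY_MAX,
        InputPinnedMemory:  &ModelOptimizationPolicy_PinnedMemoryBuffer{Enable: true},
        OutputPinnedMemory: &ModelOptimizationPolicy_PinnedMemoryBuffer{Enable: false},
    }
    _ = policy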
type ModelOptimizationPolicy_Cuda ¶
type ModelOptimizationPolicy_Cuda struct {
    // @@ .. cpp:var:: bool graphs
    // @@
    // @@ Use CUDA graphs API to capture model operations and execute
    // @@ them more efficiently. Default value is false.
    // @@ Currently only recognized by TensorRT backend.
    // @@
    Graphs bool `protobuf:"varint,1,opt,name=graphs,proto3" json:"graphs,omitempty"`
    // @@ .. cpp:var:: bool busy_wait_events
    // @@
    // @@ Use busy-waiting to synchronize CUDA events to achieve minimum
    // @@ latency from event complete to host thread to be notified, with
    // @@ the cost of high CPU load. Default value is false.
    // @@ Currently only recognized by TensorRT backend.
    // @@
    BusyWaitEvents bool `protobuf:"varint,2,opt,name=busy_wait_events,json=busyWaitEvents,proto3" json:"busy_wait_events,omitempty"`
    // @@ .. cpp:var:: GraphSpec graph_spec (repeated)
    // @@
    // @@ Specification of the CUDA graph to be captured. If not specified
    // @@ and 'graphs' is true, the default CUDA graphs will be captured
    // @@ based on model settings.
    // @@ Currently only recognized by TensorRT backend.
    // @@
    GraphSpec []*ModelOptimizationPolicy_Cuda_GraphSpec `protobuf:"bytes,3,rep,name=graph_spec,json=graphSpec,proto3" json:"graph_spec,omitempty"`
    // @@ .. cpp:var:: bool output_copy_stream
    // @@
    // @@ Uses a CUDA stream separate from the inference stream to copy the
    // @@ output to host. However, be aware that setting this option to
    // @@ true will lead to an increase in the memory consumption of the
    // @@ model as Triton will allocate twice as much GPU memory for its
    // @@ I/O tensor buffers. Default value is false.
    // @@ Currently only recognized by TensorRT backend.
    // @@
    OutputCopyStream bool `protobuf:"varint,4,opt,name=output_copy_stream,json=outputCopyStream,proto3" json:"output_copy_stream,omitempty"`
    // contains filtered or unexported fields
}
@@ @@ .. cpp:var:: message Cuda @@ @@ CUDA-specific optimization settings. @@
func (*ModelOptimizationPolicy_Cuda) Descriptor
deprecated
func (*ModelOptimizationPolicy_Cuda) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Cuda.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Cuda) GetBusyWaitEvents ¶
func (x *ModelOptimizationPolicy_Cuda) GetBusyWaitEvents() bool
func (*ModelOptimizationPolicy_Cuda) GetGraphSpec ¶
func (x *ModelOptimizationPolicy_Cuda) GetGraphSpec() []*ModelOptimizationPolicy_Cuda_GraphSpec
func (*ModelOptimizationPolicy_Cuda) GetGraphs ¶
func (x *ModelOptimizationPolicy_Cuda) GetGraphs() bool
func (*ModelOptimizationPolicy_Cuda) GetOutputCopyStream ¶
func (x *ModelOptimizationPolicy_Cuda) GetOutputCopyStream() bool
func (*ModelOptimizationPolicy_Cuda) ProtoMessage ¶
func (*ModelOptimizationPolicy_Cuda) ProtoMessage()
func (*ModelOptimizationPolicy_Cuda) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Cuda) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Cuda) Reset ¶
func (x *ModelOptimizationPolicy_Cuda) Reset()
func (*ModelOptimizationPolicy_Cuda) String ¶
func (x *ModelOptimizationPolicy_Cuda) String() string
type ModelOptimizationPolicy_Cuda_GraphSpec ¶
type ModelOptimizationPolicy_Cuda_GraphSpec struct {
    // @@ .. cpp:var:: int32 batch_size
    // @@
    // @@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
    // @@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
    // @@ be set to a value between 1 and 'max_batch_size'.
    // @@
    BatchSize int32 `protobuf:"varint,1,opt,name=batch_size,json=batchSize,proto3" json:"batch_size,omitempty"`
    // @@ .. cpp:var:: map<string, Shape> input
    // @@
    // @@ The specification of the inputs. 'Shape' is the shape of the
    // @@ input without batching dimension.
    // @@
    Input map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape `` /* 151-byte string literal not displayed */
    // @@ .. cpp:var:: LowerBound graph_lower_bound
    // @@
    // @@ Specify the lower bound of the CUDA graph. Optional.
    // @@ If specified, the graph can be used for input shapes and
    // @@ batch sizes that are in closed interval between the lower
    // @@ bound specification and graph specification. For dynamic
    // @@ shape model, this allows CUDA graphs to be launched
    // @@ frequently without capturing all possible shape combinations.
    // @@ However, using graph for shape combinations different from
    // @@ the one used for capturing introduces uninitialized data for
    // @@ execution and it may distort the inference result if
    // @@ the model is sensitive to uninitialized data.
    // @@
    GraphLowerBound *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound `protobuf:"bytes,3,opt,name=graph_lower_bound,json=graphLowerBound,proto3" json:"graph_lower_bound,omitempty"`
    // contains filtered or unexported fields
}
@@ .. cpp:var:: message GraphSpec @@ @@ Specification of the CUDA graph to be captured. @@
func (*ModelOptimizationPolicy_Cuda_GraphSpec) Descriptor
deprecated
func (*ModelOptimizationPolicy_Cuda_GraphSpec) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Cuda_GraphSpec.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Cuda_GraphSpec) GetBatchSize ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetBatchSize() int32
func (*ModelOptimizationPolicy_Cuda_GraphSpec) GetGraphLowerBound ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetGraphLowerBound() *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound
func (*ModelOptimizationPolicy_Cuda_GraphSpec) GetInput ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetInput() map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape
func (*ModelOptimizationPolicy_Cuda_GraphSpec) ProtoMessage ¶
func (*ModelOptimizationPolicy_Cuda_GraphSpec) ProtoMessage()
func (*ModelOptimizationPolicy_Cuda_GraphSpec) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Cuda_GraphSpec) Reset ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) Reset()
func (*ModelOptimizationPolicy_Cuda_GraphSpec) String ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) String() string
type ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound ¶
type ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound struct {
    // @@ .. cpp:var:: int32 batch_size
    // @@
    // @@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
    // @@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
    // @@ be set to a value between 1 and 'max_batch_size'.
    // @@
    BatchSize int32 `protobuf:"varint,1,opt,name=batch_size,json=batchSize,proto3" json:"batch_size,omitempty"`
    // @@ .. cpp:var:: map<string, Shape> input
    // @@
    // @@ The specification of the inputs. 'Shape' is the shape of
    // @@ the input without batching dimension.
    // @@
    Input map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape `` /* 151-byte string literal not displayed */
    // contains filtered or unexported fields
}
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Descriptor
deprecated
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetBatchSize ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetBatchSize() int32
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetInput ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetInput() map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoMessage ¶
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoMessage()
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Reset ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Reset()
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) String ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) String() string
type ModelOptimizationPolicy_Cuda_GraphSpec_Shape ¶
type ModelOptimizationPolicy_Cuda_GraphSpec_Shape struct {
    // @@ .. cpp:var:: int64 dim (repeated)
    // @@
    // @@ The dimension.
    // @@
    Dim []int64 `protobuf:"varint,1,rep,packed,name=dim,proto3" json:"dim,omitempty"`
    // contains filtered or unexported fields
}
@@ .. cpp:var:: message Shape @@ @@ Specification of tensor dimensions. @@
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Descriptor
deprecated
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Cuda_GraphSpec_Shape.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) GetDim ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) GetDim() []int64
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoMessage ¶
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoMessage()
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Reset ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Reset()
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) String ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) String() string
type ModelOptimizationPolicy_ExecutionAccelerators ¶
type ModelOptimizationPolicy_ExecutionAccelerators struct {
    // @@ .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
    // @@
    // @@ The preferred execution provider to be used if the model instance
    // @@ is deployed on GPU.
    // @@
    // @@ For ONNX Runtime backend, possible value is "tensorrt" as name,
    // @@ and no parameters are required.
    // @@
    // @@ For TensorFlow backend, possible values are "tensorrt",
    // @@ "auto_mixed_precision", "gpu_io".
    // @@
    // @@ For "tensorrt", the following parameters can be specified:
    // @@ "precision_mode": The precision used for optimization.
    // @@ Allowed values are "FP32" and "FP16". Default value is "FP32".
    // @@
    // @@ "max_cached_engines": The maximum number of cached TensorRT
    // @@ engines in dynamic TensorRT ops. Default value is 100.
    // @@
    // @@ "minimum_segment_size": The smallest model subgraph that will
    // @@ be considered for optimization by TensorRT. Default value is 3.
    // @@
    // @@ "max_workspace_size_bytes": The maximum GPU memory the model
    // @@ can use temporarily during execution. Default value is 1GB.
    // @@
    // @@ For "auto_mixed_precision", no parameters are required. If set,
    // @@ the model will try to use FP16 for better performance.
    // @@ This optimization cannot be set with "tensorrt".
    // @@
    // @@ For "gpu_io", no parameters are required. If set, the model will
    // @@ be executed using TensorFlow Callable API to set input and output
    // @@ tensors in GPU memory if possible, which can reduce data transfer
    // @@ overhead if the model is used in an ensemble. However, the Callable
    // @@ object will be created on model creation and it will request all
    // @@ outputs for every model execution, which may impact the
    // @@ performance if a request does not require all outputs. This
    // @@ optimization will only take effect if the model instance is
    // @@ created with KIND_GPU.
    // @@
    GpuExecutionAccelerator []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator `` /* 132-byte string literal not displayed */
    // @@ .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
    // @@
    // @@ The preferred execution provider to be used if the model instance
    // @@ is deployed on CPU.
    // @@
    // @@ For ONNX Runtime backend, possible value is "openvino" as name,
    // @@ and no parameters are required.
    // @@
    CpuExecutionAccelerator []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator `` /* 132-byte string literal not displayed */
    // contains filtered or unexported fields
}
@@
@@ .. cpp:var:: message ExecutionAccelerators
@@
@@ Specify the preferred execution accelerators to be used to execute
@@ the model. Currently only recognized by ONNX Runtime backend and
@@ TensorFlow backend.
@@
@@ For ONNX Runtime backend, it will deploy the model with the execution
@@ accelerators by priority, the priority is determined based on the
@@ order that they are set, i.e. the provider at the front has highest
@@ priority. Overall, the priority will be in the following order:
@@     <gpu_execution_accelerator> (if instance is on GPU)
@@     CUDA Execution Provider (if instance is on GPU)
@@     <cpu_execution_accelerator>
@@     Default CPU Execution Provider
@@
func (*ModelOptimizationPolicy_ExecutionAccelerators) Descriptor
deprecated
func (*ModelOptimizationPolicy_ExecutionAccelerators) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_ExecutionAccelerators.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_ExecutionAccelerators) GetCpuExecutionAccelerator ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) GetCpuExecutionAccelerator() []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
func (*ModelOptimizationPolicy_ExecutionAccelerators) GetGpuExecutionAccelerator ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) GetGpuExecutionAccelerator() []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
func (*ModelOptimizationPolicy_ExecutionAccelerators) ProtoMessage ¶
func (*ModelOptimizationPolicy_ExecutionAccelerators) ProtoMessage()
func (*ModelOptimizationPolicy_ExecutionAccelerators) ProtoReflect ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_ExecutionAccelerators) Reset ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) Reset()
func (*ModelOptimizationPolicy_ExecutionAccelerators) String ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) String() string
type ModelOptimizationPolicy_ExecutionAccelerators_Accelerator ¶
type ModelOptimizationPolicy_ExecutionAccelerators_Accelerator struct {
    // @@ .. cpp:var:: string name
    // @@
    // @@ The name of the execution accelerator.
    // @@
    Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
    // @@ .. cpp:var:: map<string, string> parameters
    // @@
    // @@ Additional parameters used to configure the accelerator.
    // @@
    Parameters map[string]string `` /* 161-byte string literal not displayed */
    // contains filtered or unexported fields
}
@@ @@ .. cpp:var:: message Accelerator @@ @@ Specify the accelerator to be used to execute the model. @@ Accelerator with the same name may accept different parameters @@ depending on the backends. @@
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Descriptor
deprecated
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_ExecutionAccelerators_Accelerator.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetName ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetName() string
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetParameters ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetParameters() map[string]string
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoMessage ¶
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoMessage()
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoReflect ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Reset ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Reset()
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) String ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) String() string
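A sketch using the accelerator names called out in the ExecutionAccelerators documentation above ("tensorrt" on GPU for the ONNX Runtime and TensorFlow backends, "openvino" on CPU for ONNX Runtime); the parameter value is illustrative.

    // Sketch: prefer TensorRT (FP16) on GPU and OpenVINO on CPU.
    accels := &ModelOptimizationPolicy_ExecutionAccelerators{
        GpuExecutionAccelerator: []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator{{
            Name:       "tensorrt",
            Parameters: map[string]string{"precision_mode": "FP16"},
        }},
        CpuExecutionAccelerator: []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator{{
            Name: "openvino",
        }},
    }
    _ = accels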
type ModelOptimizationPolicy_Graph ¶
type ModelOptimizationPolicy_Graph struct {
    // @@ .. cpp:var:: int32 level
    // @@
    // @@ The optimization level. Defaults to 0 (zero) if not specified.
    // @@
    // @@ - -1: Disabled
    // @@ - 0: Framework default
    // @@ - 1+: Enable optimization level (greater values indicate
    // @@ higher optimization levels)
    // @@
    Level int32 `protobuf:"varint,1,opt,name=level,proto3" json:"level,omitempty"`
    // contains filtered or unexported fields
}
@@ @@ .. cpp:var:: message Graph @@ @@ Enable generic graph optimization of the model. If not specified the framework's default level of optimization is used. Supported for TensorFlow graphdef, TensorFlow savedmodel, and Onnx models. For TensorFlow the level causes XLA to be enabled or disabled for the model. For Onnx the default enables all optimizations, -1 enables only basic optimizations, and +1 enables only basic and extended optimizations. @@
func (*ModelOptimizationPolicy_Graph) Descriptor
deprecated
func (*ModelOptimizationPolicy_Graph) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Graph.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Graph) GetLevel ¶
func (x *ModelOptimizationPolicy_Graph) GetLevel() int32
func (*ModelOptimizationPolicy_Graph) ProtoMessage ¶
func (*ModelOptimizationPolicy_Graph) ProtoMessage()
func (*ModelOptimizationPolicy_Graph) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Graph) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Graph) Reset ¶
func (x *ModelOptimizationPolicy_Graph) Reset()
func (*ModelOptimizationPolicy_Graph) String ¶
func (x *ModelOptimizationPolicy_Graph) String() string
type ModelOptimizationPolicy_ModelPriority ¶
type ModelOptimizationPolicy_ModelPriority int32
@@ @@ .. cpp:enum:: ModelPriority @@ @@ Model priorities. A model will be given scheduling and execution @@ preference over models at lower priorities. Current model @@ priorities only work for TensorRT models. @@
const (
    // @@ .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0
    // @@
    // @@ The default model priority.
    // @@
    ModelOptimizationPolicy_PRIORITY_DEFAULT ModelOptimizationPolicy_ModelPriority = 0
    // @@ .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1
    // @@
    // @@ The maximum model priority.
    // @@
    ModelOptimizationPolicy_PRIORITY_MAX ModelOptimizationPolicy_ModelPriority = 1
    // @@ .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2
    // @@
    // @@ The minimum model priority.
    // @@
    ModelOptimizationPolicy_PRIORITY_MIN ModelOptimizationPolicy_ModelPriority = 2
)
func (ModelOptimizationPolicy_ModelPriority) Descriptor ¶
func (ModelOptimizationPolicy_ModelPriority) Descriptor() protoreflect.EnumDescriptor
func (ModelOptimizationPolicy_ModelPriority) EnumDescriptor
deprecated
func (ModelOptimizationPolicy_ModelPriority) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_ModelPriority.Descriptor instead.
func (ModelOptimizationPolicy_ModelPriority) Number ¶
func (x ModelOptimizationPolicy_ModelPriority) Number() protoreflect.EnumNumber
func (ModelOptimizationPolicy_ModelPriority) String ¶
func (x ModelOptimizationPolicy_ModelPriority) String() string
func (ModelOptimizationPolicy_ModelPriority) Type ¶
func (ModelOptimizationPolicy_ModelPriority) Type() protoreflect.EnumType
type ModelOptimizationPolicy_PinnedMemoryBuffer ¶
type ModelOptimizationPolicy_PinnedMemoryBuffer struct {
    // @@ .. cpp:var:: bool enable
    // @@
    // @@ Use pinned memory buffer. Default is true.
    // @@
    Enable bool `protobuf:"varint,1,opt,name=enable,proto3" json:"enable,omitempty"`
    // contains filtered or unexported fields
}
@@ @@ .. cpp:var:: message PinnedMemoryBuffer @@ @@ Specify whether to use a pinned memory buffer when transferring data @@ between non-pinned system memory and GPU memory. Using a pinned @@ memory buffer for system from/to GPU transfers will typically provide @@ increased performance. For example, in the common use case where the @@ request provides inputs and delivers outputs via non-pinned system @@ memory, if the model instance accepts GPU IOs, the inputs will be @@ processed by two copies: from non-pinned system memory to pinned @@ memory, and from pinned memory to GPU memory. Similarly, pinned @@ memory will be used for delivering the outputs. @@
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) Descriptor
deprecated
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_PinnedMemoryBuffer.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) GetEnable ¶
func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) GetEnable() bool
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoMessage ¶
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoMessage()
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoReflect ¶
func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) Reset ¶
func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) Reset()
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) String ¶
func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) String() string
type ModelOutput ¶
type ModelOutput struct {
    // @@ .. cpp:var:: string name
    // @@
    // @@ The name of the output.
    // @@
    Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
    // @@ .. cpp:var:: DataType data_type
    // @@
    // @@ The data-type of the output.
    // @@
    DataType DataType `protobuf:"varint,2,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"`
    // @@ .. cpp:var:: int64 dims (repeated)
    // @@
    // @@ The dimensions/shape of the output tensor.
    // @@
    Dims []int64 `protobuf:"varint,3,rep,packed,name=dims,proto3" json:"dims,omitempty"`
    // @@ .. cpp:var:: ModelTensorReshape reshape
    // @@
    // @@ The shape produced for this output by the backend. The output will
    // @@ be reshaped from this to the shape specified in 'dims' before being
    // @@ returned in the inference response. The reshape must have the same
    // @@ number of elements as the output shape specified by 'dims'. Optional.
    // @@
    Reshape *ModelTensorReshape `protobuf:"bytes,5,opt,name=reshape,proto3" json:"reshape,omitempty"`
    // @@ .. cpp:var:: string label_filename
    // @@
    // @@ The label file associated with this output. Should be specified only
    // @@ for outputs that represent classifications. Optional.
    // @@
    LabelFilename string `protobuf:"bytes,4,opt,name=label_filename,json=labelFilename,proto3" json:"label_filename,omitempty"`
    // @@ .. cpp:var:: bool is_shape_tensor
    // @@
    // @@ Whether or not the output is a shape tensor to the model. This field
    // @@ is currently supported only for the TensorRT model. An error will be
    // @@ generated if this specification does not comply with the underlying
    // @@ model.
    // @@
    IsShapeTensor bool `protobuf:"varint,6,opt,name=is_shape_tensor,json=isShapeTensor,proto3" json:"is_shape_tensor,omitempty"`
    // contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ModelOutput @@ @@ An output produced by the model. @@
func (*ModelOutput) Descriptor
deprecated
func (*ModelOutput) Descriptor() ([]byte, []int)
Deprecated: Use ModelOutput.ProtoReflect.Descriptor instead.
func (*ModelOutput) GetDataType ¶
func (x *ModelOutput) GetDataType() DataType
func (*ModelOutput) GetDims ¶
func (x *ModelOutput) GetDims() []int64
func (*ModelOutput) GetIsShapeTensor ¶
func (x *ModelOutput) GetIsShapeTensor() bool
func (*ModelOutput) GetLabelFilename ¶
func (x *ModelOutput) GetLabelFilename() string
func (*ModelOutput) GetName ¶
func (x *ModelOutput) GetName() string
func (*ModelOutput) GetReshape ¶
func (x *ModelOutput) GetReshape() *ModelTensorReshape
func (*ModelOutput) ProtoMessage ¶
func (*ModelOutput) ProtoMessage()
func (*ModelOutput) ProtoReflect ¶
func (x *ModelOutput) ProtoReflect() protoreflect.Message
func (*ModelOutput) Reset ¶
func (x *ModelOutput) Reset()
func (*ModelOutput) String ¶
func (x *ModelOutput) String() string
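A sketch of a classification output pairing the tensor with a label file, mirroring the label_filename note above; the names and size are illustrative, and DataType_TYPE_FP32 is the DataType constant defined earlier on this page.

    // Sketch: a 1000-class score vector with labels read from labels.txt.
    output := &ModelOutput{
        Name:          "scores",
        DataType:      DataType_TYPE_FP32,
        Dims:          []int64{1000},
        LabelFilename: "labels.txt", // only for classification outputs
    }
    _ = output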
type ModelParameter ¶
type ModelParameter struct {
    // @@ .. cpp:var:: string string_value
    // @@
    // @@ The string value of the parameter.
    // @@
    StringValue string `protobuf:"bytes,1,opt,name=string_value,json=stringValue,proto3" json:"string_value,omitempty"`
    // contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ModelParameter @@ @@ A model parameter. @@
func (*ModelParameter) Descriptor
deprecated
func (*ModelParameter) Descriptor() ([]byte, []int)
Deprecated: Use ModelParameter.ProtoReflect.Descriptor instead.
func (*ModelParameter) GetStringValue ¶
func (x *ModelParameter) GetStringValue() string
func (*ModelParameter) ProtoMessage ¶
func (*ModelParameter) ProtoMessage()
func (*ModelParameter) ProtoReflect ¶
func (x *ModelParameter) ProtoReflect() protoreflect.Message
func (*ModelParameter) Reset ¶
func (x *ModelParameter) Reset()
func (*ModelParameter) String ¶
func (x *ModelParameter) String() string
type ModelQueuePolicy ¶
type ModelQueuePolicy struct {
    // @@
    // @@ .. cpp:var:: TimeoutAction timeout_action
    // @@
    // @@ The action applied to timed-out requests.
    // @@ The default action is REJECT.
    // @@
    TimeoutAction ModelQueuePolicy_TimeoutAction `` /* 147-byte string literal not displayed */
    // @@
    // @@ .. cpp:var:: uint64 default_timeout_microseconds
    // @@
    // @@ The default timeout for every request, in microseconds.
    // @@ The default value is 0 which indicates that no timeout is set.
    // @@
    DefaultTimeoutMicroseconds uint64 `` /* 142-byte string literal not displayed */
    // @@
    // @@ .. cpp:var:: bool allow_timeout_override
    // @@
    // @@ Whether an individual request can override the default timeout value.
    // @@ When true, individual requests can set a timeout that is less than
    // @@ the default timeout value but may not increase the timeout.
    // @@ The default value is false.
    // @@
    AllowTimeoutOverride bool `protobuf:"varint,3,opt,name=allow_timeout_override,json=allowTimeoutOverride,proto3" json:"allow_timeout_override,omitempty"`
    // @@
    // @@ .. cpp:var:: uint32 max_queue_size
    // @@
    // @@ The maximum queue size for holding requests. A request will be
    // @@ rejected immediately if it can't be enqueued because the queue is
    // @@ full. The default value is 0 which indicates that no maximum
    // @@ queue size is enforced.
    // @@
    MaxQueueSize uint32 `protobuf:"varint,4,opt,name=max_queue_size,json=maxQueueSize,proto3" json:"max_queue_size,omitempty"`
    // contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ModelQueuePolicy @@ @@ Queue policy for inference requests. @@
func (*ModelQueuePolicy) Descriptor
deprecated
func (*ModelQueuePolicy) Descriptor() ([]byte, []int)
Deprecated: Use ModelQueuePolicy.ProtoReflect.Descriptor instead.
func (*ModelQueuePolicy) GetAllowTimeoutOverride ¶
func (x *ModelQueuePolicy) GetAllowTimeoutOverride() bool
func (*ModelQueuePolicy) GetDefaultTimeoutMicroseconds ¶
func (x *ModelQueuePolicy) GetDefaultTimeoutMicroseconds() uint64
func (*ModelQueuePolicy) GetMaxQueueSize ¶
func (x *ModelQueuePolicy) GetMaxQueueSize() uint32
func (*ModelQueuePolicy) GetTimeoutAction ¶
func (x *ModelQueuePolicy) GetTimeoutAction() ModelQueuePolicy_TimeoutAction
func (*ModelQueuePolicy) ProtoMessage ¶
func (*ModelQueuePolicy) ProtoMessage()
func (*ModelQueuePolicy) ProtoReflect ¶
func (x *ModelQueuePolicy) ProtoReflect() protoreflect.Message
func (*ModelQueuePolicy) Reset ¶
func (x *ModelQueuePolicy) Reset()
func (*ModelQueuePolicy) String ¶
func (x *ModelQueuePolicy) String() string
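A sketch tying the four fields together; the numbers are illustrative.

    // Sketch: reject requests queued longer than 100ms, allow individual
    // requests to shorten (never extend) that deadline, and bound the
    // queue at 64 pending requests.
    qp := &ModelQueuePolicy{
        TimeoutAction:              ModelQueuePolicy_REJECT,
        DefaultTimeoutMicroseconds: 100000,
        AllowTimeoutOverride:       true,
        MaxQueueSize:               64,
    }
    _ = qp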
type ModelQueuePolicy_TimeoutAction ¶
type ModelQueuePolicy_TimeoutAction int32
@@ @@ .. cpp:enum:: TimeoutAction @@ @@ The action applied to timed-out requests. @@
const (
    // @@ .. cpp:enumerator:: TimeoutAction::REJECT = 0
    // @@
    // @@ Reject the request and return an error message accordingly.
    // @@
    ModelQueuePolicy_REJECT ModelQueuePolicy_TimeoutAction = 0
    // @@ .. cpp:enumerator:: TimeoutAction::DELAY = 1
    // @@
    // @@ Delay the request until all other requests at the same
    // @@ (or higher) priority levels that have not reached their timeouts
    // @@ are processed. A delayed request will eventually be processed,
    // @@ but may be delayed indefinitely due to newly arriving requests.
    // @@
    ModelQueuePolicy_DELAY ModelQueuePolicy_TimeoutAction = 1
)
func (ModelQueuePolicy_TimeoutAction) Descriptor ¶
func (ModelQueuePolicy_TimeoutAction) Descriptor() protoreflect.EnumDescriptor
func (ModelQueuePolicy_TimeoutAction) Enum ¶
func (x ModelQueuePolicy_TimeoutAction) Enum() *ModelQueuePolicy_TimeoutAction
func (ModelQueuePolicy_TimeoutAction) EnumDescriptor
deprecated
func (ModelQueuePolicy_TimeoutAction) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelQueuePolicy_TimeoutAction.Descriptor instead.
func (ModelQueuePolicy_TimeoutAction) Number ¶
func (x ModelQueuePolicy_TimeoutAction) Number() protoreflect.EnumNumber
func (ModelQueuePolicy_TimeoutAction) String ¶
func (x ModelQueuePolicy_TimeoutAction) String() string
func (ModelQueuePolicy_TimeoutAction) Type ¶
func (ModelQueuePolicy_TimeoutAction) Type() protoreflect.EnumType
type ModelRateLimiter ¶
type ModelRateLimiter struct {
    // @@ .. cpp:var:: Resource resources (repeated)
    // @@
    // @@ The resources required to execute the request on a model instance.
    // @@ Resources are just names with a corresponding count. The execution
    // @@ of the instance will be blocked until the specified resources are
    // @@ available. By default an instance uses no rate-limiter resources.
    // @@
    Resources []*ModelRateLimiter_Resource `protobuf:"bytes,1,rep,name=resources,proto3" json:"resources,omitempty"`
    // @@ .. cpp:var:: uint32 priority
    // @@
    // @@ The optional weighting value to be used for prioritizing across
    // @@ instances. An instance with priority 2 will be given 1/2 the
    // @@ number of scheduling chances as an instance_group with priority
    // @@ 1. The default priority is 1. The priority of value 0 will be
    // @@ treated as priority 1.
    // @@
    Priority uint32 `protobuf:"varint,2,opt,name=priority,proto3" json:"priority,omitempty"`
    // contains filtered or unexported fields
}
@@ @@ .. cpp:var:: message ModelRateLimiter @@ @@ The specifications required by the rate limiter to properly @@ schedule the inference requests across the different models @@ and their instances. @@
func (*ModelRateLimiter) Descriptor
deprecated
func (*ModelRateLimiter) Descriptor() ([]byte, []int)
Deprecated: Use ModelRateLimiter.ProtoReflect.Descriptor instead.
func (*ModelRateLimiter) GetPriority ¶
func (x *ModelRateLimiter) GetPriority() uint32
func (*ModelRateLimiter) GetResources ¶
func (x *ModelRateLimiter) GetResources() []*ModelRateLimiter_Resource
func (*ModelRateLimiter) ProtoMessage ¶
func (*ModelRateLimiter) ProtoMessage()
func (*ModelRateLimiter) ProtoReflect ¶
func (x *ModelRateLimiter) ProtoReflect() protoreflect.Message
func (*ModelRateLimiter) Reset ¶
func (x *ModelRateLimiter) Reset()
func (*ModelRateLimiter) String ¶
func (x *ModelRateLimiter) String() string
type ModelRateLimiter_Resource ¶
type ModelRateLimiter_Resource struct {
    // @@ .. cpp:var:: string name
    // @@
    // @@ The name associated with the resource.
    // @@
    Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
    // @@ .. cpp:var:: bool global
    // @@
    // @@ Whether or not the resource is global. If true then the resource
    // @@ is assumed to be shared among the devices; otherwise the specified
    // @@ count of the resource is assumed for each device associated
    // @@ with the instance.
    // @@
    Global bool `protobuf:"varint,2,opt,name=global,proto3" json:"global,omitempty"`
    // @@ .. cpp:var:: uint32 count
    // @@
    // @@ The number of resources required for the execution of the model
    // @@ instance.
    // @@
    Count uint32 `protobuf:"varint,3,opt,name=count,proto3" json:"count,omitempty"`
    // contains filtered or unexported fields
}
@@ .. cpp:var:: message Resource @@ @@ The resource property. @@
func (*ModelRateLimiter_Resource) Descriptor
deprecated
func (*ModelRateLimiter_Resource) Descriptor() ([]byte, []int)
Deprecated: Use ModelRateLimiter_Resource.ProtoReflect.Descriptor instead.
func (*ModelRateLimiter_Resource) GetCount ¶
func (x *ModelRateLimiter_Resource) GetCount() uint32
func (*ModelRateLimiter_Resource) GetGlobal ¶
func (x *ModelRateLimiter_Resource) GetGlobal() bool
func (*ModelRateLimiter_Resource) GetName ¶
func (x *ModelRateLimiter_Resource) GetName() string
func (*ModelRateLimiter_Resource) ProtoMessage ¶
func (*ModelRateLimiter_Resource) ProtoMessage()
func (*ModelRateLimiter_Resource) ProtoReflect ¶
func (x *ModelRateLimiter_Resource) ProtoReflect() protoreflect.Message
func (*ModelRateLimiter_Resource) Reset ¶
func (x *ModelRateLimiter_Resource) Reset()
func (*ModelRateLimiter_Resource) String ¶
func (x *ModelRateLimiter_Resource) String() string
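A sketch combining ModelRateLimiter and its Resource message as documented above; the resource names are illustrative.

    // Sketch: the instance needs two per-device "R1" resources plus one
    // global "R2" resource, and gets half the scheduling chances of a
    // priority-1 instance.
    limiter := &ModelRateLimiter{
        Resources: []*ModelRateLimiter_Resource{
            {Name: "R1", Count: 2},
            {Name: "R2", Global: true, Count: 1},
        },
        Priority: 2,
    }
    _ = limiter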
type ModelReadyRequest ¶
type ModelReadyRequest struct {
    // @@
    // @@ .. cpp:var:: string name
    // @@
    // @@ The name of the model to check for readiness.
    // @@
    Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
    // @@ .. cpp:var:: string version
    // @@
    // @@ The version of the model to check for readiness. If not given the
    // @@ server will choose a version based on the model and internal policy.
    // @@
    Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"`
    // contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ModelReadyRequest @@ @@ Request message for ModelReady. @@
func (*ModelReadyRequest) Descriptor
deprecated
func (*ModelReadyRequest) Descriptor() ([]byte, []int)
Deprecated: Use ModelReadyRequest.ProtoReflect.Descriptor instead.
func (*ModelReadyRequest) GetName ¶
func (x *ModelReadyRequest) GetName() string
func (*ModelReadyRequest) GetVersion ¶
func (x *ModelReadyRequest) GetVersion() string
func (*ModelReadyRequest) ProtoMessage ¶
func (*ModelReadyRequest) ProtoMessage()
func (*ModelReadyRequest) ProtoReflect ¶
func (x *ModelReadyRequest) ProtoReflect() protoreflect.Message
func (*ModelReadyRequest) Reset ¶
func (x *ModelReadyRequest) Reset()
func (*ModelReadyRequest) String ¶
func (x *ModelReadyRequest) String() string
type ModelReadyResponse ¶
type ModelReadyResponse struct { // @@ // @@ .. cpp:var:: bool ready // @@ // @@ True if the model is ready, false if not ready. // @@ Ready bool `protobuf:"varint,1,opt,name=ready,proto3" json:"ready,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelReadyResponse @@ @@ Response message for ModelReady. @@
func (*ModelReadyResponse) Descriptor
deprecated
func (*ModelReadyResponse) Descriptor() ([]byte, []int)
Deprecated: Use ModelReadyResponse.ProtoReflect.Descriptor instead.
func (*ModelReadyResponse) GetReady ¶
func (x *ModelReadyResponse) GetReady() bool
func (*ModelReadyResponse) ProtoMessage ¶
func (*ModelReadyResponse) ProtoMessage()
func (*ModelReadyResponse) ProtoReflect ¶
func (x *ModelReadyResponse) ProtoReflect() protoreflect.Message
func (*ModelReadyResponse) Reset ¶
func (x *ModelReadyResponse) Reset()
func (*ModelReadyResponse) String ¶
func (x *ModelReadyResponse) String() string
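A hedged sketch of issuing a ModelReady RPC. It assumes the standard generated client constructor NewGRPCInferenceServiceClient (implied by the RegisterGRPCInferenceServiceServer function in this package), Triton's conventional gRPC port 8001, and a placeholder import path and model name:

package main

import (
	"context"
	"log"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	inference "example.com/triton/grpc-client" // placeholder: use this package's real import path
)

func main() {
	conn, err := grpc.Dial("localhost:8001",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	client := inference.NewGRPCInferenceServiceClient(conn)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Leaving Version empty lets the server choose a version per its policy.
	resp, err := client.ModelReady(ctx, &inference.ModelReadyRequest{Name: "my_model"})
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("ready: %v", resp.GetReady())
}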
type ModelRepositoryAgents ¶
type ModelRepositoryAgents struct { // @@ // @@ .. cpp:var:: Agent agents (repeated) // @@ // @@ The ordered list of agents for the model. These agents will be // @@ invoked in order to respond to repository actions occurring for the // @@ model. // @@ Agents []*ModelRepositoryAgents_Agent `protobuf:"bytes,1,rep,name=agents,proto3" json:"agents,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelRepositoryAgents @@ @@ The repository agents for the model. @@
func (*ModelRepositoryAgents) Descriptor
deprecated
func (*ModelRepositoryAgents) Descriptor() ([]byte, []int)
Deprecated: Use ModelRepositoryAgents.ProtoReflect.Descriptor instead.
func (*ModelRepositoryAgents) GetAgents ¶
func (x *ModelRepositoryAgents) GetAgents() []*ModelRepositoryAgents_Agent
func (*ModelRepositoryAgents) ProtoMessage ¶
func (*ModelRepositoryAgents) ProtoMessage()
func (*ModelRepositoryAgents) ProtoReflect ¶
func (x *ModelRepositoryAgents) ProtoReflect() protoreflect.Message
func (*ModelRepositoryAgents) Reset ¶
func (x *ModelRepositoryAgents) Reset()
func (*ModelRepositoryAgents) String ¶
func (x *ModelRepositoryAgents) String() string
type ModelRepositoryAgents_Agent ¶
type ModelRepositoryAgents_Agent struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the agent. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: map<string, string> parameters // @@ // @@ The parameters for the agent. // @@ Parameters map[string]string `` /* 161-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message Agent @@ @@ A repository agent that should be invoked for the specified @@ repository actions for this model. @@
func (*ModelRepositoryAgents_Agent) Descriptor
deprecated
func (*ModelRepositoryAgents_Agent) Descriptor() ([]byte, []int)
Deprecated: Use ModelRepositoryAgents_Agent.ProtoReflect.Descriptor instead.
func (*ModelRepositoryAgents_Agent) GetName ¶
func (x *ModelRepositoryAgents_Agent) GetName() string
func (*ModelRepositoryAgents_Agent) GetParameters ¶
func (x *ModelRepositoryAgents_Agent) GetParameters() map[string]string
func (*ModelRepositoryAgents_Agent) ProtoMessage ¶
func (*ModelRepositoryAgents_Agent) ProtoMessage()
func (*ModelRepositoryAgents_Agent) ProtoReflect ¶
func (x *ModelRepositoryAgents_Agent) ProtoReflect() protoreflect.Message
func (*ModelRepositoryAgents_Agent) Reset ¶
func (x *ModelRepositoryAgents_Agent) Reset()
func (*ModelRepositoryAgents_Agent) String ¶
func (x *ModelRepositoryAgents_Agent) String() string
type ModelRepositoryParameter ¶
type ModelRepositoryParameter struct { // @@ .. cpp:var:: oneof parameter_choice // @@ // @@ The parameter value can be a string, an int64, a boolean // @@ or bytes // @@ // // Types that are assignable to ParameterChoice: // // *ModelRepositoryParameter_BoolParam // // *ModelRepositoryParameter_Int64Param // // *ModelRepositoryParameter_StringParam // // *ModelRepositoryParameter_BytesParam ParameterChoice isModelRepositoryParameter_ParameterChoice `protobuf_oneof:"parameter_choice"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelRepositoryParameter @@ @@ A model repository parameter value. @@
func (*ModelRepositoryParameter) Descriptor
deprecated
func (*ModelRepositoryParameter) Descriptor() ([]byte, []int)
Deprecated: Use ModelRepositoryParameter.ProtoReflect.Descriptor instead.
func (*ModelRepositoryParameter) GetBoolParam ¶
func (x *ModelRepositoryParameter) GetBoolParam() bool
func (*ModelRepositoryParameter) GetBytesParam ¶
func (x *ModelRepositoryParameter) GetBytesParam() []byte
func (*ModelRepositoryParameter) GetInt64Param ¶
func (x *ModelRepositoryParameter) GetInt64Param() int64
func (*ModelRepositoryParameter) GetParameterChoice ¶
func (m *ModelRepositoryParameter) GetParameterChoice() isModelRepositoryParameter_ParameterChoice
func (*ModelRepositoryParameter) GetStringParam ¶
func (x *ModelRepositoryParameter) GetStringParam() string
func (*ModelRepositoryParameter) ProtoMessage ¶
func (*ModelRepositoryParameter) ProtoMessage()
func (*ModelRepositoryParameter) ProtoReflect ¶
func (x *ModelRepositoryParameter) ProtoReflect() protoreflect.Message
func (*ModelRepositoryParameter) Reset ¶
func (x *ModelRepositoryParameter) Reset()
func (*ModelRepositoryParameter) String ¶
func (x *ModelRepositoryParameter) String() string
type ModelRepositoryParameter_BoolParam ¶
type ModelRepositoryParameter_BoolParam struct { // @@ .. cpp:var:: bool bool_param // @@ // @@ A boolean parameter value. // @@ BoolParam bool `protobuf:"varint,1,opt,name=bool_param,json=boolParam,proto3,oneof"` }
type ModelRepositoryParameter_BytesParam ¶
type ModelRepositoryParameter_BytesParam struct { // @@ .. cpp:var:: bytes bytes_param // @@ // @@ A bytes parameter value. // @@ BytesParam []byte `protobuf:"bytes,4,opt,name=bytes_param,json=bytesParam,proto3,oneof"` }
type ModelRepositoryParameter_Int64Param ¶
type ModelRepositoryParameter_Int64Param struct { // @@ .. cpp:var:: int64 int64_param // @@ // @@ An int64 parameter value. // @@ Int64Param int64 `protobuf:"varint,2,opt,name=int64_param,json=int64Param,proto3,oneof"` }
type ModelRepositoryParameter_StringParam ¶
type ModelRepositoryParameter_StringParam struct { // @@ .. cpp:var:: string string_param // @@ // @@ A string parameter value. // @@ StringParam string `protobuf:"bytes,3,opt,name=string_param,json=stringParam,proto3,oneof"` }
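These wrapper structs are the standard protobuf-go encoding of the parameter_choice oneof: assigning any one of them to ParameterChoice selects that branch. A minimal sketch, reusing the placeholder import alias from the earlier examples:

p := &inference.ModelRepositoryParameter{
	ParameterChoice: &inference.ModelRepositoryParameter_StringParam{StringParam: "v1"},
}

// Type-switch on the oneof to see which branch is set; the typed getters
// (GetStringParam, GetInt64Param, ...) return zero values for branches
// that are not currently selected.
switch c := p.GetParameterChoice().(type) {
case *inference.ModelRepositoryParameter_StringParam:
	fmt.Println("string:", c.StringParam)
case *inference.ModelRepositoryParameter_Int64Param:
	fmt.Println("int64:", c.Int64Param)
case *inference.ModelRepositoryParameter_BoolParam:
	fmt.Println("bool:", c.BoolParam)
case *inference.ModelRepositoryParameter_BytesParam:
	fmt.Println("bytes:", c.BytesParam)
}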
type ModelResponseCache ¶
type ModelResponseCache struct { // @@ // @@ .. cpp:var:: bool enable // @@ // @@ Whether or not to use the response cache for the model. If true, the // @@ responses from the model are cached and, when an identical request // @@ is encountered, the response from the cache is used instead of // @@ going through the model execution. By default, the response cache // @@ is disabled for all models. // @@ Enable bool `protobuf:"varint,1,opt,name=enable,proto3" json:"enable,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelResponseCache @@ @@ The response cache setting for the model. @@
func (*ModelResponseCache) Descriptor
deprecated
func (*ModelResponseCache) Descriptor() ([]byte, []int)
Deprecated: Use ModelResponseCache.ProtoReflect.Descriptor instead.
func (*ModelResponseCache) GetEnable ¶
func (x *ModelResponseCache) GetEnable() bool
func (*ModelResponseCache) ProtoMessage ¶
func (*ModelResponseCache) ProtoMessage()
func (*ModelResponseCache) ProtoReflect ¶
func (x *ModelResponseCache) ProtoReflect() protoreflect.Message
func (*ModelResponseCache) Reset ¶
func (x *ModelResponseCache) Reset()
func (*ModelResponseCache) String ¶
func (x *ModelResponseCache) String() string
type ModelSequenceBatching ¶
type ModelSequenceBatching struct { // @@ .. cpp:var:: oneof strategy_choice // @@ // @@ The strategy used by the sequence batcher. Default strategy // @@ is 'direct'. // @@ // // Types that are assignable to StrategyChoice: // // *ModelSequenceBatching_Direct // // *ModelSequenceBatching_Oldest StrategyChoice isModelSequenceBatching_StrategyChoice `protobuf_oneof:"strategy_choice"` // @@ .. cpp:var:: uint64 max_sequence_idle_microseconds // @@ // @@ The maximum time, in microseconds, that a sequence is allowed to // @@ be idle before it is aborted. The inference server considers a // @@ sequence idle when it does not have any inference request queued // @@ for the sequence. If this limit is exceeded, the inference server // @@ will free the sequence slot allocated by the sequence and make it // @@ available for another sequence. If not specified (or specified as // @@ zero) a default value of 1000000 (1 second) is used. // @@ MaxSequenceIdleMicroseconds uint64 `` /* 147-byte string literal not displayed */ // @@ .. cpp:var:: ControlInput control_input (repeated) // @@ // @@ The model input(s) that the server should use to communicate // @@ sequence start, stop, ready and similar control values to the // @@ model. // @@ ControlInput []*ModelSequenceBatching_ControlInput `protobuf:"bytes,2,rep,name=control_input,json=controlInput,proto3" json:"control_input,omitempty"` // @@ .. cpp:var:: State state (repeated) // @@ // @@ The optional state that can be stored in Triton for performing // @@ inference requests on a sequence. Each sequence holds an implicit // @@ state local to itself. The output state tensor provided by the // @@ model in the 'output_name' field of the current inference request // @@ will be transferred as an input tensor named 'input_name' in the // @@ next request of the same sequence. The input state of the first // @@ request in the sequence contains garbage data. // @@ State []*ModelSequenceBatching_State `protobuf:"bytes,5,rep,name=state,proto3" json:"state,omitempty"` // @@ .. cpp:var:: bool iterative_sequence // @@ // @@ Requests for iterative sequences are processed over a number // @@ of iterations. An iterative sequence is initiated by a single // @@ request and is "rescheduled" by the model until completion. // @@ Inflight requests will be batched together // @@ and can complete independently. Note this feature // @@ requires backend support. Default value is false. // @@ IterativeSequence bool `protobuf:"varint,6,opt,name=iterative_sequence,json=iterativeSequence,proto3" json:"iterative_sequence,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelSequenceBatching @@ @@ Sequence batching configuration. These settings control how sequence @@ batching operates for the model. @@
func (*ModelSequenceBatching) Descriptor
deprecated
func (*ModelSequenceBatching) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching) GetControlInput ¶
func (x *ModelSequenceBatching) GetControlInput() []*ModelSequenceBatching_ControlInput
func (*ModelSequenceBatching) GetDirect ¶
func (x *ModelSequenceBatching) GetDirect() *ModelSequenceBatching_StrategyDirect
func (*ModelSequenceBatching) GetIterativeSequence ¶
func (x *ModelSequenceBatching) GetIterativeSequence() bool
func (*ModelSequenceBatching) GetMaxSequenceIdleMicroseconds ¶
func (x *ModelSequenceBatching) GetMaxSequenceIdleMicroseconds() uint64
func (*ModelSequenceBatching) GetOldest ¶
func (x *ModelSequenceBatching) GetOldest() *ModelSequenceBatching_StrategyOldest
func (*ModelSequenceBatching) GetState ¶
func (x *ModelSequenceBatching) GetState() []*ModelSequenceBatching_State
func (*ModelSequenceBatching) GetStrategyChoice ¶
func (m *ModelSequenceBatching) GetStrategyChoice() isModelSequenceBatching_StrategyChoice
func (*ModelSequenceBatching) ProtoMessage ¶
func (*ModelSequenceBatching) ProtoMessage()
func (*ModelSequenceBatching) ProtoReflect ¶
func (x *ModelSequenceBatching) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching) Reset ¶
func (x *ModelSequenceBatching) Reset()
func (*ModelSequenceBatching) String ¶
func (x *ModelSequenceBatching) String() string
type ModelSequenceBatching_Control ¶
type ModelSequenceBatching_Control struct { // @@ .. cpp:var:: Kind kind // @@ // @@ The kind of this control. // @@ Kind ModelSequenceBatching_Control_Kind `protobuf:"varint,1,opt,name=kind,proto3,enum=inference.ModelSequenceBatching_Control_Kind" json:"kind,omitempty"` // @@ .. cpp:var:: int32 int32_false_true (repeated) // @@ // @@ The control's true and false setting is indicated by setting // @@ a value in an int32 tensor. The tensor must be a // @@ 1-dimensional tensor with size equal to the batch size of // @@ the request. 'int32_false_true' must have two entries: the // @@ first the false value and the second the true value. // @@ Int32FalseTrue []int32 `protobuf:"varint,2,rep,packed,name=int32_false_true,json=int32FalseTrue,proto3" json:"int32_false_true,omitempty"` // @@ .. cpp:var:: float fp32_false_true (repeated) // @@ // @@ The control's true and false setting is indicated by setting // @@ a value in a fp32 tensor. The tensor must be a // @@ 1-dimensional tensor with size equal to the batch size of // @@ the request. 'fp32_false_true' must have two entries: the // @@ first the false value and the second the true value. // @@ Fp32FalseTrue []float32 `protobuf:"fixed32,3,rep,packed,name=fp32_false_true,json=fp32FalseTrue,proto3" json:"fp32_false_true,omitempty"` // @@ .. cpp:var:: bool bool_false_true (repeated) // @@ // @@ The control's true and false setting is indicated by setting // @@ a value in a bool tensor. The tensor must be a // @@ 1-dimensional tensor with size equal to the batch size of // @@ the request. 'bool_false_true' must have two entries: the // @@ first the false value and the second the true value. // @@ BoolFalseTrue []bool `protobuf:"varint,5,rep,packed,name=bool_false_true,json=boolFalseTrue,proto3" json:"bool_false_true,omitempty"` // @@ .. cpp:var:: DataType data_type // @@ // @@ The control's datatype. // @@ DataType DataType `protobuf:"varint,4,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message Control @@ @@ A control is a signal that the sequence batcher uses to @@ communicate with a backend. @@
func (*ModelSequenceBatching_Control) Descriptor
deprecated
func (*ModelSequenceBatching_Control) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_Control.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_Control) GetBoolFalseTrue ¶
func (x *ModelSequenceBatching_Control) GetBoolFalseTrue() []bool
func (*ModelSequenceBatching_Control) GetDataType ¶
func (x *ModelSequenceBatching_Control) GetDataType() DataType
func (*ModelSequenceBatching_Control) GetFp32FalseTrue ¶
func (x *ModelSequenceBatching_Control) GetFp32FalseTrue() []float32
func (*ModelSequenceBatching_Control) GetInt32FalseTrue ¶
func (x *ModelSequenceBatching_Control) GetInt32FalseTrue() []int32
func (*ModelSequenceBatching_Control) GetKind ¶
func (x *ModelSequenceBatching_Control) GetKind() ModelSequenceBatching_Control_Kind
func (*ModelSequenceBatching_Control) ProtoMessage ¶
func (*ModelSequenceBatching_Control) ProtoMessage()
func (*ModelSequenceBatching_Control) ProtoReflect ¶
func (x *ModelSequenceBatching_Control) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_Control) Reset ¶
func (x *ModelSequenceBatching_Control) Reset()
func (*ModelSequenceBatching_Control) String ¶
func (x *ModelSequenceBatching_Control) String() string
type ModelSequenceBatching_ControlInput ¶
type ModelSequenceBatching_ControlInput struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the model input. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: Control control (repeated) // @@ // @@ The control value(s) that should be communicated to the // @@ model using this model input. // @@ Control []*ModelSequenceBatching_Control `protobuf:"bytes,2,rep,name=control,proto3" json:"control,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message ControlInput @@ @@ The sequence control values to be communicated by a model input. @@
func (*ModelSequenceBatching_ControlInput) Descriptor
deprecated
func (*ModelSequenceBatching_ControlInput) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_ControlInput.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_ControlInput) GetControl ¶
func (x *ModelSequenceBatching_ControlInput) GetControl() []*ModelSequenceBatching_Control
func (*ModelSequenceBatching_ControlInput) GetName ¶
func (x *ModelSequenceBatching_ControlInput) GetName() string
func (*ModelSequenceBatching_ControlInput) ProtoMessage ¶
func (*ModelSequenceBatching_ControlInput) ProtoMessage()
func (*ModelSequenceBatching_ControlInput) ProtoReflect ¶
func (x *ModelSequenceBatching_ControlInput) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_ControlInput) Reset ¶
func (x *ModelSequenceBatching_ControlInput) Reset()
func (*ModelSequenceBatching_ControlInput) String ¶
func (x *ModelSequenceBatching_ControlInput) String() string
type ModelSequenceBatching_Control_Kind ¶
type ModelSequenceBatching_Control_Kind int32
@@ @@ .. cpp:enum:: Kind @@ @@ The kind of the control. @@
const ( // @@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0 // @@ // @@ A new sequence is/is-not starting. If true a sequence is // @@ starting, if false a sequence is continuing. Must // @@ specify either int32_false_true, fp32_false_true or // @@ bool_false_true for this control. This control is optional. // @@ ModelSequenceBatching_Control_CONTROL_SEQUENCE_START ModelSequenceBatching_Control_Kind = 0 // @@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1 // @@ // @@ A sequence is/is-not ready for inference. If true the // @@ input tensor data is valid and should be used. If false // @@ the input tensor data is invalid and inferencing should // @@ be "skipped". Must specify either int32_false_true, // @@ fp32_false_true or bool_false_true for this control. This // @@ control is optional. // @@ ModelSequenceBatching_Control_CONTROL_SEQUENCE_READY ModelSequenceBatching_Control_Kind = 1 // @@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2 // @@ // @@ A sequence is/is-not ending. If true a sequence is // @@ ending, if false a sequence is continuing. Must specify // @@ either int32_false_true, fp32_false_true or bool_false_true // @@ for this control. This control is optional. // @@ ModelSequenceBatching_Control_CONTROL_SEQUENCE_END ModelSequenceBatching_Control_Kind = 2 // @@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3 // @@ // @@ The correlation ID of the sequence. The correlation ID // @@ is a uint64_t value that is communicated in whole or // @@ in part by the tensor. The tensor's datatype must be // @@ specified by data_type and must be TYPE_UINT64, TYPE_INT64, // @@ TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified // @@ the correlation ID will be truncated to the low-order 32 // @@ bits. This control is optional. // @@ ModelSequenceBatching_Control_CONTROL_SEQUENCE_CORRID ModelSequenceBatching_Control_Kind = 3 )
func (ModelSequenceBatching_Control_Kind) Descriptor ¶
func (ModelSequenceBatching_Control_Kind) Descriptor() protoreflect.EnumDescriptor
func (ModelSequenceBatching_Control_Kind) Enum ¶
func (x ModelSequenceBatching_Control_Kind) Enum() *ModelSequenceBatching_Control_Kind
func (ModelSequenceBatching_Control_Kind) EnumDescriptor
deprecated
func (ModelSequenceBatching_Control_Kind) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_Control_Kind.Descriptor instead.
func (ModelSequenceBatching_Control_Kind) Number ¶
func (x ModelSequenceBatching_Control_Kind) Number() protoreflect.EnumNumber
func (ModelSequenceBatching_Control_Kind) String ¶
func (x ModelSequenceBatching_Control_Kind) String() string
func (ModelSequenceBatching_Control_Kind) Type ¶
func (ModelSequenceBatching_Control_Kind) Type() protoreflect.EnumType
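As a sketch, here is how a CONTROL_SEQUENCE_START control could be wired to a model input (the input name "START" and the idle timeout are placeholders; int32_false_true supplies the false/true tensor values). This continues with the same placeholder import alias as the earlier examples:

sb := &inference.ModelSequenceBatching{
	// Abort a sequence after 5 seconds without queued requests.
	MaxSequenceIdleMicroseconds: 5000000,
	ControlInput: []*inference.ModelSequenceBatching_ControlInput{
		{
			Name: "START", // hypothetical model input carrying the control
			Control: []*inference.ModelSequenceBatching_Control{
				{
					Kind:           inference.ModelSequenceBatching_Control_CONTROL_SEQUENCE_START,
					Int32FalseTrue: []int32{0, 1}, // 0 = continuing, 1 = starting
				},
			},
		},
	},
}
_ = sb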
type ModelSequenceBatching_Direct ¶
type ModelSequenceBatching_Direct struct { // @@ .. cpp:var:: StrategyDirect direct // @@ // @@ StrategyDirect scheduling strategy. // @@ Direct *ModelSequenceBatching_StrategyDirect `protobuf:"bytes,3,opt,name=direct,proto3,oneof"` }
type ModelSequenceBatching_InitialState ¶
type ModelSequenceBatching_InitialState struct { // @@ .. cpp:var:: DataType data_type // @@ // @@ The data-type of the state. // @@ DataType DataType `protobuf:"varint,1,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"` // @@ .. cpp:var:: int64 dims (repeated) // @@ // @@ The shape of the state tensor, not including the batch // @@ dimension. // @@ Dims []int64 `protobuf:"varint,2,rep,packed,name=dims,proto3" json:"dims,omitempty"` // @@ .. cpp:var:: oneof state_data // @@ // @@ Specify how the initial state data is generated. // @@ // // Types that are assignable to StateData: // // *ModelSequenceBatching_InitialState_ZeroData // *ModelSequenceBatching_InitialState_DataFile StateData isModelSequenceBatching_InitialState_StateData `protobuf_oneof:"state_data"` // @@ .. cpp:var:: string name // @@ // @@ The name of the state initialization. // @@ Name string `protobuf:"bytes,5,opt,name=name,proto3" json:"name,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message InitialState @@ @@ Settings used to initialize data for implicit state. @@
func (*ModelSequenceBatching_InitialState) Descriptor
deprecated
func (*ModelSequenceBatching_InitialState) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_InitialState.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_InitialState) GetDataFile ¶
func (x *ModelSequenceBatching_InitialState) GetDataFile() string
func (*ModelSequenceBatching_InitialState) GetDataType ¶
func (x *ModelSequenceBatching_InitialState) GetDataType() DataType
func (*ModelSequenceBatching_InitialState) GetDims ¶
func (x *ModelSequenceBatching_InitialState) GetDims() []int64
func (*ModelSequenceBatching_InitialState) GetName ¶
func (x *ModelSequenceBatching_InitialState) GetName() string
func (*ModelSequenceBatching_InitialState) GetStateData ¶
func (m *ModelSequenceBatching_InitialState) GetStateData() isModelSequenceBatching_InitialState_StateData
func (*ModelSequenceBatching_InitialState) GetZeroData ¶
func (x *ModelSequenceBatching_InitialState) GetZeroData() bool
func (*ModelSequenceBatching_InitialState) ProtoMessage ¶
func (*ModelSequenceBatching_InitialState) ProtoMessage()
func (*ModelSequenceBatching_InitialState) ProtoReflect ¶
func (x *ModelSequenceBatching_InitialState) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_InitialState) Reset ¶
func (x *ModelSequenceBatching_InitialState) Reset()
func (*ModelSequenceBatching_InitialState) String ¶
func (x *ModelSequenceBatching_InitialState) String() string
type ModelSequenceBatching_InitialState_DataFile ¶
type ModelSequenceBatching_InitialState_DataFile struct { // @@ .. cpp:var:: string data_file // @@ // @@ The file whose content will be used as the initial data for // @@ the state in row-major order. The file must be provided in // @@ sub-directory 'initial_state' under the model directory. // @@ DataFile string `protobuf:"bytes,4,opt,name=data_file,json=dataFile,proto3,oneof"` }
type ModelSequenceBatching_InitialState_ZeroData ¶
type ModelSequenceBatching_InitialState_ZeroData struct { // @@ // @@ .. cpp:var:: bool zero_data // @@ // @@ The identifier for using zeros as initial state data. // @@ Note that the value of 'zero_data' will not be checked, // @@ instead, zero data will be used as long as the field is set. // @@ ZeroData bool `protobuf:"varint,3,opt,name=zero_data,json=zeroData,proto3,oneof"` }
type ModelSequenceBatching_Oldest ¶
type ModelSequenceBatching_Oldest struct { // @@ .. cpp:var:: StrategyOldest oldest // @@ // @@ StrategyOldest scheduling strategy. // @@ Oldest *ModelSequenceBatching_StrategyOldest `protobuf:"bytes,4,opt,name=oldest,proto3,oneof"` }
type ModelSequenceBatching_State ¶
type ModelSequenceBatching_State struct { // @@ .. cpp:var:: string input_name // @@ // @@ The name of the model state input. // @@ InputName string `protobuf:"bytes,1,opt,name=input_name,json=inputName,proto3" json:"input_name,omitempty"` // @@ .. cpp:var:: string output_name // @@ // @@ The name of the model state output. // @@ OutputName string `protobuf:"bytes,2,opt,name=output_name,json=outputName,proto3" json:"output_name,omitempty"` // @@ .. cpp:var:: DataType data_type // @@ // @@ The data-type of the state. // @@ DataType DataType `protobuf:"varint,3,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"` // @@ .. cpp:var:: int64 dim (repeated) // @@ // @@ The dimension. // @@ Dims []int64 `protobuf:"varint,4,rep,packed,name=dims,proto3" json:"dims,omitempty"` // @@ .. cpp:var:: InitialState initial_state (repeated) // @@ // @@ The optional field to specify the initial state for the model. // @@ InitialState []*ModelSequenceBatching_InitialState `protobuf:"bytes,5,rep,name=initial_state,json=initialState,proto3" json:"initial_state,omitempty"` // @@ .. cpp:var:: bool use_same_buffer_for_input_output // @@ // @@ The optional field to use a single buffer for both input and output // @@ state. Without this option, Triton allocates separate buffers // @@ for input and output state // @@ which can be problematic if the state size is // @@ large. This option reduces the memory usage by allocating a single // @@ buffer. Enabling this option is recommended whenever // @@ the input state is processed before the output state is written. // @@ When enabled the state // @@ will always be updated independent of whether // @@ TRITONBACKEND_StateUpdate is called // @@ (however TRITONBACKEND_StateUpdate should still be called for // @@ completeness). // @@ // @@ The default value is false. // @@ UseSameBufferForInputOutput bool `` /* 151-byte string literal not displayed */ // @@ .. cpp:var:: bool use_growable_memory // @@ // @@ The optional field to enable an implicit state buffer to grow // @@ without reallocating or copying existing memory. // @@ Additional memory will be appended to the end of the buffer and // @@ existing data will be preserved. // @@ This option is only available for CUDA memory and requires enabling // @@ use_same_buffer_for_input_output. When using this option, // @@ StateBuffer call will always return CUDA memory even if CPU memory // @@ is requested. // @@ // @@ The default value is false. // @@ UseGrowableMemory bool `protobuf:"varint,7,opt,name=use_growable_memory,json=useGrowableMemory,proto3" json:"use_growable_memory,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message State @@ @@ An input / output pair of tensors that carry state for the sequence. @@
func (*ModelSequenceBatching_State) Descriptor
deprecated
func (*ModelSequenceBatching_State) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_State.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_State) GetDataType ¶
func (x *ModelSequenceBatching_State) GetDataType() DataType
func (*ModelSequenceBatching_State) GetDims ¶
func (x *ModelSequenceBatching_State) GetDims() []int64
func (*ModelSequenceBatching_State) GetInitialState ¶
func (x *ModelSequenceBatching_State) GetInitialState() []*ModelSequenceBatching_InitialState
func (*ModelSequenceBatching_State) GetInputName ¶
func (x *ModelSequenceBatching_State) GetInputName() string
func (*ModelSequenceBatching_State) GetOutputName ¶
func (x *ModelSequenceBatching_State) GetOutputName() string
func (*ModelSequenceBatching_State) GetUseGrowableMemory ¶
func (x *ModelSequenceBatching_State) GetUseGrowableMemory() bool
func (*ModelSequenceBatching_State) GetUseSameBufferForInputOutput ¶
func (x *ModelSequenceBatching_State) GetUseSameBufferForInputOutput() bool
func (*ModelSequenceBatching_State) ProtoMessage ¶
func (*ModelSequenceBatching_State) ProtoMessage()
func (*ModelSequenceBatching_State) ProtoReflect ¶
func (x *ModelSequenceBatching_State) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_State) Reset ¶
func (x *ModelSequenceBatching_State) Reset()
func (*ModelSequenceBatching_State) String ¶
func (x *ModelSequenceBatching_State) String() string
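A sketch of declaring an implicit state pair with zero-initialized data. The tensor names and shape are placeholders, and the DataType_TYPE_FP32 constant assumes the usual protobuf-go enum naming for this package's DataType:

st := &inference.ModelSequenceBatching_State{
	InputName:  "INPUT_STATE",  // placeholder state input tensor
	OutputName: "OUTPUT_STATE", // placeholder state output tensor
	DataType:   inference.DataType_TYPE_FP32,
	Dims:       []int64{128},
	InitialState: []*inference.ModelSequenceBatching_InitialState{
		{
			Name:     "zeroed",
			DataType: inference.DataType_TYPE_FP32,
			Dims:     []int64{128},
			// zero_data is a oneof marker: setting the field at all selects
			// zero initialization; the bool value itself is not checked.
			StateData: &inference.ModelSequenceBatching_InitialState_ZeroData{ZeroData: true},
		},
	},
}
_ = st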
type ModelSequenceBatching_StrategyDirect ¶
type ModelSequenceBatching_StrategyDirect struct { // @@ .. cpp:var:: uint64 max_queue_delay_microseconds // @@ // @@ The maximum time, in microseconds, a candidate request // @@ will be delayed in the sequence batch scheduling queue to // @@ wait for additional requests for batching. Default is 0. // @@ MaxQueueDelayMicroseconds uint64 `` /* 141-byte string literal not displayed */ // @@ .. cpp:var:: float minimum_slot_utilization // @@ // @@ The minimum slot utilization that must be satisfied to // @@ execute the batch before 'max_queue_delay_microseconds' expires. // @@ For example, a value of 0.5 indicates that the batch should be // @@ executed as soon as 50% or more of the slots are ready even if // @@ the 'max_queue_delay_microseconds' timeout has not expired. // @@ The default is 0.0, indicating that a batch will be executed // @@ before 'max_queue_delay_microseconds' timeout expires if at least // @@ one batch slot is ready. 'max_queue_delay_microseconds' will be // @@ ignored unless minimum_slot_utilization is set to a non-zero // @@ value. // @@ MinimumSlotUtilization float32 `` /* 131-byte string literal not displayed */ // contains filtered or unexported fields }
@@ .. cpp:var:: message StrategyDirect @@ @@ The sequence batcher uses a specific, unique batch @@ slot for each sequence. All inference requests in a @@ sequence are directed to the same batch slot in the same @@ model instance over the lifetime of the sequence. This @@ is the default strategy. @@
func (*ModelSequenceBatching_StrategyDirect) Descriptor
deprecated
func (*ModelSequenceBatching_StrategyDirect) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_StrategyDirect.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_StrategyDirect) GetMaxQueueDelayMicroseconds ¶
func (x *ModelSequenceBatching_StrategyDirect) GetMaxQueueDelayMicroseconds() uint64
func (*ModelSequenceBatching_StrategyDirect) GetMinimumSlotUtilization ¶
func (x *ModelSequenceBatching_StrategyDirect) GetMinimumSlotUtilization() float32
func (*ModelSequenceBatching_StrategyDirect) ProtoMessage ¶
func (*ModelSequenceBatching_StrategyDirect) ProtoMessage()
func (*ModelSequenceBatching_StrategyDirect) ProtoReflect ¶
func (x *ModelSequenceBatching_StrategyDirect) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_StrategyDirect) Reset ¶
func (x *ModelSequenceBatching_StrategyDirect) Reset()
func (*ModelSequenceBatching_StrategyDirect) String ¶
func (x *ModelSequenceBatching_StrategyDirect) String() string
type ModelSequenceBatching_StrategyOldest ¶
type ModelSequenceBatching_StrategyOldest struct { // @@ .. cpp:var:: int32 max_candidate_sequences // @@ // @@ Maximum number of candidate sequences that the batcher // @@ maintains. Excess sequences are kept in an ordered backlog // @@ and become candidates when existing candidate sequences // @@ complete. // @@ MaxCandidateSequences int32 `` /* 127-byte string literal not displayed */ // @@ .. cpp:var:: int32 preferred_batch_size (repeated) // @@ // @@ Preferred batch sizes for dynamic batching of candidate // @@ sequences. If a batch of one of these sizes can be formed // @@ it will be executed immediately. If not specified a // @@ preferred batch size will be chosen automatically // @@ based on model and GPU characteristics. // @@ PreferredBatchSize []int32 `protobuf:"varint,2,rep,packed,name=preferred_batch_size,json=preferredBatchSize,proto3" json:"preferred_batch_size,omitempty"` // @@ .. cpp:var:: uint64 max_queue_delay_microseconds // @@ // @@ The maximum time, in microseconds, a candidate request // @@ will be delayed in the dynamic batch scheduling queue to // @@ wait for additional requests for batching. Default is 0. // @@ MaxQueueDelayMicroseconds uint64 `` /* 141-byte string literal not displayed */ // @@ .. cpp:var:: bool preserve_ordering // @@ // @@ Should the dynamic batcher preserve the ordering of responses to // @@ match the order of requests received by the scheduler. Default is // @@ false. If true, the responses will be returned in the same order // @@ as the order of requests sent to the scheduler. If false, the // @@ responses may be returned in arbitrary order. This option is // @@ specifically needed when a sequence of related inference requests // @@ (i.e. inference requests with the same correlation ID) are sent // @@ to the dynamic batcher to ensure that the sequence responses are // @@ in the correct order. // @@ // @@ When using decoupled models, setting this to true may block the // @@ responses from independent sequences from being returned to the // @@ client until the previous request completes, hurting overall // @@ performance. If using GRPC streaming protocol, the stream // @@ ordering guarantee may be sufficient alone to ensure the // @@ responses for each sequence are returned in sequence-order // @@ without blocking based on independent requests, depending on the // @@ use case. // @@ PreserveOrdering bool `protobuf:"varint,4,opt,name=preserve_ordering,json=preserveOrdering,proto3" json:"preserve_ordering,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message StrategyOldest @@ @@ The sequence batcher maintains up to 'max_candidate_sequences' @@ candidate sequences. 'max_candidate_sequences' can be greater @@ than the model's 'max_batch_size'. For inferencing the batcher @@ chooses from the candidate sequences up to 'max_batch_size' @@ inference requests. Requests are chosen in an oldest-first @@ manner across all candidate sequences. A given sequence is @@ not guaranteed to be assigned to the same batch slot for @@ all inference requests of that sequence. @@
func (*ModelSequenceBatching_StrategyOldest) Descriptor
deprecated
func (*ModelSequenceBatching_StrategyOldest) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_StrategyOldest.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_StrategyOldest) GetMaxCandidateSequences ¶
func (x *ModelSequenceBatching_StrategyOldest) GetMaxCandidateSequences() int32
func (*ModelSequenceBatching_StrategyOldest) GetMaxQueueDelayMicroseconds ¶
func (x *ModelSequenceBatching_StrategyOldest) GetMaxQueueDelayMicroseconds() uint64
func (*ModelSequenceBatching_StrategyOldest) GetPreferredBatchSize ¶
func (x *ModelSequenceBatching_StrategyOldest) GetPreferredBatchSize() []int32
func (*ModelSequenceBatching_StrategyOldest) GetPreserveOrdering ¶
func (x *ModelSequenceBatching_StrategyOldest) GetPreserveOrdering() bool
func (*ModelSequenceBatching_StrategyOldest) ProtoMessage ¶
func (*ModelSequenceBatching_StrategyOldest) ProtoMessage()
func (*ModelSequenceBatching_StrategyOldest) ProtoReflect ¶
func (x *ModelSequenceBatching_StrategyOldest) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_StrategyOldest) Reset ¶
func (x *ModelSequenceBatching_StrategyOldest) Reset()
func (*ModelSequenceBatching_StrategyOldest) String ¶
func (x *ModelSequenceBatching_StrategyOldest) String() string
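Selecting the oldest-first strategy goes through the strategy_choice oneof wrapper, for example (all values are illustrative only):

sb := &inference.ModelSequenceBatching{
	StrategyChoice: &inference.ModelSequenceBatching_Oldest{
		Oldest: &inference.ModelSequenceBatching_StrategyOldest{
			MaxCandidateSequences:     8,             // up to 8 concurrent candidate sequences
			PreferredBatchSize:        []int32{4, 8}, // execute immediately at these batch sizes
			MaxQueueDelayMicroseconds: 100,
		},
	},
}
_ = sb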
type ModelStatistics ¶
type ModelStatistics struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the model. If not given returns statistics for all // @@ models. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: string version // @@ // @@ The version of the model. // @@ Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` // @@ .. cpp:var:: uint64 last_inference // @@ // @@ The timestamp of the last inference request made for this model, // @@ as milliseconds since the epoch. // @@ LastInference uint64 `protobuf:"varint,3,opt,name=last_inference,json=lastInference,proto3" json:"last_inference,omitempty"` // @@ .. cpp:var:: uint64 inference_count // @@ // @@ The cumulative count of successful inference requests made for this // @@ model. Each inference in a batched request is counted as an // @@ individual inference. For example, if a client sends a single // @@ inference request with batch size 64, "inference_count" will be // @@ incremented by 64. Similarly, if a client sends 64 individual // @@ requests each with batch size 1, "inference_count" will be // @@ incremented by 64. The "inference_count" value DOES NOT include // @@ cache hits. // @@ InferenceCount uint64 `protobuf:"varint,4,opt,name=inference_count,json=inferenceCount,proto3" json:"inference_count,omitempty"` // @@ .. cpp:var:: uint64 execution_count // @@ // @@ The cumulative count of the number of successful inference executions // @@ performed for the model. When dynamic batching is enabled, a single // @@ model execution can perform inferencing for more than one inference // @@ request. For example, if a client sends 64 individual requests each // @@ with batch size 1 and the dynamic batcher batches them into a single // @@ large batch for model execution then "execution_count" will be // @@ incremented by 1. If, on the other hand, the dynamic batcher is not // @@ enabled, so that each of the 64 individual requests is executed // @@ independently, then "execution_count" will be incremented by 64. // @@ The "execution_count" value DOES NOT include cache hits. // @@ ExecutionCount uint64 `protobuf:"varint,5,opt,name=execution_count,json=executionCount,proto3" json:"execution_count,omitempty"` // @@ .. cpp:var:: InferStatistics inference_stats // @@ // @@ The aggregate statistics for the model/version. // @@ InferenceStats *InferStatistics `protobuf:"bytes,6,opt,name=inference_stats,json=inferenceStats,proto3" json:"inference_stats,omitempty"` // @@ .. cpp:var:: InferBatchStatistics batch_stats (repeated) // @@ // @@ The aggregate statistics for each different batch size that is // @@ executed in the model. The batch statistics indicate how many actual // @@ model executions were performed and show differences due to different // @@ batch size (for example, larger batches typically take longer to // @@ compute). // @@ BatchStats []*InferBatchStatistics `protobuf:"bytes,7,rep,name=batch_stats,json=batchStats,proto3" json:"batch_stats,omitempty"` // @@ .. cpp:var:: MemoryUsage memory_usage (repeated) // @@ // @@ The memory usage detected during model loading, which may be used to // @@ estimate the memory to be released once the model is unloaded. Note // @@ that the estimate is inferred by profiling tools and the framework's // @@ memory schema, so it is advised to perform experiments to understand // @@ the scenarios in which the reported memory usage can be relied on. // @@ As a starting point, the GPU memory usage for models in the ONNX // @@ Runtime backend and the TensorRT backend is usually aligned. // @@ MemoryUsage []*MemoryUsage `protobuf:"bytes,8,rep,name=memory_usage,json=memoryUsage,proto3" json:"memory_usage,omitempty"` // @@ .. cpp:var:: map<string, InferResponseStatistics> response_stats // @@ // @@ The key and value pairs for all response statistics. The key is a // @@ string identifying a set of response statistics aggregated together // @@ (i.e. index of the response sent). The value is the aggregated // @@ response statistics. // @@ ResponseStats map[string]*InferResponseStatistics `` /* 188-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelStatistics @@ @@ Statistics for a specific model and version. @@
func (*ModelStatistics) Descriptor
deprecated
func (*ModelStatistics) Descriptor() ([]byte, []int)
Deprecated: Use ModelStatistics.ProtoReflect.Descriptor instead.
func (*ModelStatistics) GetBatchStats ¶
func (x *ModelStatistics) GetBatchStats() []*InferBatchStatistics
func (*ModelStatistics) GetExecutionCount ¶
func (x *ModelStatistics) GetExecutionCount() uint64
func (*ModelStatistics) GetInferenceCount ¶
func (x *ModelStatistics) GetInferenceCount() uint64
func (*ModelStatistics) GetInferenceStats ¶
func (x *ModelStatistics) GetInferenceStats() *InferStatistics
func (*ModelStatistics) GetLastInference ¶
func (x *ModelStatistics) GetLastInference() uint64
func (*ModelStatistics) GetMemoryUsage ¶
func (x *ModelStatistics) GetMemoryUsage() []*MemoryUsage
func (*ModelStatistics) GetName ¶
func (x *ModelStatistics) GetName() string
func (*ModelStatistics) GetResponseStats ¶
func (x *ModelStatistics) GetResponseStats() map[string]*InferResponseStatistics
func (*ModelStatistics) GetVersion ¶
func (x *ModelStatistics) GetVersion() string
func (*ModelStatistics) ProtoMessage ¶
func (*ModelStatistics) ProtoMessage()
func (*ModelStatistics) ProtoReflect ¶
func (x *ModelStatistics) ProtoReflect() protoreflect.Message
func (*ModelStatistics) Reset ¶
func (x *ModelStatistics) Reset()
func (*ModelStatistics) String ¶
func (x *ModelStatistics) String() string
type ModelStatisticsRequest ¶
type ModelStatisticsRequest struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the model. If not given returns statistics for // @@ all models. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: string version // @@ // @@ The version of the model. If not given returns statistics for // @@ all model versions. // @@ Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelStatisticsRequest @@ @@ Request message for ModelStatistics. @@
func (*ModelStatisticsRequest) Descriptor
deprecated
func (*ModelStatisticsRequest) Descriptor() ([]byte, []int)
Deprecated: Use ModelStatisticsRequest.ProtoReflect.Descriptor instead.
func (*ModelStatisticsRequest) GetName ¶
func (x *ModelStatisticsRequest) GetName() string
func (*ModelStatisticsRequest) GetVersion ¶
func (x *ModelStatisticsRequest) GetVersion() string
func (*ModelStatisticsRequest) ProtoMessage ¶
func (*ModelStatisticsRequest) ProtoMessage()
func (*ModelStatisticsRequest) ProtoReflect ¶
func (x *ModelStatisticsRequest) ProtoReflect() protoreflect.Message
func (*ModelStatisticsRequest) Reset ¶
func (x *ModelStatisticsRequest) Reset()
func (*ModelStatisticsRequest) String ¶
func (x *ModelStatisticsRequest) String() string
type ModelStatisticsResponse ¶
type ModelStatisticsResponse struct { // @@ .. cpp:var:: ModelStatistics model_stats (repeated) // @@ // @@ Statistics for each requested model. // @@ ModelStats []*ModelStatistics `protobuf:"bytes,1,rep,name=model_stats,json=modelStats,proto3" json:"model_stats,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelStatisticsResponse @@ @@ Response message for ModelStatistics. @@
func (*ModelStatisticsResponse) Descriptor
deprecated
func (*ModelStatisticsResponse) Descriptor() ([]byte, []int)
Deprecated: Use ModelStatisticsResponse.ProtoReflect.Descriptor instead.
func (*ModelStatisticsResponse) GetModelStats ¶
func (x *ModelStatisticsResponse) GetModelStats() []*ModelStatistics
func (*ModelStatisticsResponse) ProtoMessage ¶
func (*ModelStatisticsResponse) ProtoMessage()
func (*ModelStatisticsResponse) ProtoReflect ¶
func (x *ModelStatisticsResponse) ProtoReflect() protoreflect.Message
func (*ModelStatisticsResponse) Reset ¶
func (x *ModelStatisticsResponse) Reset()
func (*ModelStatisticsResponse) String ¶
func (x *ModelStatisticsResponse) String() string
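A sketch of querying statistics, reusing the client and ctx from the ModelReady example above; the ModelStatistics client method is assumed to follow the standard generated unary signature, and the model name and version are placeholders:

resp, err := client.ModelStatistics(ctx, &inference.ModelStatisticsRequest{
	Name:    "my_model", // leave empty for statistics on all models
	Version: "1",        // leave empty for statistics on all versions
})
if err != nil {
	log.Fatal(err)
}
for _, ms := range resp.GetModelStats() {
	log.Printf("%s v%s: %d inferences in %d executions",
		ms.GetName(), ms.GetVersion(), ms.GetInferenceCount(), ms.GetExecutionCount())
}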
type ModelStreamInferResponse ¶
type ModelStreamInferResponse struct { // @@ // @@ .. cpp:var:: string error_message // @@ // @@ The message describing the error. An empty message // @@ indicates the inference was successful without errors. // @@ ErrorMessage string `protobuf:"bytes,1,opt,name=error_message,json=errorMessage,proto3" json:"error_message,omitempty"` // @@ // @@ .. cpp:var:: ModelInferResponse infer_response // @@ // @@ Holds the results of the request. // @@ InferResponse *ModelInferResponse `protobuf:"bytes,2,opt,name=infer_response,json=inferResponse,proto3" json:"infer_response,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelStreamInferResponse @@ @@ Response message for ModelStreamInfer. @@
func (*ModelStreamInferResponse) Descriptor
deprecated
func (*ModelStreamInferResponse) Descriptor() ([]byte, []int)
Deprecated: Use ModelStreamInferResponse.ProtoReflect.Descriptor instead.
func (*ModelStreamInferResponse) GetErrorMessage ¶
func (x *ModelStreamInferResponse) GetErrorMessage() string
func (*ModelStreamInferResponse) GetInferResponse ¶
func (x *ModelStreamInferResponse) GetInferResponse() *ModelInferResponse
func (*ModelStreamInferResponse) ProtoMessage ¶
func (*ModelStreamInferResponse) ProtoMessage()
func (*ModelStreamInferResponse) ProtoReflect ¶
func (x *ModelStreamInferResponse) ProtoReflect() protoreflect.Message
func (*ModelStreamInferResponse) Reset ¶
func (x *ModelStreamInferResponse) Reset()
func (*ModelStreamInferResponse) String ¶
func (x *ModelStreamInferResponse) String() string
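A sketch of draining a bidirectional ModelStreamInfer stream, assuming the standard generated streaming client method and the client/ctx from the earlier sketch (the io package is also needed; sending of requests is elided). Per this message's contract, an empty ErrorMessage means the wrapped InferResponse is valid:

stream, err := client.ModelStreamInfer(ctx)
if err != nil {
	log.Fatal(err)
}
// ... send ModelInferRequest messages with stream.Send(...) ...
for {
	resp, err := stream.Recv()
	if err == io.EOF {
		break // server closed the stream
	}
	if err != nil {
		log.Fatal(err)
	}
	if msg := resp.GetErrorMessage(); msg != "" {
		log.Printf("inference failed: %s", msg)
		continue
	}
	_ = resp.GetInferResponse() // results for one request
}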
type ModelTensorReshape ¶
type ModelTensorReshape struct { // @@ .. cpp:var:: int64 shape (repeated) // @@ // @@ The shape to use for reshaping. // @@ Shape []int64 `protobuf:"varint,1,rep,packed,name=shape,proto3" json:"shape,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelTensorReshape @@ @@ Reshape specification for input and output tensors. @@
func (*ModelTensorReshape) Descriptor
deprecated
func (*ModelTensorReshape) Descriptor() ([]byte, []int)
Deprecated: Use ModelTensorReshape.ProtoReflect.Descriptor instead.
func (*ModelTensorReshape) GetShape ¶
func (x *ModelTensorReshape) GetShape() []int64
func (*ModelTensorReshape) ProtoMessage ¶
func (*ModelTensorReshape) ProtoMessage()
func (*ModelTensorReshape) ProtoReflect ¶
func (x *ModelTensorReshape) ProtoReflect() protoreflect.Message
func (*ModelTensorReshape) Reset ¶
func (x *ModelTensorReshape) Reset()
func (*ModelTensorReshape) String ¶
func (x *ModelTensorReshape) String() string
type ModelTransactionPolicy ¶
type ModelTransactionPolicy struct { // @@ .. cpp:var:: bool decoupled // @@ // @@ Indicates whether responses generated by the model are decoupled from // @@ the requests issued to it, which means the number of responses // @@ generated by the model may differ from the number of requests issued, // @@ and that the responses may be out of order relative to the order of // @@ requests. The default is false, which means the model will generate // @@ exactly one response for each request. // @@ Decoupled bool `protobuf:"varint,1,opt,name=decoupled,proto3" json:"decoupled,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message ModelTransactionPolicy @@ @@ The specification that describes the nature of transactions @@ to be expected from the model. @@
func (*ModelTransactionPolicy) Descriptor
deprecated
func (*ModelTransactionPolicy) Descriptor() ([]byte, []int)
Deprecated: Use ModelTransactionPolicy.ProtoReflect.Descriptor instead.
func (*ModelTransactionPolicy) GetDecoupled ¶
func (x *ModelTransactionPolicy) GetDecoupled() bool
func (*ModelTransactionPolicy) ProtoMessage ¶
func (*ModelTransactionPolicy) ProtoMessage()
func (*ModelTransactionPolicy) ProtoReflect ¶
func (x *ModelTransactionPolicy) ProtoReflect() protoreflect.Message
func (*ModelTransactionPolicy) Reset ¶
func (x *ModelTransactionPolicy) Reset()
func (*ModelTransactionPolicy) String ¶
func (x *ModelTransactionPolicy) String() string
type ModelVersionPolicy ¶
type ModelVersionPolicy struct { // @@ .. cpp:var:: oneof policy_choice // @@ // @@ Each model must implement only a single version policy. The // @@ default policy is 'Latest'. // @@ // // Types that are assignable to PolicyChoice: // // *ModelVersionPolicy_Latest_ // *ModelVersionPolicy_All_ // *ModelVersionPolicy_Specific_ PolicyChoice isModelVersionPolicy_PolicyChoice `protobuf_oneof:"policy_choice"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelVersionPolicy @@ @@ Policy indicating which versions of a model should be made @@ available by the inference server. @@
func (*ModelVersionPolicy) Descriptor
deprecated
func (*ModelVersionPolicy) Descriptor() ([]byte, []int)
Deprecated: Use ModelVersionPolicy.ProtoReflect.Descriptor instead.
func (*ModelVersionPolicy) GetAll ¶
func (x *ModelVersionPolicy) GetAll() *ModelVersionPolicy_All
func (*ModelVersionPolicy) GetLatest ¶
func (x *ModelVersionPolicy) GetLatest() *ModelVersionPolicy_Latest
func (*ModelVersionPolicy) GetPolicyChoice ¶
func (m *ModelVersionPolicy) GetPolicyChoice() isModelVersionPolicy_PolicyChoice
func (*ModelVersionPolicy) GetSpecific ¶
func (x *ModelVersionPolicy) GetSpecific() *ModelVersionPolicy_Specific
func (*ModelVersionPolicy) ProtoMessage ¶
func (*ModelVersionPolicy) ProtoMessage()
func (*ModelVersionPolicy) ProtoReflect ¶
func (x *ModelVersionPolicy) ProtoReflect() protoreflect.Message
func (*ModelVersionPolicy) Reset ¶
func (x *ModelVersionPolicy) Reset()
func (*ModelVersionPolicy) String ¶
func (x *ModelVersionPolicy) String() string
type ModelVersionPolicy_All ¶
type ModelVersionPolicy_All struct {
// contains filtered or unexported fields
}
@@ .. cpp:var:: message All @@ @@ Serve all versions of the model. @@
func (*ModelVersionPolicy_All) Descriptor
deprecated
func (*ModelVersionPolicy_All) Descriptor() ([]byte, []int)
Deprecated: Use ModelVersionPolicy_All.ProtoReflect.Descriptor instead.
func (*ModelVersionPolicy_All) ProtoMessage ¶
func (*ModelVersionPolicy_All) ProtoMessage()
func (*ModelVersionPolicy_All) ProtoReflect ¶
func (x *ModelVersionPolicy_All) ProtoReflect() protoreflect.Message
func (*ModelVersionPolicy_All) Reset ¶
func (x *ModelVersionPolicy_All) Reset()
func (*ModelVersionPolicy_All) String ¶
func (x *ModelVersionPolicy_All) String() string
type ModelVersionPolicy_All_ ¶
type ModelVersionPolicy_All_ struct { // @@ .. cpp:var:: All all // @@ // @@ Serve all versions of the model. // @@ All *ModelVersionPolicy_All `protobuf:"bytes,2,opt,name=all,proto3,oneof"` }
type ModelVersionPolicy_Latest ¶
type ModelVersionPolicy_Latest struct { // @@ .. cpp:var:: uint32 num_versions // @@ // @@ Serve only the 'num_versions' highest-numbered versions. // @@ The default value of 'num_versions' is 1, indicating that by // @@ default only the single highest-numbered version of a // @@ model will be served. // @@ NumVersions uint32 `protobuf:"varint,1,opt,name=num_versions,json=numVersions,proto3" json:"num_versions,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message Latest @@ @@ Serve only the latest version(s) of a model. This is @@ the default policy. @@
func (*ModelVersionPolicy_Latest) Descriptor
deprecated
func (*ModelVersionPolicy_Latest) Descriptor() ([]byte, []int)
Deprecated: Use ModelVersionPolicy_Latest.ProtoReflect.Descriptor instead.
func (*ModelVersionPolicy_Latest) GetNumVersions ¶
func (x *ModelVersionPolicy_Latest) GetNumVersions() uint32
func (*ModelVersionPolicy_Latest) ProtoMessage ¶
func (*ModelVersionPolicy_Latest) ProtoMessage()
func (*ModelVersionPolicy_Latest) ProtoReflect ¶
func (x *ModelVersionPolicy_Latest) ProtoReflect() protoreflect.Message
func (*ModelVersionPolicy_Latest) Reset ¶
func (x *ModelVersionPolicy_Latest) Reset()
func (*ModelVersionPolicy_Latest) String ¶
func (x *ModelVersionPolicy_Latest) String() string
type ModelVersionPolicy_Latest_ ¶
type ModelVersionPolicy_Latest_ struct { // @@ .. cpp:var:: Latest latest // @@ // @@ Serve only latest version(s) of the model. // @@ Latest *ModelVersionPolicy_Latest `protobuf:"bytes,1,opt,name=latest,proto3,oneof"` }
type ModelVersionPolicy_Specific ¶
type ModelVersionPolicy_Specific struct { // @@ .. cpp:var:: int64 versions (repeated) // @@ // @@ The specific versions of the model that will be served. // @@ Versions []int64 `protobuf:"varint,1,rep,packed,name=versions,proto3" json:"versions,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message Specific @@ @@ Serve only specific versions of the model. @@
func (*ModelVersionPolicy_Specific) Descriptor
deprecated
func (*ModelVersionPolicy_Specific) Descriptor() ([]byte, []int)
Deprecated: Use ModelVersionPolicy_Specific.ProtoReflect.Descriptor instead.
func (*ModelVersionPolicy_Specific) GetVersions ¶
func (x *ModelVersionPolicy_Specific) GetVersions() []int64
func (*ModelVersionPolicy_Specific) ProtoMessage ¶
func (*ModelVersionPolicy_Specific) ProtoMessage()
func (*ModelVersionPolicy_Specific) ProtoReflect ¶
func (x *ModelVersionPolicy_Specific) ProtoReflect() protoreflect.Message
func (*ModelVersionPolicy_Specific) Reset ¶
func (x *ModelVersionPolicy_Specific) Reset()
func (*ModelVersionPolicy_Specific) String ¶
func (x *ModelVersionPolicy_Specific) String() string
type ModelVersionPolicy_Specific_ ¶
type ModelVersionPolicy_Specific_ struct { // @@ .. cpp:var:: Specific specific // @@ // @@ Serve only specific version(s) of the model. // @@ Specific *ModelVersionPolicy_Specific `protobuf:"bytes,3,opt,name=specific,proto3,oneof"` }
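Note the trailing underscore on the oneof wrappers (ModelVersionPolicy_Latest_, ModelVersionPolicy_All_, ModelVersionPolicy_Specific_), which distinguishes them from the payload messages they wrap. A sketch of selecting each policy, with the placeholder alias from the earlier examples:

// Serve only versions 2 and 3.
vp := &inference.ModelVersionPolicy{
	PolicyChoice: &inference.ModelVersionPolicy_Specific_{
		Specific: &inference.ModelVersionPolicy_Specific{Versions: []int64{2, 3}},
	},
}

// Alternatively, serve the two highest-numbered versions.
vp = &inference.ModelVersionPolicy{
	PolicyChoice: &inference.ModelVersionPolicy_Latest_{
		Latest: &inference.ModelVersionPolicy_Latest{NumVersions: 2},
	},
}
_ = vp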
type ModelWarmup ¶
type ModelWarmup struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the request sample. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: uint32 batch_size // @@ // @@ The batch size of the inference request. This must be >= 1. For // @@ models that don't support batching, batch_size must be 1. If // @@ batch_size > 1, the 'inputs' specified below will be duplicated to // @@ match the batch size requested. // @@ BatchSize uint32 `protobuf:"varint,2,opt,name=batch_size,json=batchSize,proto3" json:"batch_size,omitempty"` // @@ .. cpp:var:: map<string, Input> inputs // @@ // @@ The warmup metadata associated with every model input, including // @@ control tensors. // @@ Inputs map[string]*ModelWarmup_Input `` /* 153-byte string literal not displayed */ // @@ .. cpp:var:: uint32 count // @@ // @@ The number of times this warmup sample will be executed. // @@ For example, if this field is set to 2, 2 model executions using this // @@ sample will be scheduled for warmup. Default value is 0 which // @@ indicates that this sample will be used only once. // @@ Note that for sequence models, 'count' may not work well // @@ because the model often expects a valid sequence of requests which // @@ should be represented by a series of warmup samples. 'count > 1' // @@ essentially "resends" one of the samples, which may invalidate the // @@ sequence and result in unexpected warmup failure. // @@ Count uint32 `protobuf:"varint,4,opt,name=count,proto3" json:"count,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelWarmup @@ @@ Settings used to construct the request sample for model warmup. @@
func (*ModelWarmup) Descriptor
deprecated
func (*ModelWarmup) Descriptor() ([]byte, []int)
Deprecated: Use ModelWarmup.ProtoReflect.Descriptor instead.
func (*ModelWarmup) GetBatchSize ¶
func (x *ModelWarmup) GetBatchSize() uint32
func (*ModelWarmup) GetCount ¶
func (x *ModelWarmup) GetCount() uint32
func (*ModelWarmup) GetInputs ¶
func (x *ModelWarmup) GetInputs() map[string]*ModelWarmup_Input
func (*ModelWarmup) GetName ¶
func (x *ModelWarmup) GetName() string
func (*ModelWarmup) ProtoMessage ¶
func (*ModelWarmup) ProtoMessage()
func (*ModelWarmup) ProtoReflect ¶
func (x *ModelWarmup) ProtoReflect() protoreflect.Message
func (*ModelWarmup) Reset ¶
func (x *ModelWarmup) Reset()
func (*ModelWarmup) String ¶
func (x *ModelWarmup) String() string
type ModelWarmup_Input ¶
type ModelWarmup_Input struct { // @@ .. cpp:var:: DataType data_type // @@ // @@ The data-type of the input. // @@ DataType DataType `protobuf:"varint,1,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"` // @@ .. cpp:var:: int64 dims (repeated) // @@ // @@ The shape of the input tensor, not including the batch dimension. // @@ Dims []int64 `protobuf:"varint,2,rep,packed,name=dims,proto3" json:"dims,omitempty"` // @@ .. cpp:var:: oneof input_data_type // @@ // @@ Specify how the input data is generated. If the input has STRING // @@ data type and 'random_data' is set, the data generation will fall // @@ back to 'zero_data'. // @@ // // Types that are assignable to InputDataType: // // *ModelWarmup_Input_ZeroData // *ModelWarmup_Input_RandomData // *ModelWarmup_Input_InputDataFile InputDataType isModelWarmup_Input_InputDataType `protobuf_oneof:"input_data_type"` // contains filtered or unexported fields }
Metadata associated with an input.
func (*ModelWarmup_Input) Descriptor
deprecated
func (*ModelWarmup_Input) Descriptor() ([]byte, []int)
Deprecated: Use ModelWarmup_Input.ProtoReflect.Descriptor instead.
func (*ModelWarmup_Input) GetDataType ¶
func (x *ModelWarmup_Input) GetDataType() DataType
func (*ModelWarmup_Input) GetDims ¶
func (x *ModelWarmup_Input) GetDims() []int64
func (*ModelWarmup_Input) GetInputDataFile ¶
func (x *ModelWarmup_Input) GetInputDataFile() string
func (*ModelWarmup_Input) GetInputDataType ¶
func (m *ModelWarmup_Input) GetInputDataType() isModelWarmup_Input_InputDataType
func (*ModelWarmup_Input) GetRandomData ¶
func (x *ModelWarmup_Input) GetRandomData() bool
func (*ModelWarmup_Input) GetZeroData ¶
func (x *ModelWarmup_Input) GetZeroData() bool
func (*ModelWarmup_Input) ProtoMessage ¶
func (*ModelWarmup_Input) ProtoMessage()
func (*ModelWarmup_Input) ProtoReflect ¶
func (x *ModelWarmup_Input) ProtoReflect() protoreflect.Message
func (*ModelWarmup_Input) Reset ¶
func (x *ModelWarmup_Input) Reset()
func (*ModelWarmup_Input) String ¶
func (x *ModelWarmup_Input) String() string
type ModelWarmup_Input_InputDataFile ¶
type ModelWarmup_Input_InputDataFile struct {
	// The file whose content will be used as raw input data in
	// row-major order. The file must be provided in a sub-directory
	// 'warmup' under the model directory. The file contents should be
	// in binary format. For TYPE_STRING data-type, an element is
	// represented by a 4-byte unsigned integer giving the length
	// followed by the actual bytes.
	InputDataFile string `protobuf:"bytes,5,opt,name=input_data_file,json=inputDataFile,proto3,oneof"`
}
type ModelWarmup_Input_RandomData ¶
type ModelWarmup_Input_RandomData struct {
	// The identifier for using random data as input data. Note that
	// the value of 'random_data' will not be checked; instead,
	// random data will be used as long as the field is set.
	RandomData bool `protobuf:"varint,4,opt,name=random_data,json=randomData,proto3,oneof"`
}
type ModelWarmup_Input_ZeroData ¶
type ModelWarmup_Input_ZeroData struct {
	// The identifier for using zeros as input data. Note that the
	// value of 'zero_data' will not be checked; instead, zero data
	// will be used as long as the field is set.
	ZeroData bool `protobuf:"varint,3,opt,name=zero_data,json=zeroData,proto3,oneof"`
}
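Taken together, ModelWarmup and its Input oneof describe one synthetic request. A minimal sketch of a zero-filled FP32 warmup sample built in Go; pb aliases this package, and DataType_TYPE_FP32 is the generated constant for the proto's TYPE_FP32.

	warmup := &pb.ModelWarmup{
		Name:      "zero_sample",
		BatchSize: 1, // must be >= 1; exactly 1 for non-batching models
		Count:     1, // schedule this sample once during warmup
		Inputs: map[string]*pb.ModelWarmup_Input{
			"INPUT0": {
				DataType: pb.DataType_TYPE_FP32,
				Dims:     []int64{16}, // shape without the batch dimension
				// Only the presence of zero_data matters; its bool value is not checked.
				InputDataType: &pb.ModelWarmup_Input_ZeroData{ZeroData: true},
			},
		},
	}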
type RepositoryIndexRequest ¶
type RepositoryIndexRequest struct {
	// The name of the repository. If empty the index is returned
	// for all repositories.
	RepositoryName string `protobuf:"bytes,1,opt,name=repository_name,json=repositoryName,proto3" json:"repository_name,omitempty"`
	// If true, return only models currently ready for inferencing.
	Ready bool `protobuf:"varint,2,opt,name=ready,proto3" json:"ready,omitempty"`
	// contains filtered or unexported fields
}
Request message for RepositoryIndex.
func (*RepositoryIndexRequest) Descriptor
deprecated
func (*RepositoryIndexRequest) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryIndexRequest.ProtoReflect.Descriptor instead.
func (*RepositoryIndexRequest) GetReady ¶
func (x *RepositoryIndexRequest) GetReady() bool
func (*RepositoryIndexRequest) GetRepositoryName ¶
func (x *RepositoryIndexRequest) GetRepositoryName() string
func (*RepositoryIndexRequest) ProtoMessage ¶
func (*RepositoryIndexRequest) ProtoMessage()
func (*RepositoryIndexRequest) ProtoReflect ¶
func (x *RepositoryIndexRequest) ProtoReflect() protoreflect.Message
func (*RepositoryIndexRequest) Reset ¶
func (x *RepositoryIndexRequest) Reset()
func (*RepositoryIndexRequest) String ¶
func (x *RepositoryIndexRequest) String() string
type RepositoryIndexResponse ¶
type RepositoryIndexResponse struct {
	// An index entry for each model.
	Models []*RepositoryIndexResponse_ModelIndex `protobuf:"bytes,1,rep,name=models,proto3" json:"models,omitempty"`
	// contains filtered or unexported fields
}
Response message for RepositoryIndex.
func (*RepositoryIndexResponse) Descriptor
deprecated
func (*RepositoryIndexResponse) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryIndexResponse.ProtoReflect.Descriptor instead.
func (*RepositoryIndexResponse) GetModels ¶
func (x *RepositoryIndexResponse) GetModels() []*RepositoryIndexResponse_ModelIndex
func (*RepositoryIndexResponse) ProtoMessage ¶
func (*RepositoryIndexResponse) ProtoMessage()
func (*RepositoryIndexResponse) ProtoReflect ¶
func (x *RepositoryIndexResponse) ProtoReflect() protoreflect.Message
func (*RepositoryIndexResponse) Reset ¶
func (x *RepositoryIndexResponse) Reset()
func (*RepositoryIndexResponse) String ¶
func (x *RepositoryIndexResponse) String() string
type RepositoryIndexResponse_ModelIndex ¶
type RepositoryIndexResponse_ModelIndex struct {
	// The name of the model.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// The version of the model.
	Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"`
	// The state of the model.
	State string `protobuf:"bytes,3,opt,name=state,proto3" json:"state,omitempty"`
	// The reason, if any, that the model is in the given state.
	Reason string `protobuf:"bytes,4,opt,name=reason,proto3" json:"reason,omitempty"`
	// contains filtered or unexported fields
}
Index entry for a model.
func (*RepositoryIndexResponse_ModelIndex) Descriptor
deprecated
func (*RepositoryIndexResponse_ModelIndex) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryIndexResponse_ModelIndex.ProtoReflect.Descriptor instead.
func (*RepositoryIndexResponse_ModelIndex) GetName ¶
func (x *RepositoryIndexResponse_ModelIndex) GetName() string
func (*RepositoryIndexResponse_ModelIndex) GetReason ¶
func (x *RepositoryIndexResponse_ModelIndex) GetReason() string
func (*RepositoryIndexResponse_ModelIndex) GetState ¶
func (x *RepositoryIndexResponse_ModelIndex) GetState() string
func (*RepositoryIndexResponse_ModelIndex) GetVersion ¶
func (x *RepositoryIndexResponse_ModelIndex) GetVersion() string
func (*RepositoryIndexResponse_ModelIndex) ProtoMessage ¶
func (*RepositoryIndexResponse_ModelIndex) ProtoMessage()
func (*RepositoryIndexResponse_ModelIndex) ProtoReflect ¶
func (x *RepositoryIndexResponse_ModelIndex) ProtoReflect() protoreflect.Message
func (*RepositoryIndexResponse_ModelIndex) Reset ¶
func (x *RepositoryIndexResponse_ModelIndex) Reset()
func (*RepositoryIndexResponse_ModelIndex) String ¶
func (x *RepositoryIndexResponse_ModelIndex) String() string
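As a usage sketch, the repository index can be queried with the generated client. NewGRPCInferenceServiceClient is the standard protoc-gen-go-grpc constructor that accompanies the server interfaces listed here; the import path and port are placeholders.

	package main

	import (
		"context"
		"fmt"
		"log"

		"google.golang.org/grpc"
		"google.golang.org/grpc/credentials/insecure"

		pb "example.com/inference" // placeholder for this package's import path
	)

	func main() {
		conn, err := grpc.Dial("localhost:8001", grpc.WithTransportCredentials(insecure.NewCredentials()))
		if err != nil {
			log.Fatal(err)
		}
		defer conn.Close()
		client := pb.NewGRPCInferenceServiceClient(conn)

		// Empty RepositoryName indexes all repositories; Ready=true restricts
		// the listing to models currently ready for inferencing.
		resp, err := client.RepositoryIndex(context.Background(), &pb.RepositoryIndexRequest{Ready: true})
		if err != nil {
			log.Fatal(err)
		}
		for _, m := range resp.GetModels() {
			fmt.Printf("%s v%s: %s %s\n", m.GetName(), m.GetVersion(), m.GetState(), m.GetReason())
		}
	}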
type RepositoryModelLoadRequest ¶
type RepositoryModelLoadRequest struct {
	// The name of the repository to load from. If empty the model
	// is loaded from any repository.
	RepositoryName string `protobuf:"bytes,1,opt,name=repository_name,json=repositoryName,proto3" json:"repository_name,omitempty"`
	// The name of the model to load, or reload.
	ModelName string `protobuf:"bytes,2,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"`
	// Optional model repository request parameters.
	Parameters map[string]*ModelRepositoryParameter `` /* 161-byte string literal not displayed */
	// contains filtered or unexported fields
}
Request message for RepositoryModelLoad.
func (*RepositoryModelLoadRequest) Descriptor
deprecated
func (*RepositoryModelLoadRequest) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryModelLoadRequest.ProtoReflect.Descriptor instead.
func (*RepositoryModelLoadRequest) GetModelName ¶
func (x *RepositoryModelLoadRequest) GetModelName() string
func (*RepositoryModelLoadRequest) GetParameters ¶
func (x *RepositoryModelLoadRequest) GetParameters() map[string]*ModelRepositoryParameter
func (*RepositoryModelLoadRequest) GetRepositoryName ¶
func (x *RepositoryModelLoadRequest) GetRepositoryName() string
func (*RepositoryModelLoadRequest) ProtoMessage ¶
func (*RepositoryModelLoadRequest) ProtoMessage()
func (*RepositoryModelLoadRequest) ProtoReflect ¶
func (x *RepositoryModelLoadRequest) ProtoReflect() protoreflect.Message
func (*RepositoryModelLoadRequest) Reset ¶
func (x *RepositoryModelLoadRequest) Reset()
func (*RepositoryModelLoadRequest) String ¶
func (x *RepositoryModelLoadRequest) String() string
type RepositoryModelLoadResponse ¶
type RepositoryModelLoadResponse struct {
// contains filtered or unexported fields
}
Response message for RepositoryModelLoad.
func (*RepositoryModelLoadResponse) Descriptor
deprecated
func (*RepositoryModelLoadResponse) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryModelLoadResponse.ProtoReflect.Descriptor instead.
func (*RepositoryModelLoadResponse) ProtoMessage ¶
func (*RepositoryModelLoadResponse) ProtoMessage()
func (*RepositoryModelLoadResponse) ProtoReflect ¶
func (x *RepositoryModelLoadResponse) ProtoReflect() protoreflect.Message
func (*RepositoryModelLoadResponse) Reset ¶
func (x *RepositoryModelLoadResponse) Reset()
func (*RepositoryModelLoadResponse) String ¶
func (x *RepositoryModelLoadResponse) String() string
type RepositoryModelUnloadRequest ¶
type RepositoryModelUnloadRequest struct {
	// The name of the repository from which the model was originally
	// loaded. If empty the repository is not considered.
	RepositoryName string `protobuf:"bytes,1,opt,name=repository_name,json=repositoryName,proto3" json:"repository_name,omitempty"`
	// The name of the model to unload.
	ModelName string `protobuf:"bytes,2,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"`
	// Optional model repository request parameters.
	Parameters map[string]*ModelRepositoryParameter `` /* 161-byte string literal not displayed */
	// contains filtered or unexported fields
}
Request message for RepositoryModelUnload.
func (*RepositoryModelUnloadRequest) Descriptor
deprecated
func (*RepositoryModelUnloadRequest) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryModelUnloadRequest.ProtoReflect.Descriptor instead.
func (*RepositoryModelUnloadRequest) GetModelName ¶
func (x *RepositoryModelUnloadRequest) GetModelName() string
func (*RepositoryModelUnloadRequest) GetParameters ¶
func (x *RepositoryModelUnloadRequest) GetParameters() map[string]*ModelRepositoryParameter
func (*RepositoryModelUnloadRequest) GetRepositoryName ¶
func (x *RepositoryModelUnloadRequest) GetRepositoryName() string
func (*RepositoryModelUnloadRequest) ProtoMessage ¶
func (*RepositoryModelUnloadRequest) ProtoMessage()
func (*RepositoryModelUnloadRequest) ProtoReflect ¶
func (x *RepositoryModelUnloadRequest) ProtoReflect() protoreflect.Message
func (*RepositoryModelUnloadRequest) Reset ¶
func (x *RepositoryModelUnloadRequest) Reset()
func (*RepositoryModelUnloadRequest) String ¶
func (x *RepositoryModelUnloadRequest) String() string
type RepositoryModelUnloadResponse ¶
type RepositoryModelUnloadResponse struct {
// contains filtered or unexported fields
}
Response message for RepositoryModelUnload.
func (*RepositoryModelUnloadResponse) Descriptor
deprecated
func (*RepositoryModelUnloadResponse) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryModelUnloadResponse.ProtoReflect.Descriptor instead.
func (*RepositoryModelUnloadResponse) ProtoMessage ¶
func (*RepositoryModelUnloadResponse) ProtoMessage()
func (*RepositoryModelUnloadResponse) ProtoReflect ¶
func (x *RepositoryModelUnloadResponse) ProtoReflect() protoreflect.Message
func (*RepositoryModelUnloadResponse) Reset ¶
func (x *RepositoryModelUnloadResponse) Reset()
func (*RepositoryModelUnloadResponse) String ¶
func (x *RepositoryModelUnloadResponse) String() string
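A sketch of the load/unload round trip, reusing the pb import and connected client from the RepositoryIndex example; both calls return empty responses on success.

	// reloadModel loads (or reloads) a model by name and then unloads it.
	func reloadModel(ctx context.Context, client pb.GRPCInferenceServiceClient, name string) error {
		// An empty RepositoryName lets the server find the model in any repository.
		if _, err := client.RepositoryModelLoad(ctx, &pb.RepositoryModelLoadRequest{ModelName: name}); err != nil {
			return fmt.Errorf("load %q: %w", name, err)
		}
		if _, err := client.RepositoryModelUnload(ctx, &pb.RepositoryModelUnloadRequest{ModelName: name}); err != nil {
			return fmt.Errorf("unload %q: %w", name, err)
		}
		return nil
	}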
type ServerLiveRequest ¶
type ServerLiveRequest struct {
// contains filtered or unexported fields
}
Request message for ServerLive.
func (*ServerLiveRequest) Descriptor
deprecated
func (*ServerLiveRequest) Descriptor() ([]byte, []int)
Deprecated: Use ServerLiveRequest.ProtoReflect.Descriptor instead.
func (*ServerLiveRequest) ProtoMessage ¶
func (*ServerLiveRequest) ProtoMessage()
func (*ServerLiveRequest) ProtoReflect ¶
func (x *ServerLiveRequest) ProtoReflect() protoreflect.Message
func (*ServerLiveRequest) Reset ¶
func (x *ServerLiveRequest) Reset()
func (*ServerLiveRequest) String ¶
func (x *ServerLiveRequest) String() string
type ServerLiveResponse ¶
type ServerLiveResponse struct {
	// True if the inference server is live, false if not live.
	Live bool `protobuf:"varint,1,opt,name=live,proto3" json:"live,omitempty"`
	// contains filtered or unexported fields
}
Response message for ServerLive.
func (*ServerLiveResponse) Descriptor
deprecated
func (*ServerLiveResponse) Descriptor() ([]byte, []int)
Deprecated: Use ServerLiveResponse.ProtoReflect.Descriptor instead.
func (*ServerLiveResponse) GetLive ¶
func (x *ServerLiveResponse) GetLive() bool
func (*ServerLiveResponse) ProtoMessage ¶
func (*ServerLiveResponse) ProtoMessage()
func (*ServerLiveResponse) ProtoReflect ¶
func (x *ServerLiveResponse) ProtoReflect() protoreflect.Message
func (*ServerLiveResponse) Reset ¶
func (x *ServerLiveResponse) Reset()
func (*ServerLiveResponse) String ¶
func (x *ServerLiveResponse) String() string
type ServerMetadataRequest ¶
type ServerMetadataRequest struct {
// contains filtered or unexported fields
}
Request message for ServerMetadata.
func (*ServerMetadataRequest) Descriptor
deprecated
func (*ServerMetadataRequest) Descriptor() ([]byte, []int)
Deprecated: Use ServerMetadataRequest.ProtoReflect.Descriptor instead.
func (*ServerMetadataRequest) ProtoMessage ¶
func (*ServerMetadataRequest) ProtoMessage()
func (*ServerMetadataRequest) ProtoReflect ¶
func (x *ServerMetadataRequest) ProtoReflect() protoreflect.Message
func (*ServerMetadataRequest) Reset ¶
func (x *ServerMetadataRequest) Reset()
func (*ServerMetadataRequest) String ¶
func (x *ServerMetadataRequest) String() string
type ServerMetadataResponse ¶
type ServerMetadataResponse struct {
	// The server name.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// The server version.
	Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"`
	// The extensions supported by the server.
	Extensions []string `protobuf:"bytes,3,rep,name=extensions,proto3" json:"extensions,omitempty"`
	// contains filtered or unexported fields
}
Response message for ServerMetadata.
func (*ServerMetadataResponse) Descriptor
deprecated
func (*ServerMetadataResponse) Descriptor() ([]byte, []int)
Deprecated: Use ServerMetadataResponse.ProtoReflect.Descriptor instead.
func (*ServerMetadataResponse) GetExtensions ¶
func (x *ServerMetadataResponse) GetExtensions() []string
func (*ServerMetadataResponse) GetName ¶
func (x *ServerMetadataResponse) GetName() string
func (*ServerMetadataResponse) GetVersion ¶
func (x *ServerMetadataResponse) GetVersion() string
func (*ServerMetadataResponse) ProtoMessage ¶
func (*ServerMetadataResponse) ProtoMessage()
func (*ServerMetadataResponse) ProtoReflect ¶
func (x *ServerMetadataResponse) ProtoReflect() protoreflect.Message
func (*ServerMetadataResponse) Reset ¶
func (x *ServerMetadataResponse) Reset()
func (*ServerMetadataResponse) String ¶
func (x *ServerMetadataResponse) String() string
type ServerReadyRequest ¶
type ServerReadyRequest struct {
// contains filtered or unexported fields
}
Request message for ServerReady.
func (*ServerReadyRequest) Descriptor
deprecated
func (*ServerReadyRequest) Descriptor() ([]byte, []int)
Deprecated: Use ServerReadyRequest.ProtoReflect.Descriptor instead.
func (*ServerReadyRequest) ProtoMessage ¶
func (*ServerReadyRequest) ProtoMessage()
func (*ServerReadyRequest) ProtoReflect ¶
func (x *ServerReadyRequest) ProtoReflect() protoreflect.Message
func (*ServerReadyRequest) Reset ¶
func (x *ServerReadyRequest) Reset()
func (*ServerReadyRequest) String ¶
func (x *ServerReadyRequest) String() string
type ServerReadyResponse ¶
type ServerReadyResponse struct {
	// True if the inference server is ready, false if not ready.
	Ready bool `protobuf:"varint,1,opt,name=ready,proto3" json:"ready,omitempty"`
	// contains filtered or unexported fields
}
Response message for ServerReady.
func (*ServerReadyResponse) Descriptor
deprecated
func (*ServerReadyResponse) Descriptor() ([]byte, []int)
Deprecated: Use ServerReadyResponse.ProtoReflect.Descriptor instead.
func (*ServerReadyResponse) GetReady ¶
func (x *ServerReadyResponse) GetReady() bool
func (*ServerReadyResponse) ProtoMessage ¶
func (*ServerReadyResponse) ProtoMessage()
func (*ServerReadyResponse) ProtoReflect ¶
func (x *ServerReadyResponse) ProtoReflect() protoreflect.Message
func (*ServerReadyResponse) Reset ¶
func (x *ServerReadyResponse) Reset()
func (*ServerReadyResponse) String ¶
func (x *ServerReadyResponse) String() string
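ServerLive, ServerReady, and ServerMetadata are typically probed together at startup. A minimal sketch, again reusing the connected client from the earlier example:

	func probeServer(ctx context.Context, client pb.GRPCInferenceServiceClient) error {
		live, err := client.ServerLive(ctx, &pb.ServerLiveRequest{})
		if err != nil || !live.GetLive() {
			return fmt.Errorf("server not live: %v", err)
		}
		ready, err := client.ServerReady(ctx, &pb.ServerReadyRequest{})
		if err != nil || !ready.GetReady() {
			return fmt.Errorf("server not ready: %v", err)
		}
		meta, err := client.ServerMetadata(ctx, &pb.ServerMetadataRequest{})
		if err != nil {
			return err
		}
		log.Printf("connected to %s %s (extensions: %v)", meta.GetName(), meta.GetVersion(), meta.GetExtensions())
		return nil
	}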
type StatisticDuration ¶
type StatisticDuration struct {
	// Cumulative number of times this metric occurred.
	Count uint64 `protobuf:"varint,1,opt,name=count,proto3" json:"count,omitempty"`
	// Total collected duration of this metric in nanoseconds.
	Ns uint64 `protobuf:"varint,2,opt,name=ns,proto3" json:"ns,omitempty"`
	// contains filtered or unexported fields
}
Statistic recording a cumulative duration metric.
func (*StatisticDuration) Descriptor
deprecated
func (*StatisticDuration) Descriptor() ([]byte, []int)
Deprecated: Use StatisticDuration.ProtoReflect.Descriptor instead.
func (*StatisticDuration) GetCount ¶
func (x *StatisticDuration) GetCount() uint64
func (*StatisticDuration) GetNs ¶
func (x *StatisticDuration) GetNs() uint64
func (*StatisticDuration) ProtoMessage ¶
func (*StatisticDuration) ProtoMessage()
func (*StatisticDuration) ProtoReflect ¶
func (x *StatisticDuration) ProtoReflect() protoreflect.Message
func (*StatisticDuration) Reset ¶
func (x *StatisticDuration) Reset()
func (*StatisticDuration) String ¶
func (x *StatisticDuration) String() string
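Because Count and Ns are cumulative, a mean duration is simply their quotient. A small helper sketch using the standard time package (time.Duration counts nanoseconds, so the conversion is direct):

	// avgDuration returns the mean duration recorded by a StatisticDuration,
	// or zero if the metric never occurred.
	func avgDuration(d *pb.StatisticDuration) time.Duration {
		if d.GetCount() == 0 {
			return 0 // avoid division by zero
		}
		return time.Duration(d.GetNs() / d.GetCount())
	}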
type SystemSharedMemoryRegisterRequest ¶
type SystemSharedMemoryRegisterRequest struct {
	// The name of the region to register.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// The key of the underlying memory object that contains the
	// shared memory region.
	Key string `protobuf:"bytes,2,opt,name=key,proto3" json:"key,omitempty"`
	// Offset, in bytes, within the underlying memory object to
	// the start of the shared memory region.
	Offset uint64 `protobuf:"varint,3,opt,name=offset,proto3" json:"offset,omitempty"`
	// Size of the shared memory region, in bytes.
	ByteSize uint64 `protobuf:"varint,4,opt,name=byte_size,json=byteSize,proto3" json:"byte_size,omitempty"`
	// contains filtered or unexported fields
}
Request message for SystemSharedMemoryRegister.
func (*SystemSharedMemoryRegisterRequest) Descriptor
deprecated
func (*SystemSharedMemoryRegisterRequest) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryRegisterRequest.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryRegisterRequest) GetByteSize ¶
func (x *SystemSharedMemoryRegisterRequest) GetByteSize() uint64
func (*SystemSharedMemoryRegisterRequest) GetKey ¶
func (x *SystemSharedMemoryRegisterRequest) GetKey() string
func (*SystemSharedMemoryRegisterRequest) GetName ¶
func (x *SystemSharedMemoryRegisterRequest) GetName() string
func (*SystemSharedMemoryRegisterRequest) GetOffset ¶
func (x *SystemSharedMemoryRegisterRequest) GetOffset() uint64
func (*SystemSharedMemoryRegisterRequest) ProtoMessage ¶
func (*SystemSharedMemoryRegisterRequest) ProtoMessage()
func (*SystemSharedMemoryRegisterRequest) ProtoReflect ¶
func (x *SystemSharedMemoryRegisterRequest) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryRegisterRequest) Reset ¶
func (x *SystemSharedMemoryRegisterRequest) Reset()
func (*SystemSharedMemoryRegisterRequest) String ¶
func (x *SystemSharedMemoryRegisterRequest) String() string
type SystemSharedMemoryRegisterResponse ¶
type SystemSharedMemoryRegisterResponse struct {
// contains filtered or unexported fields
}
Response message for SystemSharedMemoryRegister.
func (*SystemSharedMemoryRegisterResponse) Descriptor
deprecated
func (*SystemSharedMemoryRegisterResponse) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryRegisterResponse.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryRegisterResponse) ProtoMessage ¶
func (*SystemSharedMemoryRegisterResponse) ProtoMessage()
func (*SystemSharedMemoryRegisterResponse) ProtoReflect ¶
func (x *SystemSharedMemoryRegisterResponse) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryRegisterResponse) Reset ¶
func (x *SystemSharedMemoryRegisterResponse) Reset()
func (*SystemSharedMemoryRegisterResponse) String ¶
func (x *SystemSharedMemoryRegisterResponse) String() string
type SystemSharedMemoryStatusRequest ¶
type SystemSharedMemoryStatusRequest struct {
	// The name of the region to get status for. If empty the
	// status is returned for all registered regions.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// contains filtered or unexported fields
}
Request message for SystemSharedMemoryStatus.
func (*SystemSharedMemoryStatusRequest) Descriptor
deprecated
func (*SystemSharedMemoryStatusRequest) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryStatusRequest.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryStatusRequest) GetName ¶
func (x *SystemSharedMemoryStatusRequest) GetName() string
func (*SystemSharedMemoryStatusRequest) ProtoMessage ¶
func (*SystemSharedMemoryStatusRequest) ProtoMessage()
func (*SystemSharedMemoryStatusRequest) ProtoReflect ¶
func (x *SystemSharedMemoryStatusRequest) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryStatusRequest) Reset ¶
func (x *SystemSharedMemoryStatusRequest) Reset()
func (*SystemSharedMemoryStatusRequest) String ¶
func (x *SystemSharedMemoryStatusRequest) String() string
type SystemSharedMemoryStatusResponse ¶
type SystemSharedMemoryStatusResponse struct {
	// Status for each of the registered regions, indexed by
	// region name.
	Regions map[string]*SystemSharedMemoryStatusResponse_RegionStatus `` /* 155-byte string literal not displayed */
	// contains filtered or unexported fields
}
Response message for SystemSharedMemoryStatus.
func (*SystemSharedMemoryStatusResponse) Descriptor
deprecated
func (*SystemSharedMemoryStatusResponse) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryStatusResponse.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryStatusResponse) GetRegions ¶
func (x *SystemSharedMemoryStatusResponse) GetRegions() map[string]*SystemSharedMemoryStatusResponse_RegionStatus
func (*SystemSharedMemoryStatusResponse) ProtoMessage ¶
func (*SystemSharedMemoryStatusResponse) ProtoMessage()
func (*SystemSharedMemoryStatusResponse) ProtoReflect ¶
func (x *SystemSharedMemoryStatusResponse) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryStatusResponse) Reset ¶
func (x *SystemSharedMemoryStatusResponse) Reset()
func (*SystemSharedMemoryStatusResponse) String ¶
func (x *SystemSharedMemoryStatusResponse) String() string
type SystemSharedMemoryStatusResponse_RegionStatus ¶
type SystemSharedMemoryStatusResponse_RegionStatus struct {
	// The name for the shared memory region.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// The key of the underlying memory object that contains the
	// shared memory region.
	Key string `protobuf:"bytes,2,opt,name=key,proto3" json:"key,omitempty"`
	// Offset, in bytes, within the underlying memory object to
	// the start of the shared memory region.
	Offset uint64 `protobuf:"varint,3,opt,name=offset,proto3" json:"offset,omitempty"`
	// Size of the shared memory region, in bytes.
	ByteSize uint64 `protobuf:"varint,4,opt,name=byte_size,json=byteSize,proto3" json:"byte_size,omitempty"`
	// contains filtered or unexported fields
}
Status for a shared memory region.
func (*SystemSharedMemoryStatusResponse_RegionStatus) Descriptor
deprecated
func (*SystemSharedMemoryStatusResponse_RegionStatus) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryStatusResponse_RegionStatus.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryStatusResponse_RegionStatus) GetByteSize ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetByteSize() uint64
func (*SystemSharedMemoryStatusResponse_RegionStatus) GetKey ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetKey() string
func (*SystemSharedMemoryStatusResponse_RegionStatus) GetName ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetName() string
func (*SystemSharedMemoryStatusResponse_RegionStatus) GetOffset ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetOffset() uint64
func (*SystemSharedMemoryStatusResponse_RegionStatus) ProtoMessage ¶
func (*SystemSharedMemoryStatusResponse_RegionStatus) ProtoMessage()
func (*SystemSharedMemoryStatusResponse_RegionStatus) ProtoReflect ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryStatusResponse_RegionStatus) Reset ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) Reset()
func (*SystemSharedMemoryStatusResponse_RegionStatus) String ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) String() string
type SystemSharedMemoryUnregisterRequest ¶
type SystemSharedMemoryUnregisterRequest struct {
	// The name of the system region to unregister. If empty
	// all system shared-memory regions are unregistered.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// contains filtered or unexported fields
}
Request message for SystemSharedMemoryUnregister.
func (*SystemSharedMemoryUnregisterRequest) Descriptor
deprecated
func (*SystemSharedMemoryUnregisterRequest) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryUnregisterRequest.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryUnregisterRequest) GetName ¶
func (x *SystemSharedMemoryUnregisterRequest) GetName() string
func (*SystemSharedMemoryUnregisterRequest) ProtoMessage ¶
func (*SystemSharedMemoryUnregisterRequest) ProtoMessage()
func (*SystemSharedMemoryUnregisterRequest) ProtoReflect ¶
func (x *SystemSharedMemoryUnregisterRequest) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryUnregisterRequest) Reset ¶
func (x *SystemSharedMemoryUnregisterRequest) Reset()
func (*SystemSharedMemoryUnregisterRequest) String ¶
func (x *SystemSharedMemoryUnregisterRequest) String() string
type SystemSharedMemoryUnregisterResponse ¶
type SystemSharedMemoryUnregisterResponse struct {
// contains filtered or unexported fields
}
Response message for SystemSharedMemoryUnregister.
func (*SystemSharedMemoryUnregisterResponse) Descriptor
deprecated
func (*SystemSharedMemoryUnregisterResponse) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryUnregisterResponse.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryUnregisterResponse) ProtoMessage ¶
func (*SystemSharedMemoryUnregisterResponse) ProtoMessage()
func (*SystemSharedMemoryUnregisterResponse) ProtoReflect ¶
func (x *SystemSharedMemoryUnregisterResponse) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryUnregisterResponse) Reset ¶
func (x *SystemSharedMemoryUnregisterResponse) Reset()
func (*SystemSharedMemoryUnregisterResponse) String ¶
func (x *SystemSharedMemoryUnregisterResponse) String() string
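The three system shared-memory RPCs form a register/status/unregister lifecycle. A sketch with the client from the earlier examples; the region name and key are hypothetical, and the underlying shared-memory object is assumed to already exist.

	func shmLifecycle(ctx context.Context, client pb.GRPCInferenceServiceClient) error {
		if _, err := client.SystemSharedMemoryRegister(ctx, &pb.SystemSharedMemoryRegisterRequest{
			Name:     "input_region", // hypothetical region name
			Key:      "/triton_shm",  // hypothetical key of an existing memory object
			Offset:   0,
			ByteSize: 64 * 1024,
		}); err != nil {
			return err
		}
		status, err := client.SystemSharedMemoryStatus(ctx, &pb.SystemSharedMemoryStatusRequest{Name: "input_region"})
		if err != nil {
			return err
		}
		for name, r := range status.GetRegions() {
			log.Printf("%s: key=%s offset=%d size=%d", name, r.GetKey(), r.GetOffset(), r.GetByteSize())
		}
		// Unregister by name; an empty Name would unregister every region.
		_, err = client.SystemSharedMemoryUnregister(ctx, &pb.SystemSharedMemoryUnregisterRequest{Name: "input_region"})
		return err
	}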
type TraceSettingRequest ¶
type TraceSettingRequest struct {
	// The new setting values to be updated; settings that are not
	// specified will remain unchanged.
	Settings map[string]*TraceSettingRequest_SettingValue `` /* 157-byte string literal not displayed */
	// The name of the model to apply the new trace settings to.
	// If not given, the new settings will be applied globally.
	ModelName string `protobuf:"bytes,2,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"`
	// contains filtered or unexported fields
}
Request message for TraceSetting.
func (*TraceSettingRequest) Descriptor
deprecated
func (*TraceSettingRequest) Descriptor() ([]byte, []int)
Deprecated: Use TraceSettingRequest.ProtoReflect.Descriptor instead.
func (*TraceSettingRequest) GetModelName ¶
func (x *TraceSettingRequest) GetModelName() string
func (*TraceSettingRequest) GetSettings ¶
func (x *TraceSettingRequest) GetSettings() map[string]*TraceSettingRequest_SettingValue
func (*TraceSettingRequest) ProtoMessage ¶
func (*TraceSettingRequest) ProtoMessage()
func (*TraceSettingRequest) ProtoReflect ¶
func (x *TraceSettingRequest) ProtoReflect() protoreflect.Message
func (*TraceSettingRequest) Reset ¶
func (x *TraceSettingRequest) Reset()
func (*TraceSettingRequest) String ¶
func (x *TraceSettingRequest) String() string
type TraceSettingRequest_SettingValue ¶
type TraceSettingRequest_SettingValue struct {
	// The value.
	Value []string `protobuf:"bytes,1,rep,name=value,proto3" json:"value,omitempty"`
	// contains filtered or unexported fields
}
The values to be associated with a trace setting. If no value is provided, the setting will be cleared and the global setting value will be used.
func (*TraceSettingRequest_SettingValue) Descriptor
deprecated
func (*TraceSettingRequest_SettingValue) Descriptor() ([]byte, []int)
Deprecated: Use TraceSettingRequest_SettingValue.ProtoReflect.Descriptor instead.
func (*TraceSettingRequest_SettingValue) GetValue ¶
func (x *TraceSettingRequest_SettingValue) GetValue() []string
func (*TraceSettingRequest_SettingValue) ProtoMessage ¶
func (*TraceSettingRequest_SettingValue) ProtoMessage()
func (*TraceSettingRequest_SettingValue) ProtoReflect ¶
func (x *TraceSettingRequest_SettingValue) ProtoReflect() protoreflect.Message
func (*TraceSettingRequest_SettingValue) Reset ¶
func (x *TraceSettingRequest_SettingValue) Reset()
func (*TraceSettingRequest_SettingValue) String ¶
func (x *TraceSettingRequest_SettingValue) String() string
type TraceSettingResponse ¶
type TraceSettingResponse struct {
	// The current trace settings, including any changes specified
	// by TraceSettingRequest.
	Settings map[string]*TraceSettingResponse_SettingValue `` /* 157-byte string literal not displayed */
	// contains filtered or unexported fields
}
Response message for TraceSetting.
func (*TraceSettingResponse) Descriptor
deprecated
func (*TraceSettingResponse) Descriptor() ([]byte, []int)
Deprecated: Use TraceSettingResponse.ProtoReflect.Descriptor instead.
func (*TraceSettingResponse) GetSettings ¶
func (x *TraceSettingResponse) GetSettings() map[string]*TraceSettingResponse_SettingValue
func (*TraceSettingResponse) ProtoMessage ¶
func (*TraceSettingResponse) ProtoMessage()
func (*TraceSettingResponse) ProtoReflect ¶
func (x *TraceSettingResponse) ProtoReflect() protoreflect.Message
func (*TraceSettingResponse) Reset ¶
func (x *TraceSettingResponse) Reset()
func (*TraceSettingResponse) String ¶
func (x *TraceSettingResponse) String() string
type TraceSettingResponse_SettingValue ¶
type TraceSettingResponse_SettingValue struct {
	// The value.
	Value []string `protobuf:"bytes,1,rep,name=value,proto3" json:"value,omitempty"`
	// contains filtered or unexported fields
}
The values associated with a trace setting.
func (*TraceSettingResponse_SettingValue) Descriptor
deprecated
func (*TraceSettingResponse_SettingValue) Descriptor() ([]byte, []int)
Deprecated: Use TraceSettingResponse_SettingValue.ProtoReflect.Descriptor instead.
func (*TraceSettingResponse_SettingValue) GetValue ¶
func (x *TraceSettingResponse_SettingValue) GetValue() []string
func (*TraceSettingResponse_SettingValue) ProtoMessage ¶
func (*TraceSettingResponse_SettingValue) ProtoMessage()
func (*TraceSettingResponse_SettingValue) ProtoReflect ¶
func (x *TraceSettingResponse_SettingValue) ProtoReflect() protoreflect.Message
func (*TraceSettingResponse_SettingValue) Reset ¶
func (x *TraceSettingResponse_SettingValue) Reset()
func (*TraceSettingResponse_SettingValue) String ¶
func (x *TraceSettingResponse_SettingValue) String() string
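A sketch of updating a trace setting globally (no ModelName set) and reading back the effective values. The "trace_level" key and "TIMESTAMPS" value follow Triton's trace extension and are assumptions here, not definitions in this package.

	func setTraceLevel(ctx context.Context, client pb.GRPCInferenceServiceClient) error {
		resp, err := client.TraceSetting(ctx, &pb.TraceSettingRequest{
			Settings: map[string]*pb.TraceSettingRequest_SettingValue{
				"trace_level": {Value: []string{"TIMESTAMPS"}}, // assumed setting key/value
			},
		})
		if err != nil {
			return err
		}
		for k, v := range resp.GetSettings() {
			log.Printf("%s = %v", k, v.GetValue())
		}
		return nil
	}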
type UnimplementedGRPCInferenceServiceServer ¶
type UnimplementedGRPCInferenceServiceServer struct { }
UnimplementedGRPCInferenceServiceServer must be embedded to have forward compatible implementations.
func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryRegister ¶
func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryRegister(context.Context, *CudaSharedMemoryRegisterRequest) (*CudaSharedMemoryRegisterResponse, error)
func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryStatus ¶
func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryStatus(context.Context, *CudaSharedMemoryStatusRequest) (*CudaSharedMemoryStatusResponse, error)
func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryUnregister ¶
func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryUnregister(context.Context, *CudaSharedMemoryUnregisterRequest) (*CudaSharedMemoryUnregisterResponse, error)
func (UnimplementedGRPCInferenceServiceServer) LogSettings ¶
func (UnimplementedGRPCInferenceServiceServer) LogSettings(context.Context, *LogSettingsRequest) (*LogSettingsResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ModelConfig ¶
func (UnimplementedGRPCInferenceServiceServer) ModelConfig(context.Context, *ModelConfigRequest) (*ModelConfigResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ModelInfer ¶
func (UnimplementedGRPCInferenceServiceServer) ModelInfer(context.Context, *ModelInferRequest) (*ModelInferResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ModelMetadata ¶
func (UnimplementedGRPCInferenceServiceServer) ModelMetadata(context.Context, *ModelMetadataRequest) (*ModelMetadataResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ModelReady ¶
func (UnimplementedGRPCInferenceServiceServer) ModelReady(context.Context, *ModelReadyRequest) (*ModelReadyResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ModelStatistics ¶
func (UnimplementedGRPCInferenceServiceServer) ModelStatistics(context.Context, *ModelStatisticsRequest) (*ModelStatisticsResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ModelStreamInfer ¶
func (UnimplementedGRPCInferenceServiceServer) ModelStreamInfer(GRPCInferenceService_ModelStreamInferServer) error
func (UnimplementedGRPCInferenceServiceServer) RepositoryIndex ¶
func (UnimplementedGRPCInferenceServiceServer) RepositoryIndex(context.Context, *RepositoryIndexRequest) (*RepositoryIndexResponse, error)
func (UnimplementedGRPCInferenceServiceServer) RepositoryModelLoad ¶
func (UnimplementedGRPCInferenceServiceServer) RepositoryModelLoad(context.Context, *RepositoryModelLoadRequest) (*RepositoryModelLoadResponse, error)
func (UnimplementedGRPCInferenceServiceServer) RepositoryModelUnload ¶
func (UnimplementedGRPCInferenceServiceServer) RepositoryModelUnload(context.Context, *RepositoryModelUnloadRequest) (*RepositoryModelUnloadResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ServerLive ¶
func (UnimplementedGRPCInferenceServiceServer) ServerLive(context.Context, *ServerLiveRequest) (*ServerLiveResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ServerMetadata ¶
func (UnimplementedGRPCInferenceServiceServer) ServerMetadata(context.Context, *ServerMetadataRequest) (*ServerMetadataResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ServerReady ¶
func (UnimplementedGRPCInferenceServiceServer) ServerReady(context.Context, *ServerReadyRequest) (*ServerReadyResponse, error)
func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryRegister ¶
func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryRegister(context.Context, *SystemSharedMemoryRegisterRequest) (*SystemSharedMemoryRegisterResponse, error)
func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryStatus ¶
func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryStatus(context.Context, *SystemSharedMemoryStatusRequest) (*SystemSharedMemoryStatusResponse, error)
func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryUnregister ¶
func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryUnregister(context.Context, *SystemSharedMemoryUnregisterRequest) (*SystemSharedMemoryUnregisterResponse, error)
func (UnimplementedGRPCInferenceServiceServer) TraceSetting ¶
func (UnimplementedGRPCInferenceServiceServer) TraceSetting(context.Context, *TraceSettingRequest) (*TraceSettingResponse, error)
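This is the intended embedding pattern: a partial implementation embeds UnimplementedGRPCInferenceServiceServer so the struct keeps satisfying GRPCInferenceServiceServer when RPCs are added later, with unimplemented methods failing at runtime instead of breaking compilation. A sketch that overrides only ServerLive (import path and port are placeholders):

	type inferenceServer struct {
		pb.UnimplementedGRPCInferenceServiceServer // all other RPCs return codes.Unimplemented
	}

	func (s *inferenceServer) ServerLive(ctx context.Context, _ *pb.ServerLiveRequest) (*pb.ServerLiveResponse, error) {
		return &pb.ServerLiveResponse{Live: true}, nil
	}

	func serve() error {
		lis, err := net.Listen("tcp", ":8001") // placeholder address
		if err != nil {
			return err
		}
		s := grpc.NewServer()
		pb.RegisterGRPCInferenceServiceServer(s, &inferenceServer{})
		return s.Serve(lis)
	}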
type UnimplementedHealthServer ¶
type UnimplementedHealthServer struct { }
UnimplementedHealthServer must be embedded to have forward compatible implementations.
func (UnimplementedHealthServer) Check ¶
func (UnimplementedHealthServer) Check(context.Context, *HealthCheckRequest) (*HealthCheckResponse, error)
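The same embedding pattern applies to the health service. A sketch that overrides Check; the response is left empty here because its fields are not shown in this listing and should be populated per the gRPC health protocol.

	type healthServer struct {
		pb.UnimplementedHealthServer
	}

	func (h *healthServer) Check(ctx context.Context, _ *pb.HealthCheckRequest) (*pb.HealthCheckResponse, error) {
		return &pb.HealthCheckResponse{}, nil // fill in serving status as appropriate
	}

	// Registered alongside the inference service:
	//	pb.RegisterHealthServer(s, &healthServer{})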
type UnsafeGRPCInferenceServiceServer ¶
type UnsafeGRPCInferenceServiceServer interface {
// contains filtered or unexported methods
}
UnsafeGRPCInferenceServiceServer may be embedded to opt out of forward compatibility for this service. Use of this interface is not recommended, as added methods to GRPCInferenceServiceServer will result in compilation errors.
type UnsafeHealthServer ¶
type UnsafeHealthServer interface {
// contains filtered or unexported methods
}
UnsafeHealthServer may be embedded to opt out of forward compatibility for this service. Use of this interface is not recommended, as added methods to HealthServer will result in compilation errors.