Documentation
Index
- Constants
- Variables
- func RegisterGRPCInferenceServiceServer(s grpc.ServiceRegistrar, srv GRPCInferenceServiceServer)
- type BatchInput
- func (*BatchInput) Descriptor() ([]byte, []int) (deprecated)
- func (x *BatchInput) GetDataType() DataType
- func (x *BatchInput) GetKind() BatchInput_Kind
- func (x *BatchInput) GetSourceInput() []string
- func (x *BatchInput) GetTargetName() []string
- func (*BatchInput) ProtoMessage()
- func (x *BatchInput) ProtoReflect() protoreflect.Message
- func (x *BatchInput) Reset()
- func (x *BatchInput) String() string
- type BatchInput_Kind
- func (BatchInput_Kind) Descriptor() protoreflect.EnumDescriptor
- func (x BatchInput_Kind) Enum() *BatchInput_Kind
- func (BatchInput_Kind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x BatchInput_Kind) Number() protoreflect.EnumNumber
- func (x BatchInput_Kind) String() string
- func (BatchInput_Kind) Type() protoreflect.EnumType
- type BatchOutput
- func (*BatchOutput) Descriptor() ([]byte, []int) (deprecated)
- func (x *BatchOutput) GetKind() BatchOutput_Kind
- func (x *BatchOutput) GetSourceInput() []string
- func (x *BatchOutput) GetTargetName() []string
- func (*BatchOutput) ProtoMessage()
- func (x *BatchOutput) ProtoReflect() protoreflect.Message
- func (x *BatchOutput) Reset()
- func (x *BatchOutput) String() string
- type BatchOutput_Kind
- func (BatchOutput_Kind) Descriptor() protoreflect.EnumDescriptor
- func (x BatchOutput_Kind) Enum() *BatchOutput_Kind
- func (BatchOutput_Kind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x BatchOutput_Kind) Number() protoreflect.EnumNumber
- func (x BatchOutput_Kind) String() string
- func (BatchOutput_Kind) Type() protoreflect.EnumType
- type CudaSharedMemoryRegisterRequest
- func (*CudaSharedMemoryRegisterRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *CudaSharedMemoryRegisterRequest) GetByteSize() uint64
- func (x *CudaSharedMemoryRegisterRequest) GetDeviceId() int64
- func (x *CudaSharedMemoryRegisterRequest) GetName() string
- func (x *CudaSharedMemoryRegisterRequest) GetRawHandle() []byte
- func (*CudaSharedMemoryRegisterRequest) ProtoMessage()
- func (x *CudaSharedMemoryRegisterRequest) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryRegisterRequest) Reset()
- func (x *CudaSharedMemoryRegisterRequest) String() string
- type CudaSharedMemoryRegisterResponse
- func (*CudaSharedMemoryRegisterResponse) Descriptor() ([]byte, []int) (deprecated)
- func (*CudaSharedMemoryRegisterResponse) ProtoMessage()
- func (x *CudaSharedMemoryRegisterResponse) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryRegisterResponse) Reset()
- func (x *CudaSharedMemoryRegisterResponse) String() string
- type CudaSharedMemoryStatusRequest
- func (*CudaSharedMemoryStatusRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *CudaSharedMemoryStatusRequest) GetName() string
- func (*CudaSharedMemoryStatusRequest) ProtoMessage()
- func (x *CudaSharedMemoryStatusRequest) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryStatusRequest) Reset()
- func (x *CudaSharedMemoryStatusRequest) String() string
- type CudaSharedMemoryStatusResponse
- func (*CudaSharedMemoryStatusResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *CudaSharedMemoryStatusResponse) GetRegions() map[string]*CudaSharedMemoryStatusResponse_RegionStatus
- func (*CudaSharedMemoryStatusResponse) ProtoMessage()
- func (x *CudaSharedMemoryStatusResponse) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryStatusResponse) Reset()
- func (x *CudaSharedMemoryStatusResponse) String() string
- type CudaSharedMemoryStatusResponse_RegionStatus
- func (*CudaSharedMemoryStatusResponse_RegionStatus) Descriptor() ([]byte, []int) (deprecated)
- func (x *CudaSharedMemoryStatusResponse_RegionStatus) GetByteSize() uint64
- func (x *CudaSharedMemoryStatusResponse_RegionStatus) GetDeviceId() uint64
- func (x *CudaSharedMemoryStatusResponse_RegionStatus) GetName() string
- func (*CudaSharedMemoryStatusResponse_RegionStatus) ProtoMessage()
- func (x *CudaSharedMemoryStatusResponse_RegionStatus) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryStatusResponse_RegionStatus) Reset()
- func (x *CudaSharedMemoryStatusResponse_RegionStatus) String() string
- type CudaSharedMemoryUnregisterRequest
- func (*CudaSharedMemoryUnregisterRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *CudaSharedMemoryUnregisterRequest) GetName() string
- func (*CudaSharedMemoryUnregisterRequest) ProtoMessage()
- func (x *CudaSharedMemoryUnregisterRequest) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryUnregisterRequest) Reset()
- func (x *CudaSharedMemoryUnregisterRequest) String() string
- type CudaSharedMemoryUnregisterResponse
- func (*CudaSharedMemoryUnregisterResponse) Descriptor() ([]byte, []int) (deprecated)
- func (*CudaSharedMemoryUnregisterResponse) ProtoMessage()
- func (x *CudaSharedMemoryUnregisterResponse) ProtoReflect() protoreflect.Message
- func (x *CudaSharedMemoryUnregisterResponse) Reset()
- func (x *CudaSharedMemoryUnregisterResponse) String() string
- type DataType
- type GRPCInferenceServiceClient
- type GRPCInferenceServiceServer
- type GRPCInferenceService_ModelStreamInferClient
- type GRPCInferenceService_ModelStreamInferServer
- type InferBatchStatistics
- func (*InferBatchStatistics) Descriptor() ([]byte, []int) (deprecated)
- func (x *InferBatchStatistics) GetBatchSize() uint64
- func (x *InferBatchStatistics) GetComputeInfer() *StatisticDuration
- func (x *InferBatchStatistics) GetComputeInput() *StatisticDuration
- func (x *InferBatchStatistics) GetComputeOutput() *StatisticDuration
- func (*InferBatchStatistics) ProtoMessage()
- func (x *InferBatchStatistics) ProtoReflect() protoreflect.Message
- func (x *InferBatchStatistics) Reset()
- func (x *InferBatchStatistics) String() string
- type InferParameter
- func (*InferParameter) Descriptor() ([]byte, []int) (deprecated)
- func (x *InferParameter) GetBoolParam() bool
- func (x *InferParameter) GetDoubleParam() float64
- func (x *InferParameter) GetInt64Param() int64
- func (m *InferParameter) GetParameterChoice() isInferParameter_ParameterChoice
- func (x *InferParameter) GetStringParam() string
- func (x *InferParameter) GetUint64Param() uint64
- func (*InferParameter) ProtoMessage()
- func (x *InferParameter) ProtoReflect() protoreflect.Message
- func (x *InferParameter) Reset()
- func (x *InferParameter) String() string
- type InferParameter_BoolParam
- type InferParameter_DoubleParam
- type InferParameter_Int64Param
- type InferParameter_StringParam
- type InferParameter_Uint64Param
- type InferResponseStatistics
- func (*InferResponseStatistics) Descriptor() ([]byte, []int) (deprecated)
- func (x *InferResponseStatistics) GetCancel() *StatisticDuration
- func (x *InferResponseStatistics) GetComputeInfer() *StatisticDuration
- func (x *InferResponseStatistics) GetComputeOutput() *StatisticDuration
- func (x *InferResponseStatistics) GetEmptyResponse() *StatisticDuration
- func (x *InferResponseStatistics) GetFail() *StatisticDuration
- func (x *InferResponseStatistics) GetSuccess() *StatisticDuration
- func (*InferResponseStatistics) ProtoMessage()
- func (x *InferResponseStatistics) ProtoReflect() protoreflect.Message
- func (x *InferResponseStatistics) Reset()
- func (x *InferResponseStatistics) String() string
- type InferStatistics
- func (*InferStatistics) Descriptor() ([]byte, []int) (deprecated)
- func (x *InferStatistics) GetCacheHit() *StatisticDuration
- func (x *InferStatistics) GetCacheMiss() *StatisticDuration
- func (x *InferStatistics) GetComputeInfer() *StatisticDuration
- func (x *InferStatistics) GetComputeInput() *StatisticDuration
- func (x *InferStatistics) GetComputeOutput() *StatisticDuration
- func (x *InferStatistics) GetFail() *StatisticDuration
- func (x *InferStatistics) GetQueue() *StatisticDuration
- func (x *InferStatistics) GetSuccess() *StatisticDuration
- func (*InferStatistics) ProtoMessage()
- func (x *InferStatistics) ProtoReflect() protoreflect.Message
- func (x *InferStatistics) Reset()
- func (x *InferStatistics) String() string
- type InferTensorContents
- func (*InferTensorContents) Descriptor() ([]byte, []int) (deprecated)
- func (x *InferTensorContents) GetBoolContents() []bool
- func (x *InferTensorContents) GetBytesContents() [][]byte
- func (x *InferTensorContents) GetFp32Contents() []float32
- func (x *InferTensorContents) GetFp64Contents() []float64
- func (x *InferTensorContents) GetInt64Contents() []int64
- func (x *InferTensorContents) GetIntContents() []int32
- func (x *InferTensorContents) GetUint64Contents() []uint64
- func (x *InferTensorContents) GetUintContents() []uint32
- func (*InferTensorContents) ProtoMessage()
- func (x *InferTensorContents) ProtoReflect() protoreflect.Message
- func (x *InferTensorContents) Reset()
- func (x *InferTensorContents) String() string
- type LogSettingsRequest
- func (*LogSettingsRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *LogSettingsRequest) GetSettings() map[string]*LogSettingsRequest_SettingValue
- func (*LogSettingsRequest) ProtoMessage()
- func (x *LogSettingsRequest) ProtoReflect() protoreflect.Message
- func (x *LogSettingsRequest) Reset()
- func (x *LogSettingsRequest) String() string
- type LogSettingsRequest_SettingValue
- func (*LogSettingsRequest_SettingValue) Descriptor() ([]byte, []int) (deprecated)
- func (x *LogSettingsRequest_SettingValue) GetBoolParam() bool
- func (m *LogSettingsRequest_SettingValue) GetParameterChoice() isLogSettingsRequest_SettingValue_ParameterChoice
- func (x *LogSettingsRequest_SettingValue) GetStringParam() string
- func (x *LogSettingsRequest_SettingValue) GetUint32Param() uint32
- func (*LogSettingsRequest_SettingValue) ProtoMessage()
- func (x *LogSettingsRequest_SettingValue) ProtoReflect() protoreflect.Message
- func (x *LogSettingsRequest_SettingValue) Reset()
- func (x *LogSettingsRequest_SettingValue) String() string
- type LogSettingsRequest_SettingValue_BoolParam
- type LogSettingsRequest_SettingValue_StringParam
- type LogSettingsRequest_SettingValue_Uint32Param
- type LogSettingsResponse
- func (*LogSettingsResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *LogSettingsResponse) GetSettings() map[string]*LogSettingsResponse_SettingValue
- func (*LogSettingsResponse) ProtoMessage()
- func (x *LogSettingsResponse) ProtoReflect() protoreflect.Message
- func (x *LogSettingsResponse) Reset()
- func (x *LogSettingsResponse) String() string
- type LogSettingsResponse_SettingValue
- func (*LogSettingsResponse_SettingValue) Descriptor() ([]byte, []int) (deprecated)
- func (x *LogSettingsResponse_SettingValue) GetBoolParam() bool
- func (m *LogSettingsResponse_SettingValue) GetParameterChoice() isLogSettingsResponse_SettingValue_ParameterChoice
- func (x *LogSettingsResponse_SettingValue) GetStringParam() string
- func (x *LogSettingsResponse_SettingValue) GetUint32Param() uint32
- func (*LogSettingsResponse_SettingValue) ProtoMessage()
- func (x *LogSettingsResponse_SettingValue) ProtoReflect() protoreflect.Message
- func (x *LogSettingsResponse_SettingValue) Reset()
- func (x *LogSettingsResponse_SettingValue) String() string
- type LogSettingsResponse_SettingValue_BoolParam
- type LogSettingsResponse_SettingValue_StringParam
- type LogSettingsResponse_SettingValue_Uint32Param
- type MemoryUsage
- func (*MemoryUsage) Descriptor() ([]byte, []int) (deprecated)
- func (x *MemoryUsage) GetByteSize() uint64
- func (x *MemoryUsage) GetId() int64
- func (x *MemoryUsage) GetType() string
- func (*MemoryUsage) ProtoMessage()
- func (x *MemoryUsage) ProtoReflect() protoreflect.Message
- func (x *MemoryUsage) Reset()
- func (x *MemoryUsage) String() string
- type ModelConfig
- func (*ModelConfig) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelConfig) GetBackend() string
- func (x *ModelConfig) GetBatchInput() []*BatchInput
- func (x *ModelConfig) GetBatchOutput() []*BatchOutput
- func (x *ModelConfig) GetCcModelFilenames() map[string]string
- func (x *ModelConfig) GetDefaultModelFilename() string
- func (x *ModelConfig) GetDynamicBatching() *ModelDynamicBatching
- func (x *ModelConfig) GetEnsembleScheduling() *ModelEnsembling
- func (x *ModelConfig) GetInput() []*ModelInput
- func (x *ModelConfig) GetInstanceGroup() []*ModelInstanceGroup
- func (x *ModelConfig) GetMaxBatchSize() int32
- func (x *ModelConfig) GetMetricTags() map[string]string
- func (x *ModelConfig) GetModelOperations() *ModelOperations
- func (x *ModelConfig) GetModelRepositoryAgents() *ModelRepositoryAgents
- func (x *ModelConfig) GetModelTransactionPolicy() *ModelTransactionPolicy
- func (x *ModelConfig) GetModelWarmup() []*ModelWarmup
- func (x *ModelConfig) GetName() string
- func (x *ModelConfig) GetOptimization() *ModelOptimizationPolicy
- func (x *ModelConfig) GetOutput() []*ModelOutput
- func (x *ModelConfig) GetParameters() map[string]*ModelParameter
- func (x *ModelConfig) GetPlatform() string
- func (x *ModelConfig) GetResponseCache() *ModelResponseCache
- func (x *ModelConfig) GetRuntime() string
- func (m *ModelConfig) GetSchedulingChoice() isModelConfig_SchedulingChoice
- func (x *ModelConfig) GetSequenceBatching() *ModelSequenceBatching
- func (x *ModelConfig) GetVersionPolicy() *ModelVersionPolicy
- func (*ModelConfig) ProtoMessage()
- func (x *ModelConfig) ProtoReflect() protoreflect.Message
- func (x *ModelConfig) Reset()
- func (x *ModelConfig) String() string
- type ModelConfigRequest
- func (*ModelConfigRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelConfigRequest) GetName() string
- func (x *ModelConfigRequest) GetVersion() string
- func (*ModelConfigRequest) ProtoMessage()
- func (x *ModelConfigRequest) ProtoReflect() protoreflect.Message
- func (x *ModelConfigRequest) Reset()
- func (x *ModelConfigRequest) String() string
- type ModelConfigResponse
- func (*ModelConfigResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelConfigResponse) GetConfig() *ModelConfig
- func (*ModelConfigResponse) ProtoMessage()
- func (x *ModelConfigResponse) ProtoReflect() protoreflect.Message
- func (x *ModelConfigResponse) Reset()
- func (x *ModelConfigResponse) String() string
- type ModelConfig_DynamicBatching
- type ModelConfig_EnsembleScheduling
- type ModelConfig_SequenceBatching
- type ModelDynamicBatching
- func (*ModelDynamicBatching) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelDynamicBatching) GetDefaultPriorityLevel() uint64
- func (x *ModelDynamicBatching) GetDefaultQueuePolicy() *ModelQueuePolicy
- func (x *ModelDynamicBatching) GetMaxQueueDelayMicroseconds() uint64
- func (x *ModelDynamicBatching) GetPreferredBatchSize() []int32
- func (x *ModelDynamicBatching) GetPreserveOrdering() bool
- func (x *ModelDynamicBatching) GetPriorityLevels() uint64
- func (x *ModelDynamicBatching) GetPriorityQueuePolicy() map[uint64]*ModelQueuePolicy
- func (*ModelDynamicBatching) ProtoMessage()
- func (x *ModelDynamicBatching) ProtoReflect() protoreflect.Message
- func (x *ModelDynamicBatching) Reset()
- func (x *ModelDynamicBatching) String() string
- type ModelEnsembling
- type ModelEnsembling_Step
- func (*ModelEnsembling_Step) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelEnsembling_Step) GetInputMap() map[string]string
- func (x *ModelEnsembling_Step) GetModelName() string
- func (x *ModelEnsembling_Step) GetModelNamespace() string
- func (x *ModelEnsembling_Step) GetModelVersion() int64
- func (x *ModelEnsembling_Step) GetOutputMap() map[string]string
- func (*ModelEnsembling_Step) ProtoMessage()
- func (x *ModelEnsembling_Step) ProtoReflect() protoreflect.Message
- func (x *ModelEnsembling_Step) Reset()
- func (x *ModelEnsembling_Step) String() string
- type ModelInferRequest
- func (*ModelInferRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInferRequest) GetId() string
- func (x *ModelInferRequest) GetInputs() []*ModelInferRequest_InferInputTensor
- func (x *ModelInferRequest) GetModelName() string
- func (x *ModelInferRequest) GetModelVersion() string
- func (x *ModelInferRequest) GetOutputs() []*ModelInferRequest_InferRequestedOutputTensor
- func (x *ModelInferRequest) GetParameters() map[string]*InferParameter
- func (x *ModelInferRequest) GetRawInputContents() [][]byte
- func (*ModelInferRequest) ProtoMessage()
- func (x *ModelInferRequest) ProtoReflect() protoreflect.Message
- func (x *ModelInferRequest) Reset()
- func (x *ModelInferRequest) String() string
- type ModelInferRequest_InferInputTensor
- func (*ModelInferRequest_InferInputTensor) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInferRequest_InferInputTensor) GetContents() *InferTensorContents
- func (x *ModelInferRequest_InferInputTensor) GetDatatype() string
- func (x *ModelInferRequest_InferInputTensor) GetName() string
- func (x *ModelInferRequest_InferInputTensor) GetParameters() map[string]*InferParameter
- func (x *ModelInferRequest_InferInputTensor) GetShape() []int64
- func (*ModelInferRequest_InferInputTensor) ProtoMessage()
- func (x *ModelInferRequest_InferInputTensor) ProtoReflect() protoreflect.Message
- func (x *ModelInferRequest_InferInputTensor) Reset()
- func (x *ModelInferRequest_InferInputTensor) String() string
- type ModelInferRequest_InferRequestedOutputTensor
- func (*ModelInferRequest_InferRequestedOutputTensor) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInferRequest_InferRequestedOutputTensor) GetName() string
- func (x *ModelInferRequest_InferRequestedOutputTensor) GetParameters() map[string]*InferParameter
- func (*ModelInferRequest_InferRequestedOutputTensor) ProtoMessage()
- func (x *ModelInferRequest_InferRequestedOutputTensor) ProtoReflect() protoreflect.Message
- func (x *ModelInferRequest_InferRequestedOutputTensor) Reset()
- func (x *ModelInferRequest_InferRequestedOutputTensor) String() string
- type ModelInferResponse
- func (*ModelInferResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInferResponse) GetId() string
- func (x *ModelInferResponse) GetModelName() string
- func (x *ModelInferResponse) GetModelVersion() string
- func (x *ModelInferResponse) GetOutputs() []*ModelInferResponse_InferOutputTensor
- func (x *ModelInferResponse) GetParameters() map[string]*InferParameter
- func (x *ModelInferResponse) GetRawOutputContents() [][]byte
- func (*ModelInferResponse) ProtoMessage()
- func (x *ModelInferResponse) ProtoReflect() protoreflect.Message
- func (x *ModelInferResponse) Reset()
- func (x *ModelInferResponse) String() string
- type ModelInferResponse_InferOutputTensor
- func (*ModelInferResponse_InferOutputTensor) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInferResponse_InferOutputTensor) GetContents() *InferTensorContents
- func (x *ModelInferResponse_InferOutputTensor) GetDatatype() string
- func (x *ModelInferResponse_InferOutputTensor) GetName() string
- func (x *ModelInferResponse_InferOutputTensor) GetParameters() map[string]*InferParameter
- func (x *ModelInferResponse_InferOutputTensor) GetShape() []int64
- func (*ModelInferResponse_InferOutputTensor) ProtoMessage()
- func (x *ModelInferResponse_InferOutputTensor) ProtoReflect() protoreflect.Message
- func (x *ModelInferResponse_InferOutputTensor) Reset()
- func (x *ModelInferResponse_InferOutputTensor) String() string
- type ModelInput
- func (*ModelInput) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInput) GetAllowRaggedBatch() bool
- func (x *ModelInput) GetDataType() DataType
- func (x *ModelInput) GetDims() []int64
- func (x *ModelInput) GetFormat() ModelInput_Format
- func (x *ModelInput) GetIsNonLinearFormatIo() bool
- func (x *ModelInput) GetIsShapeTensor() bool
- func (x *ModelInput) GetName() string
- func (x *ModelInput) GetOptional() bool
- func (x *ModelInput) GetReshape() *ModelTensorReshape
- func (*ModelInput) ProtoMessage()
- func (x *ModelInput) ProtoReflect() protoreflect.Message
- func (x *ModelInput) Reset()
- func (x *ModelInput) String() string
- type ModelInput_Format
- func (ModelInput_Format) Descriptor() protoreflect.EnumDescriptor
- func (x ModelInput_Format) Enum() *ModelInput_Format
- func (ModelInput_Format) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelInput_Format) Number() protoreflect.EnumNumber
- func (x ModelInput_Format) String() string
- func (ModelInput_Format) Type() protoreflect.EnumType
- type ModelInstanceGroup
- func (*ModelInstanceGroup) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInstanceGroup) GetCount() int32
- func (x *ModelInstanceGroup) GetGpus() []int32
- func (x *ModelInstanceGroup) GetHostPolicy() string
- func (x *ModelInstanceGroup) GetKind() ModelInstanceGroup_Kind
- func (x *ModelInstanceGroup) GetName() string
- func (x *ModelInstanceGroup) GetPassive() bool
- func (x *ModelInstanceGroup) GetProfile() []string
- func (x *ModelInstanceGroup) GetRateLimiter() *ModelRateLimiter
- func (x *ModelInstanceGroup) GetSecondaryDevices() []*ModelInstanceGroup_SecondaryDevice
- func (*ModelInstanceGroup) ProtoMessage()
- func (x *ModelInstanceGroup) ProtoReflect() protoreflect.Message
- func (x *ModelInstanceGroup) Reset()
- func (x *ModelInstanceGroup) String() string
- type ModelInstanceGroup_Kind
- func (ModelInstanceGroup_Kind) Descriptor() protoreflect.EnumDescriptor
- func (x ModelInstanceGroup_Kind) Enum() *ModelInstanceGroup_Kind
- func (ModelInstanceGroup_Kind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelInstanceGroup_Kind) Number() protoreflect.EnumNumber
- func (x ModelInstanceGroup_Kind) String() string
- func (ModelInstanceGroup_Kind) Type() protoreflect.EnumType
- type ModelInstanceGroup_SecondaryDevice
- func (*ModelInstanceGroup_SecondaryDevice) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInstanceGroup_SecondaryDevice) GetDeviceId() int64
- func (x *ModelInstanceGroup_SecondaryDevice) GetKind() ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind
- func (*ModelInstanceGroup_SecondaryDevice) ProtoMessage()
- func (x *ModelInstanceGroup_SecondaryDevice) ProtoReflect() protoreflect.Message
- func (x *ModelInstanceGroup_SecondaryDevice) Reset()
- func (x *ModelInstanceGroup_SecondaryDevice) String() string
- type ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind
- func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Descriptor() protoreflect.EnumDescriptor
- func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Enum() *ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind
- func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Number() protoreflect.EnumNumber
- func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) String() string
- func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Type() protoreflect.EnumType
- type ModelMetadataRequest
- func (*ModelMetadataRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelMetadataRequest) GetName() string
- func (x *ModelMetadataRequest) GetVersion() string
- func (*ModelMetadataRequest) ProtoMessage()
- func (x *ModelMetadataRequest) ProtoReflect() protoreflect.Message
- func (x *ModelMetadataRequest) Reset()
- func (x *ModelMetadataRequest) String() string
- type ModelMetadataResponse
- func (*ModelMetadataResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelMetadataResponse) GetInputs() []*ModelMetadataResponse_TensorMetadata
- func (x *ModelMetadataResponse) GetName() string
- func (x *ModelMetadataResponse) GetOutputs() []*ModelMetadataResponse_TensorMetadata
- func (x *ModelMetadataResponse) GetPlatform() string
- func (x *ModelMetadataResponse) GetVersions() []string
- func (*ModelMetadataResponse) ProtoMessage()
- func (x *ModelMetadataResponse) ProtoReflect() protoreflect.Message
- func (x *ModelMetadataResponse) Reset()
- func (x *ModelMetadataResponse) String() string
- type ModelMetadataResponse_TensorMetadata
- func (*ModelMetadataResponse_TensorMetadata) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelMetadataResponse_TensorMetadata) GetDatatype() string
- func (x *ModelMetadataResponse_TensorMetadata) GetName() string
- func (x *ModelMetadataResponse_TensorMetadata) GetShape() []int64
- func (*ModelMetadataResponse_TensorMetadata) ProtoMessage()
- func (x *ModelMetadataResponse_TensorMetadata) ProtoReflect() protoreflect.Message
- func (x *ModelMetadataResponse_TensorMetadata) Reset()
- func (x *ModelMetadataResponse_TensorMetadata) String() string
- type ModelOperations
- type ModelOptimizationPolicy
- func (*ModelOptimizationPolicy) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy) GetCuda() *ModelOptimizationPolicy_Cuda
- func (x *ModelOptimizationPolicy) GetEagerBatching() bool
- func (x *ModelOptimizationPolicy) GetExecutionAccelerators() *ModelOptimizationPolicy_ExecutionAccelerators
- func (x *ModelOptimizationPolicy) GetGatherKernelBufferThreshold() uint32
- func (x *ModelOptimizationPolicy) GetGraph() *ModelOptimizationPolicy_Graph
- func (x *ModelOptimizationPolicy) GetInputPinnedMemory() *ModelOptimizationPolicy_PinnedMemoryBuffer
- func (x *ModelOptimizationPolicy) GetOutputPinnedMemory() *ModelOptimizationPolicy_PinnedMemoryBuffer
- func (x *ModelOptimizationPolicy) GetPriority() ModelOptimizationPolicy_ModelPriority
- func (*ModelOptimizationPolicy) ProtoMessage()
- func (x *ModelOptimizationPolicy) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy) Reset()
- func (x *ModelOptimizationPolicy) String() string
- type ModelOptimizationPolicy_Cuda
- func (*ModelOptimizationPolicy_Cuda) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Cuda) GetBusyWaitEvents() bool
- func (x *ModelOptimizationPolicy_Cuda) GetGraphSpec() []*ModelOptimizationPolicy_Cuda_GraphSpec
- func (x *ModelOptimizationPolicy_Cuda) GetGraphs() bool
- func (x *ModelOptimizationPolicy_Cuda) GetOutputCopyStream() bool
- func (*ModelOptimizationPolicy_Cuda) ProtoMessage()
- func (x *ModelOptimizationPolicy_Cuda) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Cuda) Reset()
- func (x *ModelOptimizationPolicy_Cuda) String() string
- type ModelOptimizationPolicy_Cuda_GraphSpec
- func (*ModelOptimizationPolicy_Cuda_GraphSpec) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetBatchSize() int32
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetGraphLowerBound() *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetInput() map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape
- func (*ModelOptimizationPolicy_Cuda_GraphSpec) ProtoMessage()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) Reset()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) String() string
- type ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound
- func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetBatchSize() int32
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetInput() map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape
- func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoMessage()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Reset()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) String() string
- type ModelOptimizationPolicy_Cuda_GraphSpec_Shape
- func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) GetDim() []int64
- func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoMessage()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Reset()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) String() string
- type ModelOptimizationPolicy_ExecutionAccelerators
- func (*ModelOptimizationPolicy_ExecutionAccelerators) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) GetCpuExecutionAccelerator() []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) GetGpuExecutionAccelerator() []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
- func (*ModelOptimizationPolicy_ExecutionAccelerators) ProtoMessage()
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) Reset()
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) String() string
- type ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
- func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetName() string
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetParameters() map[string]string
- func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoMessage()
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Reset()
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) String() string
- type ModelOptimizationPolicy_Graph
- func (*ModelOptimizationPolicy_Graph) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Graph) GetLevel() int32
- func (*ModelOptimizationPolicy_Graph) ProtoMessage()
- func (x *ModelOptimizationPolicy_Graph) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Graph) Reset()
- func (x *ModelOptimizationPolicy_Graph) String() string
- type ModelOptimizationPolicy_ModelPriority
- func (ModelOptimizationPolicy_ModelPriority) Descriptor() protoreflect.EnumDescriptor
- func (x ModelOptimizationPolicy_ModelPriority) Enum() *ModelOptimizationPolicy_ModelPriority
- func (ModelOptimizationPolicy_ModelPriority) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelOptimizationPolicy_ModelPriority) Number() protoreflect.EnumNumber
- func (x ModelOptimizationPolicy_ModelPriority) String() string
- func (ModelOptimizationPolicy_ModelPriority) Type() protoreflect.EnumType
- type ModelOptimizationPolicy_PinnedMemoryBuffer
- func (*ModelOptimizationPolicy_PinnedMemoryBuffer) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) GetEnable() bool
- func (*ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoMessage()
- func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) Reset()
- func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) String() string
- type ModelOutput
- func (*ModelOutput) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOutput) GetDataType() DataType
- func (x *ModelOutput) GetDims() []int64
- func (x *ModelOutput) GetIsNonLinearFormatIo() bool
- func (x *ModelOutput) GetIsShapeTensor() bool
- func (x *ModelOutput) GetLabelFilename() string
- func (x *ModelOutput) GetName() string
- func (x *ModelOutput) GetReshape() *ModelTensorReshape
- func (*ModelOutput) ProtoMessage()
- func (x *ModelOutput) ProtoReflect() protoreflect.Message
- func (x *ModelOutput) Reset()
- func (x *ModelOutput) String() string
- type ModelParameter
- type ModelQueuePolicy
- func (*ModelQueuePolicy) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelQueuePolicy) GetAllowTimeoutOverride() bool
- func (x *ModelQueuePolicy) GetDefaultTimeoutMicroseconds() uint64
- func (x *ModelQueuePolicy) GetMaxQueueSize() uint32
- func (x *ModelQueuePolicy) GetTimeoutAction() ModelQueuePolicy_TimeoutAction
- func (*ModelQueuePolicy) ProtoMessage()
- func (x *ModelQueuePolicy) ProtoReflect() protoreflect.Message
- func (x *ModelQueuePolicy) Reset()
- func (x *ModelQueuePolicy) String() string
- type ModelQueuePolicy_TimeoutAction
- func (ModelQueuePolicy_TimeoutAction) Descriptor() protoreflect.EnumDescriptor
- func (x ModelQueuePolicy_TimeoutAction) Enum() *ModelQueuePolicy_TimeoutAction
- func (ModelQueuePolicy_TimeoutAction) EnumDescriptor() ([]byte, []int) — deprecated
- func (x ModelQueuePolicy_TimeoutAction) Number() protoreflect.EnumNumber
- func (x ModelQueuePolicy_TimeoutAction) String() string
- func (ModelQueuePolicy_TimeoutAction) Type() protoreflect.EnumType
- type ModelRateLimiter
- func (*ModelRateLimiter) Descriptor() ([]byte, []int) — deprecated
- func (x *ModelRateLimiter) GetPriority() uint32
- func (x *ModelRateLimiter) GetResources() []*ModelRateLimiter_Resource
- func (*ModelRateLimiter) ProtoMessage()
- func (x *ModelRateLimiter) ProtoReflect() protoreflect.Message
- func (x *ModelRateLimiter) Reset()
- func (x *ModelRateLimiter) String() string
- type ModelRateLimiter_Resource
- func (*ModelRateLimiter_Resource) Descriptor() ([]byte, []int)deprecated
- func (x *ModelRateLimiter_Resource) GetCount() uint32
- func (x *ModelRateLimiter_Resource) GetGlobal() bool
- func (x *ModelRateLimiter_Resource) GetName() string
- func (*ModelRateLimiter_Resource) ProtoMessage()
- func (x *ModelRateLimiter_Resource) ProtoReflect() protoreflect.Message
- func (x *ModelRateLimiter_Resource) Reset()
- func (x *ModelRateLimiter_Resource) String() string
- type ModelReadyRequest
- func (*ModelReadyRequest) Descriptor() ([]byte, []int)deprecated
- func (x *ModelReadyRequest) GetName() string
- func (x *ModelReadyRequest) GetVersion() string
- func (*ModelReadyRequest) ProtoMessage()
- func (x *ModelReadyRequest) ProtoReflect() protoreflect.Message
- func (x *ModelReadyRequest) Reset()
- func (x *ModelReadyRequest) String() string
- type ModelReadyResponse
- type ModelRepositoryAgents
- func (*ModelRepositoryAgents) Descriptor() ([]byte, []int)deprecated
- func (x *ModelRepositoryAgents) GetAgents() []*ModelRepositoryAgents_Agent
- func (*ModelRepositoryAgents) ProtoMessage()
- func (x *ModelRepositoryAgents) ProtoReflect() protoreflect.Message
- func (x *ModelRepositoryAgents) Reset()
- func (x *ModelRepositoryAgents) String() string
- type ModelRepositoryAgents_Agent
- func (*ModelRepositoryAgents_Agent) Descriptor() ([]byte, []int)deprecated
- func (x *ModelRepositoryAgents_Agent) GetName() string
- func (x *ModelRepositoryAgents_Agent) GetParameters() map[string]string
- func (*ModelRepositoryAgents_Agent) ProtoMessage()
- func (x *ModelRepositoryAgents_Agent) ProtoReflect() protoreflect.Message
- func (x *ModelRepositoryAgents_Agent) Reset()
- func (x *ModelRepositoryAgents_Agent) String() string
- type ModelRepositoryParameter
- func (*ModelRepositoryParameter) Descriptor() ([]byte, []int)deprecated
- func (x *ModelRepositoryParameter) GetBoolParam() bool
- func (x *ModelRepositoryParameter) GetBytesParam() []byte
- func (x *ModelRepositoryParameter) GetInt64Param() int64
- func (m *ModelRepositoryParameter) GetParameterChoice() isModelRepositoryParameter_ParameterChoice
- func (x *ModelRepositoryParameter) GetStringParam() string
- func (*ModelRepositoryParameter) ProtoMessage()
- func (x *ModelRepositoryParameter) ProtoReflect() protoreflect.Message
- func (x *ModelRepositoryParameter) Reset()
- func (x *ModelRepositoryParameter) String() string
- type ModelRepositoryParameter_BoolParam
- type ModelRepositoryParameter_BytesParam
- type ModelRepositoryParameter_Int64Param
- type ModelRepositoryParameter_StringParam
- type ModelResponseCache
- func (*ModelResponseCache) Descriptor() ([]byte, []int)deprecated
- func (x *ModelResponseCache) GetEnable() bool
- func (*ModelResponseCache) ProtoMessage()
- func (x *ModelResponseCache) ProtoReflect() protoreflect.Message
- func (x *ModelResponseCache) Reset()
- func (x *ModelResponseCache) String() string
- type ModelSequenceBatching
- func (*ModelSequenceBatching) Descriptor() ([]byte, []int)deprecated
- func (x *ModelSequenceBatching) GetControlInput() []*ModelSequenceBatching_ControlInput
- func (x *ModelSequenceBatching) GetDirect() *ModelSequenceBatching_StrategyDirect
- func (x *ModelSequenceBatching) GetIterativeSequence() bool
- func (x *ModelSequenceBatching) GetMaxSequenceIdleMicroseconds() uint64
- func (x *ModelSequenceBatching) GetOldest() *ModelSequenceBatching_StrategyOldest
- func (x *ModelSequenceBatching) GetState() []*ModelSequenceBatching_State
- func (m *ModelSequenceBatching) GetStrategyChoice() isModelSequenceBatching_StrategyChoice
- func (*ModelSequenceBatching) ProtoMessage()
- func (x *ModelSequenceBatching) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching) Reset()
- func (x *ModelSequenceBatching) String() string
- type ModelSequenceBatching_Control
- func (*ModelSequenceBatching_Control) Descriptor() ([]byte, []int)deprecated
- func (x *ModelSequenceBatching_Control) GetBoolFalseTrue() []bool
- func (x *ModelSequenceBatching_Control) GetDataType() DataType
- func (x *ModelSequenceBatching_Control) GetFp32FalseTrue() []float32
- func (x *ModelSequenceBatching_Control) GetInt32FalseTrue() []int32
- func (x *ModelSequenceBatching_Control) GetKind() ModelSequenceBatching_Control_Kind
- func (*ModelSequenceBatching_Control) ProtoMessage()
- func (x *ModelSequenceBatching_Control) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_Control) Reset()
- func (x *ModelSequenceBatching_Control) String() string
- type ModelSequenceBatching_ControlInput
- func (*ModelSequenceBatching_ControlInput) Descriptor() ([]byte, []int)deprecated
- func (x *ModelSequenceBatching_ControlInput) GetControl() []*ModelSequenceBatching_Control
- func (x *ModelSequenceBatching_ControlInput) GetName() string
- func (*ModelSequenceBatching_ControlInput) ProtoMessage()
- func (x *ModelSequenceBatching_ControlInput) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_ControlInput) Reset()
- func (x *ModelSequenceBatching_ControlInput) String() string
- type ModelSequenceBatching_Control_Kind
- func (ModelSequenceBatching_Control_Kind) Descriptor() protoreflect.EnumDescriptor
- func (x ModelSequenceBatching_Control_Kind) Enum() *ModelSequenceBatching_Control_Kind
- func (ModelSequenceBatching_Control_Kind) EnumDescriptor() ([]byte, []int) — deprecated
- func (x ModelSequenceBatching_Control_Kind) Number() protoreflect.EnumNumber
- func (x ModelSequenceBatching_Control_Kind) String() string
- func (ModelSequenceBatching_Control_Kind) Type() protoreflect.EnumType
- type ModelSequenceBatching_Direct
- type ModelSequenceBatching_InitialState
- func (*ModelSequenceBatching_InitialState) Descriptor() ([]byte, []int)deprecated
- func (x *ModelSequenceBatching_InitialState) GetDataFile() string
- func (x *ModelSequenceBatching_InitialState) GetDataType() DataType
- func (x *ModelSequenceBatching_InitialState) GetDims() []int64
- func (x *ModelSequenceBatching_InitialState) GetName() string
- func (m *ModelSequenceBatching_InitialState) GetStateData() isModelSequenceBatching_InitialState_StateData
- func (x *ModelSequenceBatching_InitialState) GetZeroData() bool
- func (*ModelSequenceBatching_InitialState) ProtoMessage()
- func (x *ModelSequenceBatching_InitialState) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_InitialState) Reset()
- func (x *ModelSequenceBatching_InitialState) String() string
- type ModelSequenceBatching_InitialState_DataFile
- type ModelSequenceBatching_InitialState_ZeroData
- type ModelSequenceBatching_Oldest
- type ModelSequenceBatching_State
- func (*ModelSequenceBatching_State) Descriptor() ([]byte, []int)deprecated
- func (x *ModelSequenceBatching_State) GetDataType() DataType
- func (x *ModelSequenceBatching_State) GetDims() []int64
- func (x *ModelSequenceBatching_State) GetInitialState() []*ModelSequenceBatching_InitialState
- func (x *ModelSequenceBatching_State) GetInputName() string
- func (x *ModelSequenceBatching_State) GetOutputName() string
- func (x *ModelSequenceBatching_State) GetUseGrowableMemory() bool
- func (x *ModelSequenceBatching_State) GetUseSameBufferForInputOutput() bool
- func (*ModelSequenceBatching_State) ProtoMessage()
- func (x *ModelSequenceBatching_State) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_State) Reset()
- func (x *ModelSequenceBatching_State) String() string
- type ModelSequenceBatching_StrategyDirect
- func (*ModelSequenceBatching_StrategyDirect) Descriptor() ([]byte, []int)deprecated
- func (x *ModelSequenceBatching_StrategyDirect) GetMaxQueueDelayMicroseconds() uint64
- func (x *ModelSequenceBatching_StrategyDirect) GetMinimumSlotUtilization() float32
- func (*ModelSequenceBatching_StrategyDirect) ProtoMessage()
- func (x *ModelSequenceBatching_StrategyDirect) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_StrategyDirect) Reset()
- func (x *ModelSequenceBatching_StrategyDirect) String() string
- type ModelSequenceBatching_StrategyOldest
- func (*ModelSequenceBatching_StrategyOldest) Descriptor() ([]byte, []int)deprecated
- func (x *ModelSequenceBatching_StrategyOldest) GetMaxCandidateSequences() int32
- func (x *ModelSequenceBatching_StrategyOldest) GetMaxQueueDelayMicroseconds() uint64
- func (x *ModelSequenceBatching_StrategyOldest) GetPreferredBatchSize() []int32
- func (x *ModelSequenceBatching_StrategyOldest) GetPreserveOrdering() bool
- func (*ModelSequenceBatching_StrategyOldest) ProtoMessage()
- func (x *ModelSequenceBatching_StrategyOldest) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_StrategyOldest) Reset()
- func (x *ModelSequenceBatching_StrategyOldest) String() string
- type ModelStatistics
- func (*ModelStatistics) Descriptor() ([]byte, []int)deprecated
- func (x *ModelStatistics) GetBatchStats() []*InferBatchStatistics
- func (x *ModelStatistics) GetExecutionCount() uint64
- func (x *ModelStatistics) GetInferenceCount() uint64
- func (x *ModelStatistics) GetInferenceStats() *InferStatistics
- func (x *ModelStatistics) GetLastInference() uint64
- func (x *ModelStatistics) GetMemoryUsage() []*MemoryUsage
- func (x *ModelStatistics) GetName() string
- func (x *ModelStatistics) GetResponseStats() map[string]*InferResponseStatistics
- func (x *ModelStatistics) GetVersion() string
- func (*ModelStatistics) ProtoMessage()
- func (x *ModelStatistics) ProtoReflect() protoreflect.Message
- func (x *ModelStatistics) Reset()
- func (x *ModelStatistics) String() string
- type ModelStatisticsRequest
- func (*ModelStatisticsRequest) Descriptor() ([]byte, []int)deprecated
- func (x *ModelStatisticsRequest) GetName() string
- func (x *ModelStatisticsRequest) GetVersion() string
- func (*ModelStatisticsRequest) ProtoMessage()
- func (x *ModelStatisticsRequest) ProtoReflect() protoreflect.Message
- func (x *ModelStatisticsRequest) Reset()
- func (x *ModelStatisticsRequest) String() string
- type ModelStatisticsResponse
- func (*ModelStatisticsResponse) Descriptor() ([]byte, []int)deprecated
- func (x *ModelStatisticsResponse) GetModelStats() []*ModelStatistics
- func (*ModelStatisticsResponse) ProtoMessage()
- func (x *ModelStatisticsResponse) ProtoReflect() protoreflect.Message
- func (x *ModelStatisticsResponse) Reset()
- func (x *ModelStatisticsResponse) String() string
- type ModelStreamInferResponse
- func (*ModelStreamInferResponse) Descriptor() ([]byte, []int)deprecated
- func (x *ModelStreamInferResponse) GetErrorMessage() string
- func (x *ModelStreamInferResponse) GetInferResponse() *ModelInferResponse
- func (*ModelStreamInferResponse) ProtoMessage()
- func (x *ModelStreamInferResponse) ProtoReflect() protoreflect.Message
- func (x *ModelStreamInferResponse) Reset()
- func (x *ModelStreamInferResponse) String() string
- type ModelTensorReshape
- func (*ModelTensorReshape) Descriptor() ([]byte, []int)deprecated
- func (x *ModelTensorReshape) GetShape() []int64
- func (*ModelTensorReshape) ProtoMessage()
- func (x *ModelTensorReshape) ProtoReflect() protoreflect.Message
- func (x *ModelTensorReshape) Reset()
- func (x *ModelTensorReshape) String() string
- type ModelTransactionPolicy
- func (*ModelTransactionPolicy) Descriptor() ([]byte, []int)deprecated
- func (x *ModelTransactionPolicy) GetDecoupled() bool
- func (*ModelTransactionPolicy) ProtoMessage()
- func (x *ModelTransactionPolicy) ProtoReflect() protoreflect.Message
- func (x *ModelTransactionPolicy) Reset()
- func (x *ModelTransactionPolicy) String() string
- type ModelVersionPolicy
- func (*ModelVersionPolicy) Descriptor() ([]byte, []int)deprecated
- func (x *ModelVersionPolicy) GetAll() *ModelVersionPolicy_All
- func (x *ModelVersionPolicy) GetLatest() *ModelVersionPolicy_Latest
- func (m *ModelVersionPolicy) GetPolicyChoice() isModelVersionPolicy_PolicyChoice
- func (x *ModelVersionPolicy) GetSpecific() *ModelVersionPolicy_Specific
- func (*ModelVersionPolicy) ProtoMessage()
- func (x *ModelVersionPolicy) ProtoReflect() protoreflect.Message
- func (x *ModelVersionPolicy) Reset()
- func (x *ModelVersionPolicy) String() string
- type ModelVersionPolicy_All
- type ModelVersionPolicy_All_
- type ModelVersionPolicy_Latest
- func (*ModelVersionPolicy_Latest) Descriptor() ([]byte, []int)deprecated
- func (x *ModelVersionPolicy_Latest) GetNumVersions() uint32
- func (*ModelVersionPolicy_Latest) ProtoMessage()
- func (x *ModelVersionPolicy_Latest) ProtoReflect() protoreflect.Message
- func (x *ModelVersionPolicy_Latest) Reset()
- func (x *ModelVersionPolicy_Latest) String() string
- type ModelVersionPolicy_Latest_
- type ModelVersionPolicy_Specific
- func (*ModelVersionPolicy_Specific) Descriptor() ([]byte, []int)deprecated
- func (x *ModelVersionPolicy_Specific) GetVersions() []int64
- func (*ModelVersionPolicy_Specific) ProtoMessage()
- func (x *ModelVersionPolicy_Specific) ProtoReflect() protoreflect.Message
- func (x *ModelVersionPolicy_Specific) Reset()
- func (x *ModelVersionPolicy_Specific) String() string
- type ModelVersionPolicy_Specific_
- type ModelWarmup
- func (*ModelWarmup) Descriptor() ([]byte, []int)deprecated
- func (x *ModelWarmup) GetBatchSize() uint32
- func (x *ModelWarmup) GetCount() uint32
- func (x *ModelWarmup) GetInputs() map[string]*ModelWarmup_Input
- func (x *ModelWarmup) GetName() string
- func (*ModelWarmup) ProtoMessage()
- func (x *ModelWarmup) ProtoReflect() protoreflect.Message
- func (x *ModelWarmup) Reset()
- func (x *ModelWarmup) String() string
- type ModelWarmup_Input
- func (*ModelWarmup_Input) Descriptor() ([]byte, []int)deprecated
- func (x *ModelWarmup_Input) GetDataType() DataType
- func (x *ModelWarmup_Input) GetDims() []int64
- func (x *ModelWarmup_Input) GetInputDataFile() string
- func (m *ModelWarmup_Input) GetInputDataType() isModelWarmup_Input_InputDataType
- func (x *ModelWarmup_Input) GetRandomData() bool
- func (x *ModelWarmup_Input) GetZeroData() bool
- func (*ModelWarmup_Input) ProtoMessage()
- func (x *ModelWarmup_Input) ProtoReflect() protoreflect.Message
- func (x *ModelWarmup_Input) Reset()
- func (x *ModelWarmup_Input) String() string
- type ModelWarmup_Input_InputDataFile
- type ModelWarmup_Input_RandomData
- type ModelWarmup_Input_ZeroData
- type RepositoryIndexRequest
- func (*RepositoryIndexRequest) Descriptor() ([]byte, []int) — deprecated
- func (x *RepositoryIndexRequest) GetReady() bool
- func (x *RepositoryIndexRequest) GetRepositoryName() string
- func (*RepositoryIndexRequest) ProtoMessage()
- func (x *RepositoryIndexRequest) ProtoReflect() protoreflect.Message
- func (x *RepositoryIndexRequest) Reset()
- func (x *RepositoryIndexRequest) String() string
- type RepositoryIndexResponse
- func (*RepositoryIndexResponse) Descriptor() ([]byte, []int)deprecated
- func (x *RepositoryIndexResponse) GetModels() []*RepositoryIndexResponse_ModelIndex
- func (*RepositoryIndexResponse) ProtoMessage()
- func (x *RepositoryIndexResponse) ProtoReflect() protoreflect.Message
- func (x *RepositoryIndexResponse) Reset()
- func (x *RepositoryIndexResponse) String() string
- type RepositoryIndexResponse_ModelIndex
- func (*RepositoryIndexResponse_ModelIndex) Descriptor() ([]byte, []int)deprecated
- func (x *RepositoryIndexResponse_ModelIndex) GetName() string
- func (x *RepositoryIndexResponse_ModelIndex) GetReason() string
- func (x *RepositoryIndexResponse_ModelIndex) GetState() string
- func (x *RepositoryIndexResponse_ModelIndex) GetVersion() string
- func (*RepositoryIndexResponse_ModelIndex) ProtoMessage()
- func (x *RepositoryIndexResponse_ModelIndex) ProtoReflect() protoreflect.Message
- func (x *RepositoryIndexResponse_ModelIndex) Reset()
- func (x *RepositoryIndexResponse_ModelIndex) String() string
- type RepositoryModelLoadRequest
- func (*RepositoryModelLoadRequest) Descriptor() ([]byte, []int)deprecated
- func (x *RepositoryModelLoadRequest) GetModelName() string
- func (x *RepositoryModelLoadRequest) GetParameters() map[string]*ModelRepositoryParameter
- func (x *RepositoryModelLoadRequest) GetRepositoryName() string
- func (*RepositoryModelLoadRequest) ProtoMessage()
- func (x *RepositoryModelLoadRequest) ProtoReflect() protoreflect.Message
- func (x *RepositoryModelLoadRequest) Reset()
- func (x *RepositoryModelLoadRequest) String() string
- type RepositoryModelLoadResponse
- type RepositoryModelUnloadRequest
- func (*RepositoryModelUnloadRequest) Descriptor() ([]byte, []int)deprecated
- func (x *RepositoryModelUnloadRequest) GetModelName() string
- func (x *RepositoryModelUnloadRequest) GetParameters() map[string]*ModelRepositoryParameter
- func (x *RepositoryModelUnloadRequest) GetRepositoryName() string
- func (*RepositoryModelUnloadRequest) ProtoMessage()
- func (x *RepositoryModelUnloadRequest) ProtoReflect() protoreflect.Message
- func (x *RepositoryModelUnloadRequest) Reset()
- func (x *RepositoryModelUnloadRequest) String() string
- type RepositoryModelUnloadResponse
- func (*RepositoryModelUnloadResponse) Descriptor() ([]byte, []int)deprecated
- func (*RepositoryModelUnloadResponse) ProtoMessage()
- func (x *RepositoryModelUnloadResponse) ProtoReflect() protoreflect.Message
- func (x *RepositoryModelUnloadResponse) Reset()
- func (x *RepositoryModelUnloadResponse) String() string
- type ServerLiveRequest
- type ServerLiveResponse
- type ServerMetadataRequest
- type ServerMetadataResponse
- func (*ServerMetadataResponse) Descriptor() ([]byte, []int)deprecated
- func (x *ServerMetadataResponse) GetExtensions() []string
- func (x *ServerMetadataResponse) GetName() string
- func (x *ServerMetadataResponse) GetVersion() string
- func (*ServerMetadataResponse) ProtoMessage()
- func (x *ServerMetadataResponse) ProtoReflect() protoreflect.Message
- func (x *ServerMetadataResponse) Reset()
- func (x *ServerMetadataResponse) String() string
- type ServerReadyRequest
- type ServerReadyResponse
- func (*ServerReadyResponse) Descriptor() ([]byte, []int)deprecated
- func (x *ServerReadyResponse) GetReady() bool
- func (*ServerReadyResponse) ProtoMessage()
- func (x *ServerReadyResponse) ProtoReflect() protoreflect.Message
- func (x *ServerReadyResponse) Reset()
- func (x *ServerReadyResponse) String() string
- type StatisticDuration
- func (*StatisticDuration) Descriptor() ([]byte, []int)deprecated
- func (x *StatisticDuration) GetCount() uint64
- func (x *StatisticDuration) GetNs() uint64
- func (*StatisticDuration) ProtoMessage()
- func (x *StatisticDuration) ProtoReflect() protoreflect.Message
- func (x *StatisticDuration) Reset()
- func (x *StatisticDuration) String() string
- type SystemSharedMemoryRegisterRequest
- func (*SystemSharedMemoryRegisterRequest) Descriptor() ([]byte, []int)deprecated
- func (x *SystemSharedMemoryRegisterRequest) GetByteSize() uint64
- func (x *SystemSharedMemoryRegisterRequest) GetKey() string
- func (x *SystemSharedMemoryRegisterRequest) GetName() string
- func (x *SystemSharedMemoryRegisterRequest) GetOffset() uint64
- func (*SystemSharedMemoryRegisterRequest) ProtoMessage()
- func (x *SystemSharedMemoryRegisterRequest) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryRegisterRequest) Reset()
- func (x *SystemSharedMemoryRegisterRequest) String() string
- type SystemSharedMemoryRegisterResponse
- func (*SystemSharedMemoryRegisterResponse) Descriptor() ([]byte, []int)deprecated
- func (*SystemSharedMemoryRegisterResponse) ProtoMessage()
- func (x *SystemSharedMemoryRegisterResponse) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryRegisterResponse) Reset()
- func (x *SystemSharedMemoryRegisterResponse) String() string
- type SystemSharedMemoryStatusRequest
- func (*SystemSharedMemoryStatusRequest) Descriptor() ([]byte, []int)deprecated
- func (x *SystemSharedMemoryStatusRequest) GetName() string
- func (*SystemSharedMemoryStatusRequest) ProtoMessage()
- func (x *SystemSharedMemoryStatusRequest) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryStatusRequest) Reset()
- func (x *SystemSharedMemoryStatusRequest) String() string
- type SystemSharedMemoryStatusResponse
- func (*SystemSharedMemoryStatusResponse) Descriptor() ([]byte, []int)deprecated
- func (x *SystemSharedMemoryStatusResponse) GetRegions() map[string]*SystemSharedMemoryStatusResponse_RegionStatus
- func (*SystemSharedMemoryStatusResponse) ProtoMessage()
- func (x *SystemSharedMemoryStatusResponse) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryStatusResponse) Reset()
- func (x *SystemSharedMemoryStatusResponse) String() string
- type SystemSharedMemoryStatusResponse_RegionStatus
- func (*SystemSharedMemoryStatusResponse_RegionStatus) Descriptor() ([]byte, []int)deprecated
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetByteSize() uint64
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetKey() string
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetName() string
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetOffset() uint64
- func (*SystemSharedMemoryStatusResponse_RegionStatus) ProtoMessage()
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) Reset()
- func (x *SystemSharedMemoryStatusResponse_RegionStatus) String() string
- type SystemSharedMemoryUnregisterRequest
- func (*SystemSharedMemoryUnregisterRequest) Descriptor() ([]byte, []int)deprecated
- func (x *SystemSharedMemoryUnregisterRequest) GetName() string
- func (*SystemSharedMemoryUnregisterRequest) ProtoMessage()
- func (x *SystemSharedMemoryUnregisterRequest) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryUnregisterRequest) Reset()
- func (x *SystemSharedMemoryUnregisterRequest) String() string
- type SystemSharedMemoryUnregisterResponse
- func (*SystemSharedMemoryUnregisterResponse) Descriptor() ([]byte, []int)deprecated
- func (*SystemSharedMemoryUnregisterResponse) ProtoMessage()
- func (x *SystemSharedMemoryUnregisterResponse) ProtoReflect() protoreflect.Message
- func (x *SystemSharedMemoryUnregisterResponse) Reset()
- func (x *SystemSharedMemoryUnregisterResponse) String() string
- type TraceSettingRequest
- func (*TraceSettingRequest) Descriptor() ([]byte, []int)deprecated
- func (x *TraceSettingRequest) GetModelName() string
- func (x *TraceSettingRequest) GetSettings() map[string]*TraceSettingRequest_SettingValue
- func (*TraceSettingRequest) ProtoMessage()
- func (x *TraceSettingRequest) ProtoReflect() protoreflect.Message
- func (x *TraceSettingRequest) Reset()
- func (x *TraceSettingRequest) String() string
- type TraceSettingRequest_SettingValue
- func (*TraceSettingRequest_SettingValue) Descriptor() ([]byte, []int)deprecated
- func (x *TraceSettingRequest_SettingValue) GetValue() []string
- func (*TraceSettingRequest_SettingValue) ProtoMessage()
- func (x *TraceSettingRequest_SettingValue) ProtoReflect() protoreflect.Message
- func (x *TraceSettingRequest_SettingValue) Reset()
- func (x *TraceSettingRequest_SettingValue) String() string
- type TraceSettingResponse
- func (*TraceSettingResponse) Descriptor() ([]byte, []int)deprecated
- func (x *TraceSettingResponse) GetSettings() map[string]*TraceSettingResponse_SettingValue
- func (*TraceSettingResponse) ProtoMessage()
- func (x *TraceSettingResponse) ProtoReflect() protoreflect.Message
- func (x *TraceSettingResponse) Reset()
- func (x *TraceSettingResponse) String() string
- type TraceSettingResponse_SettingValue
- func (*TraceSettingResponse_SettingValue) Descriptor() ([]byte, []int)deprecated
- func (x *TraceSettingResponse_SettingValue) GetValue() []string
- func (*TraceSettingResponse_SettingValue) ProtoMessage()
- func (x *TraceSettingResponse_SettingValue) ProtoReflect() protoreflect.Message
- func (x *TraceSettingResponse_SettingValue) Reset()
- func (x *TraceSettingResponse_SettingValue) String() string
- type UnimplementedGRPCInferenceServiceServer
- func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryRegister(context.Context, *CudaSharedMemoryRegisterRequest) (*CudaSharedMemoryRegisterResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryStatus(context.Context, *CudaSharedMemoryStatusRequest) (*CudaSharedMemoryStatusResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryUnregister(context.Context, *CudaSharedMemoryUnregisterRequest) (*CudaSharedMemoryUnregisterResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) LogSettings(context.Context, *LogSettingsRequest) (*LogSettingsResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ModelConfig(context.Context, *ModelConfigRequest) (*ModelConfigResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ModelInfer(context.Context, *ModelInferRequest) (*ModelInferResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ModelMetadata(context.Context, *ModelMetadataRequest) (*ModelMetadataResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ModelReady(context.Context, *ModelReadyRequest) (*ModelReadyResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ModelStatistics(context.Context, *ModelStatisticsRequest) (*ModelStatisticsResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ModelStreamInfer(grpc.BidiStreamingServer[ModelInferRequest, ModelStreamInferResponse]) error
- func (UnimplementedGRPCInferenceServiceServer) RepositoryIndex(context.Context, *RepositoryIndexRequest) (*RepositoryIndexResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) RepositoryModelLoad(context.Context, *RepositoryModelLoadRequest) (*RepositoryModelLoadResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) RepositoryModelUnload(context.Context, *RepositoryModelUnloadRequest) (*RepositoryModelUnloadResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ServerLive(context.Context, *ServerLiveRequest) (*ServerLiveResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ServerMetadata(context.Context, *ServerMetadataRequest) (*ServerMetadataResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) ServerReady(context.Context, *ServerReadyRequest) (*ServerReadyResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryRegister(context.Context, *SystemSharedMemoryRegisterRequest) (*SystemSharedMemoryRegisterResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryStatus(context.Context, *SystemSharedMemoryStatusRequest) (*SystemSharedMemoryStatusResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryUnregister(context.Context, *SystemSharedMemoryUnregisterRequest) (*SystemSharedMemoryUnregisterResponse, error)
- func (UnimplementedGRPCInferenceServiceServer) TraceSetting(context.Context, *TraceSettingRequest) (*TraceSettingResponse, error)
- type UnsafeGRPCInferenceServiceServer
Constants ¶
const ( GRPCInferenceService_ServerLive_FullMethodName = "/inference.GRPCInferenceService/ServerLive" GRPCInferenceService_ServerReady_FullMethodName = "/inference.GRPCInferenceService/ServerReady" GRPCInferenceService_ModelReady_FullMethodName = "/inference.GRPCInferenceService/ModelReady" GRPCInferenceService_ServerMetadata_FullMethodName = "/inference.GRPCInferenceService/ServerMetadata" GRPCInferenceService_ModelMetadata_FullMethodName = "/inference.GRPCInferenceService/ModelMetadata" GRPCInferenceService_ModelInfer_FullMethodName = "/inference.GRPCInferenceService/ModelInfer" GRPCInferenceService_ModelStreamInfer_FullMethodName = "/inference.GRPCInferenceService/ModelStreamInfer" GRPCInferenceService_ModelConfig_FullMethodName = "/inference.GRPCInferenceService/ModelConfig" GRPCInferenceService_ModelStatistics_FullMethodName = "/inference.GRPCInferenceService/ModelStatistics" GRPCInferenceService_RepositoryIndex_FullMethodName = "/inference.GRPCInferenceService/RepositoryIndex" GRPCInferenceService_RepositoryModelLoad_FullMethodName = "/inference.GRPCInferenceService/RepositoryModelLoad" GRPCInferenceService_RepositoryModelUnload_FullMethodName = "/inference.GRPCInferenceService/RepositoryModelUnload" GRPCInferenceService_TraceSetting_FullMethodName = "/inference.GRPCInferenceService/TraceSetting" GRPCInferenceService_LogSettings_FullMethodName = "/inference.GRPCInferenceService/LogSettings" )
Variables ¶
var ( DataType_name = map[int32]string{ 0: "TYPE_INVALID", 1: "TYPE_BOOL", 2: "TYPE_UINT8", 3: "TYPE_UINT16", 4: "TYPE_UINT32", 5: "TYPE_UINT64", 6: "TYPE_INT8", 7: "TYPE_INT16", 8: "TYPE_INT32", 9: "TYPE_INT64", 10: "TYPE_FP16", 11: "TYPE_FP32", 12: "TYPE_FP64", 13: "TYPE_STRING", 14: "TYPE_BF16", } DataType_value = map[string]int32{ "TYPE_INVALID": 0, "TYPE_BOOL": 1, "TYPE_UINT8": 2, "TYPE_UINT16": 3, "TYPE_UINT32": 4, "TYPE_UINT64": 5, "TYPE_INT8": 6, "TYPE_INT16": 7, "TYPE_INT32": 8, "TYPE_INT64": 9, "TYPE_FP16": 10, "TYPE_FP32": 11, "TYPE_FP64": 12, "TYPE_STRING": 13, "TYPE_BF16": 14, } )
Enum value maps for DataType.
var ( ModelInstanceGroup_Kind_name = map[int32]string{ 0: "KIND_AUTO", 1: "KIND_GPU", 2: "KIND_CPU", 3: "KIND_MODEL", } ModelInstanceGroup_Kind_value = map[string]int32{ "KIND_AUTO": 0, "KIND_GPU": 1, "KIND_CPU": 2, "KIND_MODEL": 3, } )
Enum value maps for ModelInstanceGroup_Kind.
var ( ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind_name = map[int32]string{ 0: "KIND_NVDLA", } ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind_value = map[string]int32{ "KIND_NVDLA": 0, } )
Enum value maps for ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind.
var ( ModelInput_Format_name = map[int32]string{ 0: "FORMAT_NONE", 1: "FORMAT_NHWC", 2: "FORMAT_NCHW", } ModelInput_Format_value = map[string]int32{ "FORMAT_NONE": 0, "FORMAT_NHWC": 1, "FORMAT_NCHW": 2, } )
Enum value maps for ModelInput_Format.
var ( BatchInput_Kind_name = map[int32]string{ 0: "BATCH_ELEMENT_COUNT", 1: "BATCH_ACCUMULATED_ELEMENT_COUNT", 2: "BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO", 3: "BATCH_MAX_ELEMENT_COUNT_AS_SHAPE", 4: "BATCH_ITEM_SHAPE", 5: "BATCH_ITEM_SHAPE_FLATTEN", } BatchInput_Kind_value = map[string]int32{ "BATCH_ELEMENT_COUNT": 0, "BATCH_ACCUMULATED_ELEMENT_COUNT": 1, "BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO": 2, "BATCH_MAX_ELEMENT_COUNT_AS_SHAPE": 3, "BATCH_ITEM_SHAPE": 4, "BATCH_ITEM_SHAPE_FLATTEN": 5, } )
Enum value maps for BatchInput_Kind.
var ( BatchOutput_Kind_name = map[int32]string{ 0: "BATCH_SCATTER_WITH_INPUT_SHAPE", } BatchOutput_Kind_value = map[string]int32{ "BATCH_SCATTER_WITH_INPUT_SHAPE": 0, } )
Enum value maps for BatchOutput_Kind.
var ( ModelOptimizationPolicy_ModelPriority_name = map[int32]string{ 0: "PRIORITY_DEFAULT", 1: "PRIORITY_MAX", 2: "PRIORITY_MIN", } ModelOptimizationPolicy_ModelPriority_value = map[string]int32{ "PRIORITY_DEFAULT": 0, "PRIORITY_MAX": 1, "PRIORITY_MIN": 2, } )
Enum value maps for ModelOptimizationPolicy_ModelPriority.
var ( ModelQueuePolicy_TimeoutAction_name = map[int32]string{ 0: "REJECT", 1: "DELAY", } ModelQueuePolicy_TimeoutAction_value = map[string]int32{ "REJECT": 0, "DELAY": 1, } )
Enum value maps for ModelQueuePolicy_TimeoutAction.
var ( ModelSequenceBatching_Control_Kind_name = map[int32]string{ 0: "CONTROL_SEQUENCE_START", 1: "CONTROL_SEQUENCE_READY", 2: "CONTROL_SEQUENCE_END", 3: "CONTROL_SEQUENCE_CORRID", } ModelSequenceBatching_Control_Kind_value = map[string]int32{ "CONTROL_SEQUENCE_START": 0, "CONTROL_SEQUENCE_READY": 1, "CONTROL_SEQUENCE_END": 2, "CONTROL_SEQUENCE_CORRID": 3, } )
Enum value maps for ModelSequenceBatching_Control_Kind.
var File_grpc_service_proto protoreflect.FileDescriptor
var File_model_config_proto protoreflect.FileDescriptor
var GRPCInferenceService_ServiceDesc = grpc.ServiceDesc{ ServiceName: "inference.GRPCInferenceService", HandlerType: (*GRPCInferenceServiceServer)(nil), Methods: []grpc.MethodDesc{ { MethodName: "ServerLive", Handler: _GRPCInferenceService_ServerLive_Handler, }, { MethodName: "ServerReady", Handler: _GRPCInferenceService_ServerReady_Handler, }, { MethodName: "ModelReady", Handler: _GRPCInferenceService_ModelReady_Handler, }, { MethodName: "ServerMetadata", Handler: _GRPCInferenceService_ServerMetadata_Handler, }, { MethodName: "ModelMetadata", Handler: _GRPCInferenceService_ModelMetadata_Handler, }, { MethodName: "ModelInfer", Handler: _GRPCInferenceService_ModelInfer_Handler, }, { MethodName: "ModelConfig", Handler: _GRPCInferenceService_ModelConfig_Handler, }, { MethodName: "ModelStatistics", Handler: _GRPCInferenceService_ModelStatistics_Handler, }, { MethodName: "RepositoryIndex", Handler: _GRPCInferenceService_RepositoryIndex_Handler, }, { MethodName: "RepositoryModelLoad", Handler: _GRPCInferenceService_RepositoryModelLoad_Handler, }, { MethodName: "RepositoryModelUnload", Handler: _GRPCInferenceService_RepositoryModelUnload_Handler, }, { MethodName: "SystemSharedMemoryStatus", Handler: _GRPCInferenceService_SystemSharedMemoryStatus_Handler, }, { MethodName: "SystemSharedMemoryRegister", Handler: _GRPCInferenceService_SystemSharedMemoryRegister_Handler, }, { MethodName: "SystemSharedMemoryUnregister", Handler: _GRPCInferenceService_SystemSharedMemoryUnregister_Handler, }, { MethodName: "CudaSharedMemoryStatus", Handler: _GRPCInferenceService_CudaSharedMemoryStatus_Handler, }, { MethodName: "CudaSharedMemoryRegister", Handler: _GRPCInferenceService_CudaSharedMemoryRegister_Handler, }, { MethodName: "CudaSharedMemoryUnregister", Handler: _GRPCInferenceService_CudaSharedMemoryUnregister_Handler, }, { MethodName: "TraceSetting", Handler: _GRPCInferenceService_TraceSetting_Handler, }, { MethodName: "LogSettings", Handler: _GRPCInferenceService_LogSettings_Handler, 
}, }, Streams: []grpc.StreamDesc{ { StreamName: "ModelStreamInfer", Handler: _GRPCInferenceService_ModelStreamInfer_Handler, ServerStreams: true, ClientStreams: true, }, }, Metadata: "grpc_service.proto", }
GRPCInferenceService_ServiceDesc is the grpc.ServiceDesc for GRPCInferenceService service. It's only intended for direct use with grpc.RegisterService, and not to be introspected or modified (even as a copy).
Functions ¶
func RegisterGRPCInferenceServiceServer ¶
func RegisterGRPCInferenceServiceServer(s grpc.ServiceRegistrar, srv GRPCInferenceServiceServer)
Types ¶
type BatchInput ¶
type BatchInput struct { // @@ .. cpp:var:: Kind kind // @@ // @@ The kind of this batch input. // @@ Kind BatchInput_Kind `protobuf:"varint,1,opt,name=kind,proto3,enum=inference.BatchInput_Kind" json:"kind,omitempty"` // @@ .. cpp:var:: string target_name (repeated) // @@ // @@ The name of the model inputs that the backend will create // @@ for this batch input. // @@ TargetName []string `protobuf:"bytes,2,rep,name=target_name,json=targetName,proto3" json:"target_name,omitempty"` // @@ .. cpp:var:: DataType data_type // @@ // @@ The input's datatype. The data type can be TYPE_INT32 or // @@ TYPE_FP32. // @@ DataType DataType `protobuf:"varint,3,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"` // @@ .. cpp:var:: string source_input (repeated) // @@ // @@ The backend derives the value for each batch input from one or // @@ more other inputs. 'source_input' gives the names of those // @@ inputs. // @@ SourceInput []string `protobuf:"bytes,4,rep,name=source_input,json=sourceInput,proto3" json:"source_input,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message BatchInput @@ @@ A batch input is an additional input that must be added by @@ the backend based on all the requests in a batch. @@
func (*BatchInput) Descriptor
deprecated
func (*BatchInput) Descriptor() ([]byte, []int)
Deprecated: Use BatchInput.ProtoReflect.Descriptor instead.
func (*BatchInput) GetDataType ¶
func (x *BatchInput) GetDataType() DataType
func (*BatchInput) GetKind ¶
func (x *BatchInput) GetKind() BatchInput_Kind
func (*BatchInput) GetSourceInput ¶
func (x *BatchInput) GetSourceInput() []string
func (*BatchInput) GetTargetName ¶
func (x *BatchInput) GetTargetName() []string
func (*BatchInput) ProtoMessage ¶
func (*BatchInput) ProtoMessage()
func (*BatchInput) ProtoReflect ¶
func (x *BatchInput) ProtoReflect() protoreflect.Message
func (*BatchInput) Reset ¶
func (x *BatchInput) Reset()
func (*BatchInput) String ¶
func (x *BatchInput) String() string
type BatchInput_Kind ¶
type BatchInput_Kind int32
@@ @@ .. cpp:enum:: Kind @@ @@ The kind of the batch input. @@
const ( // @@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0 // @@ // @@ The element count of the 'source_input' will be added as // @@ input with shape [1]. // @@ BatchInput_BATCH_ELEMENT_COUNT BatchInput_Kind = 0 // @@ .. cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1 // @@ // @@ The accumulated element count of the 'source_input' will be // @@ added as input with shape [1]. For example, if there is a // @@ batch of two request, each with 2 elements, an input of value // @@ 2 will be added to the first request, and an input of value // @@ 4 will be added to the second request. // @@ BatchInput_BATCH_ACCUMULATED_ELEMENT_COUNT BatchInput_Kind = 1 // @@ .. cpp:enumerator:: // @@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2 // @@ // @@ The accumulated element count of the 'source_input' will be // @@ added as input with shape [1], except for the first request // @@ in the batch. For the first request in the batch, the input // @@ will have shape [2] where the first element is value 0. // @@ BatchInput_BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO BatchInput_Kind = 2 // @@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3 // @@ // @@ Among the requests in the batch, the max element count of the // @@ 'source_input' will be added as input with shape // @@ [max_element_count] for the first request in the batch. // @@ For other requests, such input will be with shape [0]. // @@ The data of the tensor will be uninitialized. // @@ BatchInput_BATCH_MAX_ELEMENT_COUNT_AS_SHAPE BatchInput_Kind = 3 // @@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE = 4 // @@ // @@ Among the requests in the batch, the shape of the // @@ 'source_input' will be added as input with shape // @@ [batch_size, len(input_dim)]. For example, if one // @@ batch-2 input with shape [3, 1] and batch-1 input // @@ with shape [2, 2] are batched, the batch input will // @@ have shape [3, 2] and value [ [3, 1], [3, 1], [2, 2]]. 
// @@ BatchInput_BATCH_ITEM_SHAPE BatchInput_Kind = 4 // @@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE_FLATTEN = 5 // @@ // @@ Among the requests in the batch, the shape of the // @@ 'source_input' will be added as input with single dimensional // @@ shape [batch_size * len(input_dim)]. For example, if one // @@ batch-2 input with shape [3, 1] and batch-1 input // @@ with shape [2, 2] are batched, the batch input will // @@ have shape [6] and value [3, 1, 3, 1, 2, 2]. // @@ BatchInput_BATCH_ITEM_SHAPE_FLATTEN BatchInput_Kind = 5 )
func (BatchInput_Kind) Descriptor ¶
func (BatchInput_Kind) Descriptor() protoreflect.EnumDescriptor
func (BatchInput_Kind) Enum ¶
func (x BatchInput_Kind) Enum() *BatchInput_Kind
func (BatchInput_Kind) EnumDescriptor
deprecated
func (BatchInput_Kind) EnumDescriptor() ([]byte, []int)
Deprecated: Use BatchInput_Kind.Descriptor instead.
func (BatchInput_Kind) Number ¶
func (x BatchInput_Kind) Number() protoreflect.EnumNumber
func (BatchInput_Kind) String ¶
func (x BatchInput_Kind) String() string
func (BatchInput_Kind) Type ¶
func (BatchInput_Kind) Type() protoreflect.EnumType
type BatchOutput ¶
type BatchOutput struct { // @@ .. cpp:var:: string target_name (repeated) // @@ // @@ The name of the outputs to be produced by this batch output // @@ specification. // @@ TargetName []string `protobuf:"bytes,1,rep,name=target_name,json=targetName,proto3" json:"target_name,omitempty"` // @@ .. cpp:var:: Kind kind // @@ // @@ The kind of this batch output. // @@ Kind BatchOutput_Kind `protobuf:"varint,2,opt,name=kind,proto3,enum=inference.BatchOutput_Kind" json:"kind,omitempty"` // @@ .. cpp:var:: string source_input (repeated) // @@ // @@ The backend derives each batch output from one or more inputs. // @@ 'source_input' gives the names of those inputs. // @@ SourceInput []string `protobuf:"bytes,3,rep,name=source_input,json=sourceInput,proto3" json:"source_input,omitempty"` // contains filtered or unexported fields }
@@.. cpp:var:: message BatchOutput @@ @@ A batch output is an output produced by the model that must be handled @@ differently by the backend based on all the requests in a batch. @@
func (*BatchOutput) Descriptor
deprecated
func (*BatchOutput) Descriptor() ([]byte, []int)
Deprecated: Use BatchOutput.ProtoReflect.Descriptor instead.
func (*BatchOutput) GetKind ¶
func (x *BatchOutput) GetKind() BatchOutput_Kind
func (*BatchOutput) GetSourceInput ¶
func (x *BatchOutput) GetSourceInput() []string
func (*BatchOutput) GetTargetName ¶
func (x *BatchOutput) GetTargetName() []string
func (*BatchOutput) ProtoMessage ¶
func (*BatchOutput) ProtoMessage()
func (*BatchOutput) ProtoReflect ¶
func (x *BatchOutput) ProtoReflect() protoreflect.Message
func (*BatchOutput) Reset ¶
func (x *BatchOutput) Reset()
func (*BatchOutput) String ¶
func (x *BatchOutput) String() string
type BatchOutput_Kind ¶
type BatchOutput_Kind int32
@@ @@ .. cpp:enum:: Kind @@ @@ The kind of the batch output. @@
const ( // @@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0 // @@ // @@ The output should be scattered according to the shape of // @@ 'source_input'. The dynamic dimension of the output will // @@ be set to the value of the same dimension in the input. // @@ BatchOutput_BATCH_SCATTER_WITH_INPUT_SHAPE BatchOutput_Kind = 0 )
func (BatchOutput_Kind) Descriptor ¶
func (BatchOutput_Kind) Descriptor() protoreflect.EnumDescriptor
func (BatchOutput_Kind) Enum ¶
func (x BatchOutput_Kind) Enum() *BatchOutput_Kind
func (BatchOutput_Kind) EnumDescriptor
deprecated
func (BatchOutput_Kind) EnumDescriptor() ([]byte, []int)
Deprecated: Use BatchOutput_Kind.Descriptor instead.
func (BatchOutput_Kind) Number ¶
func (x BatchOutput_Kind) Number() protoreflect.EnumNumber
func (BatchOutput_Kind) String ¶
func (x BatchOutput_Kind) String() string
func (BatchOutput_Kind) Type ¶
func (BatchOutput_Kind) Type() protoreflect.EnumType
type CudaSharedMemoryRegisterRequest ¶
type CudaSharedMemoryRegisterRequest struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the region to register. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ // @@ The raw serialized cudaIPC handle. // @@ RawHandle []byte `protobuf:"bytes,2,opt,name=raw_handle,json=rawHandle,proto3" json:"raw_handle,omitempty"` // @@ // @@ The GPU device ID on which the cudaIPC handle was created. // @@ DeviceId int64 `protobuf:"varint,3,opt,name=device_id,json=deviceId,proto3" json:"device_id,omitempty"` // @@ // @@ Size of the shared memory block, in bytes. // @@ ByteSize uint64 `protobuf:"varint,4,opt,name=byte_size,json=byteSize,proto3" json:"byte_size,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message CudaSharedMemoryRegisterRequest @@ @@ Request message for CudaSharedMemoryRegister. @@
func (*CudaSharedMemoryRegisterRequest) Descriptor
deprecated
func (*CudaSharedMemoryRegisterRequest) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryRegisterRequest.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryRegisterRequest) GetByteSize ¶
func (x *CudaSharedMemoryRegisterRequest) GetByteSize() uint64
func (*CudaSharedMemoryRegisterRequest) GetDeviceId ¶
func (x *CudaSharedMemoryRegisterRequest) GetDeviceId() int64
func (*CudaSharedMemoryRegisterRequest) GetName ¶
func (x *CudaSharedMemoryRegisterRequest) GetName() string
func (*CudaSharedMemoryRegisterRequest) GetRawHandle ¶
func (x *CudaSharedMemoryRegisterRequest) GetRawHandle() []byte
func (*CudaSharedMemoryRegisterRequest) ProtoMessage ¶
func (*CudaSharedMemoryRegisterRequest) ProtoMessage()
func (*CudaSharedMemoryRegisterRequest) ProtoReflect ¶
func (x *CudaSharedMemoryRegisterRequest) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryRegisterRequest) Reset ¶
func (x *CudaSharedMemoryRegisterRequest) Reset()
func (*CudaSharedMemoryRegisterRequest) String ¶
func (x *CudaSharedMemoryRegisterRequest) String() string
type CudaSharedMemoryRegisterResponse ¶
type CudaSharedMemoryRegisterResponse struct {
// contains filtered or unexported fields
}
@@ @@.. cpp:var:: message CudaSharedMemoryRegisterResponse @@ @@ Response message for CudaSharedMemoryRegister. @@
func (*CudaSharedMemoryRegisterResponse) Descriptor
deprecated
func (*CudaSharedMemoryRegisterResponse) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryRegisterResponse.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryRegisterResponse) ProtoMessage ¶
func (*CudaSharedMemoryRegisterResponse) ProtoMessage()
func (*CudaSharedMemoryRegisterResponse) ProtoReflect ¶
func (x *CudaSharedMemoryRegisterResponse) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryRegisterResponse) Reset ¶
func (x *CudaSharedMemoryRegisterResponse) Reset()
func (*CudaSharedMemoryRegisterResponse) String ¶
func (x *CudaSharedMemoryRegisterResponse) String() string
type CudaSharedMemoryStatusRequest ¶
type CudaSharedMemoryStatusRequest struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the region to get status for. If empty the // @@ status is returned for all registered regions. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message CudaSharedMemoryStatusRequest @@ @@ Request message for CudaSharedMemoryStatus. @@
func (*CudaSharedMemoryStatusRequest) Descriptor
deprecated
func (*CudaSharedMemoryStatusRequest) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryStatusRequest.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryStatusRequest) GetName ¶
func (x *CudaSharedMemoryStatusRequest) GetName() string
func (*CudaSharedMemoryStatusRequest) ProtoMessage ¶
func (*CudaSharedMemoryStatusRequest) ProtoMessage()
func (*CudaSharedMemoryStatusRequest) ProtoReflect ¶
func (x *CudaSharedMemoryStatusRequest) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryStatusRequest) Reset ¶
func (x *CudaSharedMemoryStatusRequest) Reset()
func (*CudaSharedMemoryStatusRequest) String ¶
func (x *CudaSharedMemoryStatusRequest) String() string
type CudaSharedMemoryStatusResponse ¶
type CudaSharedMemoryStatusResponse struct { // @@ .. cpp:var:: map<string,RegionStatus> regions // @@ // @@ Status for each of the registered regions, indexed by // @@ region name. // @@ Regions map[string]*CudaSharedMemoryStatusResponse_RegionStatus `` /* 155-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message CudaSharedMemoryStatusResponse @@ @@ Response message for CudaSharedMemoryStatus. @@
func (*CudaSharedMemoryStatusResponse) Descriptor
deprecated
func (*CudaSharedMemoryStatusResponse) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryStatusResponse.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryStatusResponse) GetRegions ¶
func (x *CudaSharedMemoryStatusResponse) GetRegions() map[string]*CudaSharedMemoryStatusResponse_RegionStatus
func (*CudaSharedMemoryStatusResponse) ProtoMessage ¶
func (*CudaSharedMemoryStatusResponse) ProtoMessage()
func (*CudaSharedMemoryStatusResponse) ProtoReflect ¶
func (x *CudaSharedMemoryStatusResponse) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryStatusResponse) Reset ¶
func (x *CudaSharedMemoryStatusResponse) Reset()
func (*CudaSharedMemoryStatusResponse) String ¶
func (x *CudaSharedMemoryStatusResponse) String() string
type CudaSharedMemoryStatusResponse_RegionStatus ¶
type CudaSharedMemoryStatusResponse_RegionStatus struct { // @@ .. cpp:var:: string name // @@ // @@ The name for the shared memory region. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ // @@ The GPU device ID where the cudaIPC handle was created. // @@ DeviceId uint64 `protobuf:"varint,2,opt,name=device_id,json=deviceId,proto3" json:"device_id,omitempty"` // @@ // @@ Size of the shared memory region, in bytes. // @@ ByteSize uint64 `protobuf:"varint,3,opt,name=byte_size,json=byteSize,proto3" json:"byte_size,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message RegionStatus @@ @@ Status for a shared memory region. @@
func (*CudaSharedMemoryStatusResponse_RegionStatus) Descriptor
deprecated
func (*CudaSharedMemoryStatusResponse_RegionStatus) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryStatusResponse_RegionStatus.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryStatusResponse_RegionStatus) GetByteSize ¶
func (x *CudaSharedMemoryStatusResponse_RegionStatus) GetByteSize() uint64
func (*CudaSharedMemoryStatusResponse_RegionStatus) GetDeviceId ¶
func (x *CudaSharedMemoryStatusResponse_RegionStatus) GetDeviceId() uint64
func (*CudaSharedMemoryStatusResponse_RegionStatus) GetName ¶
func (x *CudaSharedMemoryStatusResponse_RegionStatus) GetName() string
func (*CudaSharedMemoryStatusResponse_RegionStatus) ProtoMessage ¶
func (*CudaSharedMemoryStatusResponse_RegionStatus) ProtoMessage()
func (*CudaSharedMemoryStatusResponse_RegionStatus) ProtoReflect ¶
func (x *CudaSharedMemoryStatusResponse_RegionStatus) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryStatusResponse_RegionStatus) Reset ¶
func (x *CudaSharedMemoryStatusResponse_RegionStatus) Reset()
func (*CudaSharedMemoryStatusResponse_RegionStatus) String ¶
func (x *CudaSharedMemoryStatusResponse_RegionStatus) String() string
type CudaSharedMemoryUnregisterRequest ¶
type CudaSharedMemoryUnregisterRequest struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the cuda region to unregister. If empty // @@ all cuda shared-memory regions are unregistered. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message CudaSharedMemoryUnregisterRequest @@ @@ Request message for CudaSharedMemoryUnregister. @@
func (*CudaSharedMemoryUnregisterRequest) Descriptor
deprecated
func (*CudaSharedMemoryUnregisterRequest) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryUnregisterRequest.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryUnregisterRequest) GetName ¶
func (x *CudaSharedMemoryUnregisterRequest) GetName() string
func (*CudaSharedMemoryUnregisterRequest) ProtoMessage ¶
func (*CudaSharedMemoryUnregisterRequest) ProtoMessage()
func (*CudaSharedMemoryUnregisterRequest) ProtoReflect ¶
func (x *CudaSharedMemoryUnregisterRequest) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryUnregisterRequest) Reset ¶
func (x *CudaSharedMemoryUnregisterRequest) Reset()
func (*CudaSharedMemoryUnregisterRequest) String ¶
func (x *CudaSharedMemoryUnregisterRequest) String() string
type CudaSharedMemoryUnregisterResponse ¶
type CudaSharedMemoryUnregisterResponse struct {
// contains filtered or unexported fields
}
@@ @@.. cpp:var:: message CudaSharedMemoryUnregisterResponse @@ @@ Response message for CudaSharedMemoryUnregister. @@
func (*CudaSharedMemoryUnregisterResponse) Descriptor
deprecated
func (*CudaSharedMemoryUnregisterResponse) Descriptor() ([]byte, []int)
Deprecated: Use CudaSharedMemoryUnregisterResponse.ProtoReflect.Descriptor instead.
func (*CudaSharedMemoryUnregisterResponse) ProtoMessage ¶
func (*CudaSharedMemoryUnregisterResponse) ProtoMessage()
func (*CudaSharedMemoryUnregisterResponse) ProtoReflect ¶
func (x *CudaSharedMemoryUnregisterResponse) ProtoReflect() protoreflect.Message
func (*CudaSharedMemoryUnregisterResponse) Reset ¶
func (x *CudaSharedMemoryUnregisterResponse) Reset()
func (*CudaSharedMemoryUnregisterResponse) String ¶
func (x *CudaSharedMemoryUnregisterResponse) String() string
type DataType ¶
type DataType int32
@@ @@.. cpp:enum:: DataType @@ @@ Data types supported for input and output tensors. @@
const ( // @@ .. cpp:enumerator:: DataType::INVALID = 0 DataType_TYPE_INVALID DataType = 0 // @@ .. cpp:enumerator:: DataType::BOOL = 1 DataType_TYPE_BOOL DataType = 1 // @@ .. cpp:enumerator:: DataType::UINT8 = 2 DataType_TYPE_UINT8 DataType = 2 // @@ .. cpp:enumerator:: DataType::UINT16 = 3 DataType_TYPE_UINT16 DataType = 3 // @@ .. cpp:enumerator:: DataType::UINT32 = 4 DataType_TYPE_UINT32 DataType = 4 // @@ .. cpp:enumerator:: DataType::UINT64 = 5 DataType_TYPE_UINT64 DataType = 5 // @@ .. cpp:enumerator:: DataType::INT8 = 6 DataType_TYPE_INT8 DataType = 6 // @@ .. cpp:enumerator:: DataType::INT16 = 7 DataType_TYPE_INT16 DataType = 7 // @@ .. cpp:enumerator:: DataType::INT32 = 8 DataType_TYPE_INT32 DataType = 8 // @@ .. cpp:enumerator:: DataType::INT64 = 9 DataType_TYPE_INT64 DataType = 9 // @@ .. cpp:enumerator:: DataType::FP16 = 10 DataType_TYPE_FP16 DataType = 10 // @@ .. cpp:enumerator:: DataType::FP32 = 11 DataType_TYPE_FP32 DataType = 11 // @@ .. cpp:enumerator:: DataType::FP64 = 12 DataType_TYPE_FP64 DataType = 12 // @@ .. cpp:enumerator:: DataType::STRING = 13 DataType_TYPE_STRING DataType = 13 // @@ .. cpp:enumerator:: DataType::BF16 = 14 DataType_TYPE_BF16 DataType = 14 )
func (DataType) Descriptor ¶
func (DataType) Descriptor() protoreflect.EnumDescriptor
func (DataType) EnumDescriptor
deprecated
func (DataType) EnumDescriptor() ([]byte, []int)
Deprecated: Use DataType.Descriptor instead.
func (DataType) Number ¶
func (x DataType) Number() protoreflect.EnumNumber
func (DataType) Type ¶
func (DataType) Type() protoreflect.EnumType
type GRPCInferenceServiceClient ¶
type GRPCInferenceServiceClient interface { // @@ .. cpp:var:: rpc ServerLive(ServerLiveRequest) returns // @@ (ServerLiveResponse) // @@ // @@ Check liveness of the inference server. // @@ ServerLive(ctx context.Context, in *ServerLiveRequest, opts ...grpc.CallOption) (*ServerLiveResponse, error) // @@ .. cpp:var:: rpc ServerReady(ServerReadyRequest) returns // @@ (ServerReadyResponse) // @@ // @@ Check readiness of the inference server. // @@ ServerReady(ctx context.Context, in *ServerReadyRequest, opts ...grpc.CallOption) (*ServerReadyResponse, error) // @@ .. cpp:var:: rpc ModelReady(ModelReadyRequest) returns // @@ (ModelReadyResponse) // @@ // @@ Check readiness of a model in the inference server. // @@ ModelReady(ctx context.Context, in *ModelReadyRequest, opts ...grpc.CallOption) (*ModelReadyResponse, error) // @@ .. cpp:var:: rpc ServerMetadata(ServerMetadataRequest) returns // @@ (ServerMetadataResponse) // @@ // @@ Get server metadata. // @@ ServerMetadata(ctx context.Context, in *ServerMetadataRequest, opts ...grpc.CallOption) (*ServerMetadataResponse, error) // @@ .. cpp:var:: rpc ModelMetadata(ModelMetadataRequest) returns // @@ (ModelMetadataResponse) // @@ // @@ Get model metadata. // @@ ModelMetadata(ctx context.Context, in *ModelMetadataRequest, opts ...grpc.CallOption) (*ModelMetadataResponse, error) // @@ .. cpp:var:: rpc ModelInfer(ModelInferRequest) returns // @@ (ModelInferResponse) // @@ // @@ Perform inference using a specific model. // @@ ModelInfer(ctx context.Context, in *ModelInferRequest, opts ...grpc.CallOption) (*ModelInferResponse, error) // @@ .. cpp:var:: rpc ModelStreamInfer(stream ModelInferRequest) returns // @@ (stream ModelStreamInferResponse) // @@ // @@ Perform streaming inference. // @@ ModelStreamInfer(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[ModelInferRequest, ModelStreamInferResponse], error) // @@ .. 
cpp:var:: rpc ModelConfig(ModelConfigRequest) returns // @@ (ModelConfigResponse) // @@ // @@ Get model configuration. // @@ ModelConfig(ctx context.Context, in *ModelConfigRequest, opts ...grpc.CallOption) (*ModelConfigResponse, error) // @@ .. cpp:var:: rpc ModelStatistics( // @@ ModelStatisticsRequest) // @@ returns (ModelStatisticsResponse) // @@ // @@ Get the cumulative inference statistics for a model. // @@ ModelStatistics(ctx context.Context, in *ModelStatisticsRequest, opts ...grpc.CallOption) (*ModelStatisticsResponse, error) // @@ .. cpp:var:: rpc RepositoryIndex(RepositoryIndexRequest) returns // @@ (RepositoryIndexResponse) // @@ // @@ Get the index of model repository contents. // @@ RepositoryIndex(ctx context.Context, in *RepositoryIndexRequest, opts ...grpc.CallOption) (*RepositoryIndexResponse, error) // @@ .. cpp:var:: rpc RepositoryModelLoad(RepositoryModelLoadRequest) returns // @@ (RepositoryModelLoadResponse) // @@ // @@ Load or reload a model from a repository. // @@ RepositoryModelLoad(ctx context.Context, in *RepositoryModelLoadRequest, opts ...grpc.CallOption) (*RepositoryModelLoadResponse, error) // @@ .. cpp:var:: rpc RepositoryModelUnload(RepositoryModelUnloadRequest) // @@ returns (RepositoryModelUnloadResponse) // @@ // @@ Unload a model. // @@ RepositoryModelUnload(ctx context.Context, in *RepositoryModelUnloadRequest, opts ...grpc.CallOption) (*RepositoryModelUnloadResponse, error) // @@ SystemSharedMemoryStatusRequest) // @@ returns (SystemSharedMemoryStatusRespose) // @@ // @@ Get the status of all registered system-shared-memory regions. // @@ SystemSharedMemoryStatus(ctx context.Context, in *SystemSharedMemoryStatusRequest, opts ...grpc.CallOption) (*SystemSharedMemoryStatusResponse, error) // @@ SystemSharedMemoryRegisterRequest) // @@ returns (SystemSharedMemoryRegisterResponse) // @@ // @@ Register a system-shared-memory region. 
// @@ SystemSharedMemoryRegister(ctx context.Context, in *SystemSharedMemoryRegisterRequest, opts ...grpc.CallOption) (*SystemSharedMemoryRegisterResponse, error) // @@ SystemSharedMemoryUnregisterRequest) // @@ returns (SystemSharedMemoryUnregisterResponse) // @@ // @@ Unregister a system-shared-memory region. // @@ SystemSharedMemoryUnregister(ctx context.Context, in *SystemSharedMemoryUnregisterRequest, opts ...grpc.CallOption) (*SystemSharedMemoryUnregisterResponse, error) // @@ CudaSharedMemoryStatusRequest) // @@ returns (CudaSharedMemoryStatusRespose) // @@ // @@ Get the status of all registered CUDA-shared-memory regions. // @@ CudaSharedMemoryStatus(ctx context.Context, in *CudaSharedMemoryStatusRequest, opts ...grpc.CallOption) (*CudaSharedMemoryStatusResponse, error) // @@ CudaSharedMemoryRegisterRequest) // @@ returns (CudaSharedMemoryRegisterResponse) // @@ // @@ Register a CUDA-shared-memory region. // @@ CudaSharedMemoryRegister(ctx context.Context, in *CudaSharedMemoryRegisterRequest, opts ...grpc.CallOption) (*CudaSharedMemoryRegisterResponse, error) // @@ CudaSharedMemoryUnregisterRequest) // @@ returns (CudaSharedMemoryUnregisterResponse) // @@ // @@ Unregister a CUDA-shared-memory region. // @@ CudaSharedMemoryUnregister(ctx context.Context, in *CudaSharedMemoryUnregisterRequest, opts ...grpc.CallOption) (*CudaSharedMemoryUnregisterResponse, error) // @@ .. cpp:var:: rpc TraceSetting(TraceSettingRequest) // @@ returns (TraceSettingResponse) // @@ // @@ Update and get the trace setting of the Triton server. // @@ TraceSetting(ctx context.Context, in *TraceSettingRequest, opts ...grpc.CallOption) (*TraceSettingResponse, error) // @@ .. cpp:var:: rpc LogSettings(LogSettingsRequest) // @@ returns (LogSettingsResponse) // @@ // @@ Update and get the log settings of the Triton server. // @@ LogSettings(ctx context.Context, in *LogSettingsRequest, opts ...grpc.CallOption) (*LogSettingsResponse, error) }
GRPCInferenceServiceClient is the client API for GRPCInferenceService service.
For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
@@ @@.. cpp:var:: service InferenceService @@ @@ Inference Server GRPC endpoints. @@
func NewGRPCInferenceServiceClient ¶
func NewGRPCInferenceServiceClient(cc grpc.ClientConnInterface) GRPCInferenceServiceClient
type GRPCInferenceServiceServer ¶
type GRPCInferenceServiceServer interface { // @@ .. cpp:var:: rpc ServerLive(ServerLiveRequest) returns // @@ (ServerLiveResponse) // @@ // @@ Check liveness of the inference server. // @@ ServerLive(context.Context, *ServerLiveRequest) (*ServerLiveResponse, error) // @@ .. cpp:var:: rpc ServerReady(ServerReadyRequest) returns // @@ (ServerReadyResponse) // @@ // @@ Check readiness of the inference server. // @@ ServerReady(context.Context, *ServerReadyRequest) (*ServerReadyResponse, error) // @@ .. cpp:var:: rpc ModelReady(ModelReadyRequest) returns // @@ (ModelReadyResponse) // @@ // @@ Check readiness of a model in the inference server. // @@ ModelReady(context.Context, *ModelReadyRequest) (*ModelReadyResponse, error) // @@ .. cpp:var:: rpc ServerMetadata(ServerMetadataRequest) returns // @@ (ServerMetadataResponse) // @@ // @@ Get server metadata. // @@ ServerMetadata(context.Context, *ServerMetadataRequest) (*ServerMetadataResponse, error) // @@ .. cpp:var:: rpc ModelMetadata(ModelMetadataRequest) returns // @@ (ModelMetadataResponse) // @@ // @@ Get model metadata. // @@ ModelMetadata(context.Context, *ModelMetadataRequest) (*ModelMetadataResponse, error) // @@ .. cpp:var:: rpc ModelInfer(ModelInferRequest) returns // @@ (ModelInferResponse) // @@ // @@ Perform inference using a specific model. // @@ ModelInfer(context.Context, *ModelInferRequest) (*ModelInferResponse, error) // @@ .. cpp:var:: rpc ModelStreamInfer(stream ModelInferRequest) returns // @@ (stream ModelStreamInferResponse) // @@ // @@ Perform streaming inference. // @@ ModelStreamInfer(grpc.BidiStreamingServer[ModelInferRequest, ModelStreamInferResponse]) error // @@ .. cpp:var:: rpc ModelConfig(ModelConfigRequest) returns // @@ (ModelConfigResponse) // @@ // @@ Get model configuration. // @@ ModelConfig(context.Context, *ModelConfigRequest) (*ModelConfigResponse, error) // @@ .. 
cpp:var:: rpc ModelStatistics( // @@ ModelStatisticsRequest) // @@ returns (ModelStatisticsResponse) // @@ // @@ Get the cumulative inference statistics for a model. // @@ ModelStatistics(context.Context, *ModelStatisticsRequest) (*ModelStatisticsResponse, error) // @@ .. cpp:var:: rpc RepositoryIndex(RepositoryIndexRequest) returns // @@ (RepositoryIndexResponse) // @@ // @@ Get the index of model repository contents. // @@ RepositoryIndex(context.Context, *RepositoryIndexRequest) (*RepositoryIndexResponse, error) // @@ .. cpp:var:: rpc RepositoryModelLoad(RepositoryModelLoadRequest) returns // @@ (RepositoryModelLoadResponse) // @@ // @@ Load or reload a model from a repository. // @@ RepositoryModelLoad(context.Context, *RepositoryModelLoadRequest) (*RepositoryModelLoadResponse, error) // @@ .. cpp:var:: rpc RepositoryModelUnload(RepositoryModelUnloadRequest) // @@ returns (RepositoryModelUnloadResponse) // @@ // @@ Unload a model. // @@ RepositoryModelUnload(context.Context, *RepositoryModelUnloadRequest) (*RepositoryModelUnloadResponse, error) // @@ SystemSharedMemoryStatusRequest) // @@ returns (SystemSharedMemoryStatusRespose) // @@ // @@ Get the status of all registered system-shared-memory regions. // @@ SystemSharedMemoryStatus(context.Context, *SystemSharedMemoryStatusRequest) (*SystemSharedMemoryStatusResponse, error) // @@ SystemSharedMemoryRegisterRequest) // @@ returns (SystemSharedMemoryRegisterResponse) // @@ // @@ Register a system-shared-memory region. // @@ SystemSharedMemoryRegister(context.Context, *SystemSharedMemoryRegisterRequest) (*SystemSharedMemoryRegisterResponse, error) // @@ SystemSharedMemoryUnregisterRequest) // @@ returns (SystemSharedMemoryUnregisterResponse) // @@ // @@ Unregister a system-shared-memory region. 
// @@ SystemSharedMemoryUnregister(context.Context, *SystemSharedMemoryUnregisterRequest) (*SystemSharedMemoryUnregisterResponse, error) // @@ CudaSharedMemoryStatusRequest) // @@ returns (CudaSharedMemoryStatusResponse) // @@ // @@ Get the status of all registered CUDA-shared-memory regions. // @@ CudaSharedMemoryStatus(context.Context, *CudaSharedMemoryStatusRequest) (*CudaSharedMemoryStatusResponse, error) // @@ CudaSharedMemoryRegisterRequest) // @@ returns (CudaSharedMemoryRegisterResponse) // @@ // @@ Register a CUDA-shared-memory region. // @@ CudaSharedMemoryRegister(context.Context, *CudaSharedMemoryRegisterRequest) (*CudaSharedMemoryRegisterResponse, error) // @@ CudaSharedMemoryUnregisterRequest) // @@ returns (CudaSharedMemoryUnregisterResponse) // @@ // @@ Unregister a CUDA-shared-memory region. // @@ CudaSharedMemoryUnregister(context.Context, *CudaSharedMemoryUnregisterRequest) (*CudaSharedMemoryUnregisterResponse, error) // @@ .. cpp:var:: rpc TraceSetting(TraceSettingRequest) // @@ returns (TraceSettingResponse) // @@ // @@ Update and get the trace setting of the Triton server. // @@ TraceSetting(context.Context, *TraceSettingRequest) (*TraceSettingResponse, error) // @@ .. cpp:var:: rpc LogSettings(LogSettingsRequest) // @@ returns (LogSettingsResponse) // @@ // @@ Update and get the log settings of the Triton server. // @@ LogSettings(context.Context, *LogSettingsRequest) (*LogSettingsResponse, error) // contains filtered or unexported methods }
GRPCInferenceServiceServer is the server API for GRPCInferenceService service. All implementations must embed UnimplementedGRPCInferenceServiceServer for forward compatibility.
@@ @@.. cpp:var:: service InferenceService @@ @@ Inference Server GRPC endpoints. @@
type GRPCInferenceService_ModelStreamInferClient ¶
type GRPCInferenceService_ModelStreamInferClient = grpc.BidiStreamingClient[ModelInferRequest, ModelStreamInferResponse]
This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type GRPCInferenceService_ModelStreamInferServer ¶
type GRPCInferenceService_ModelStreamInferServer = grpc.BidiStreamingServer[ModelInferRequest, ModelStreamInferResponse]
This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type InferBatchStatistics ¶
type InferBatchStatistics struct { // @@ .. cpp:var:: uint64 batch_size // @@ // @@ The size of the batch. // @@ BatchSize uint64 `protobuf:"varint,1,opt,name=batch_size,json=batchSize,proto3" json:"batch_size,omitempty"` // @@ .. cpp:var:: StatisticDuration compute_input // @@ // @@ The count and cumulative duration to prepare input tensor data as // @@ required by the model framework / backend with the given batch size. // @@ For example, this duration should include the time to copy input // @@ tensor data to the GPU. // @@ ComputeInput *StatisticDuration `protobuf:"bytes,2,opt,name=compute_input,json=computeInput,proto3" json:"compute_input,omitempty"` // @@ .. cpp:var:: StatisticDuration compute_infer // @@ // @@ The count and cumulative duration to execute the model with the given // @@ batch size. // @@ ComputeInfer *StatisticDuration `protobuf:"bytes,3,opt,name=compute_infer,json=computeInfer,proto3" json:"compute_infer,omitempty"` // @@ .. cpp:var:: StatisticDuration compute_output // @@ // @@ The count and cumulative duration to extract output tensor data // @@ produced by the model framework / backend with the given batch size. // @@ For example, this duration should include the time to copy output // @@ tensor data from the GPU. // @@ ComputeOutput *StatisticDuration `protobuf:"bytes,4,opt,name=compute_output,json=computeOutput,proto3" json:"compute_output,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message InferBatchStatistics @@ @@ Inference batch statistics. @@
func (*InferBatchStatistics) Descriptor
deprecated
func (*InferBatchStatistics) Descriptor() ([]byte, []int)
Deprecated: Use InferBatchStatistics.ProtoReflect.Descriptor instead.
func (*InferBatchStatistics) GetBatchSize ¶
func (x *InferBatchStatistics) GetBatchSize() uint64
func (*InferBatchStatistics) GetComputeInfer ¶
func (x *InferBatchStatistics) GetComputeInfer() *StatisticDuration
func (*InferBatchStatistics) GetComputeInput ¶
func (x *InferBatchStatistics) GetComputeInput() *StatisticDuration
func (*InferBatchStatistics) GetComputeOutput ¶
func (x *InferBatchStatistics) GetComputeOutput() *StatisticDuration
func (*InferBatchStatistics) ProtoMessage ¶
func (*InferBatchStatistics) ProtoMessage()
func (*InferBatchStatistics) ProtoReflect ¶
func (x *InferBatchStatistics) ProtoReflect() protoreflect.Message
func (*InferBatchStatistics) Reset ¶
func (x *InferBatchStatistics) Reset()
func (*InferBatchStatistics) String ¶
func (x *InferBatchStatistics) String() string
type InferParameter ¶
type InferParameter struct { // @@ .. cpp:var:: oneof parameter_choice // @@ // @@ The parameter value can be a string, an int64, // @@ an uint64, a double, or a boolean // @@ // @@ Note: double and uint64 are currently // @@ placeholders for future use and // @@ are not supported for custom parameters // @@ // // Types that are assignable to ParameterChoice: // // *InferParameter_BoolParam // *InferParameter_Int64Param // *InferParameter_StringParam // *InferParameter_DoubleParam // *InferParameter_Uint64Param ParameterChoice isInferParameter_ParameterChoice `protobuf_oneof:"parameter_choice"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message InferParameter @@ @@ An inference parameter value. @@
func (*InferParameter) Descriptor
deprecated
func (*InferParameter) Descriptor() ([]byte, []int)
Deprecated: Use InferParameter.ProtoReflect.Descriptor instead.
func (*InferParameter) GetBoolParam ¶
func (x *InferParameter) GetBoolParam() bool
func (*InferParameter) GetDoubleParam ¶
func (x *InferParameter) GetDoubleParam() float64
func (*InferParameter) GetInt64Param ¶
func (x *InferParameter) GetInt64Param() int64
func (*InferParameter) GetParameterChoice ¶
func (m *InferParameter) GetParameterChoice() isInferParameter_ParameterChoice
func (*InferParameter) GetStringParam ¶
func (x *InferParameter) GetStringParam() string
func (*InferParameter) GetUint64Param ¶
func (x *InferParameter) GetUint64Param() uint64
func (*InferParameter) ProtoMessage ¶
func (*InferParameter) ProtoMessage()
func (*InferParameter) ProtoReflect ¶
func (x *InferParameter) ProtoReflect() protoreflect.Message
func (*InferParameter) Reset ¶
func (x *InferParameter) Reset()
func (*InferParameter) String ¶
func (x *InferParameter) String() string
type InferParameter_BoolParam ¶
type InferParameter_BoolParam struct { // @@ .. cpp:var:: bool bool_param // @@ // @@ A boolean parameter value. // @@ BoolParam bool `protobuf:"varint,1,opt,name=bool_param,json=boolParam,proto3,oneof"` }
type InferParameter_DoubleParam ¶
type InferParameter_DoubleParam struct { // @@ .. cpp:var:: double double_param // @@ // @@ A double parameter value. // @@ DoubleParam float64 `protobuf:"fixed64,4,opt,name=double_param,json=doubleParam,proto3,oneof"` }
type InferParameter_Int64Param ¶
type InferParameter_Int64Param struct { // @@ .. cpp:var:: int64 int64_param // @@ // @@ An int64 parameter value. // @@ Int64Param int64 `protobuf:"varint,2,opt,name=int64_param,json=int64Param,proto3,oneof"` }
type InferParameter_StringParam ¶
type InferParameter_StringParam struct { // @@ .. cpp:var:: string string_param // @@ // @@ A string parameter value. // @@ StringParam string `protobuf:"bytes,3,opt,name=string_param,json=stringParam,proto3,oneof"` }
type InferParameter_Uint64Param ¶
type InferParameter_Uint64Param struct { // @@ .. cpp:var:: uint64 uint64_param // @@ // @@ A uint64 parameter value. // @@ // @@ Not supported for custom parameters // @@ Uint64Param uint64 `protobuf:"varint,5,opt,name=uint64_param,json=uint64Param,proto3,oneof"` }
type InferResponseStatistics ¶
type InferResponseStatistics struct { // @@ .. cpp:var:: StatisticDuration compute_infer // @@ // @@ The count and cumulative duration to compute a response. // @@ ComputeInfer *StatisticDuration `protobuf:"bytes,1,opt,name=compute_infer,json=computeInfer,proto3" json:"compute_infer,omitempty"` // @@ .. cpp:var:: StatisticDuration compute_output // @@ // @@ The count and cumulative duration to extract the output tensors of a // @@ response. // @@ ComputeOutput *StatisticDuration `protobuf:"bytes,2,opt,name=compute_output,json=computeOutput,proto3" json:"compute_output,omitempty"` // @@ .. cpp:var:: StatisticDuration success // @@ // @@ The count and cumulative duration for successful responses. // @@ Success *StatisticDuration `protobuf:"bytes,3,opt,name=success,proto3" json:"success,omitempty"` // @@ .. cpp:var:: StatisticDuration fail // @@ // @@ The count and cumulative duration for failed responses. // @@ Fail *StatisticDuration `protobuf:"bytes,4,opt,name=fail,proto3" json:"fail,omitempty"` // @@ .. cpp:var:: StatisticDuration empty_response // @@ // @@ The count and cumulative duration for empty responses. // @@ EmptyResponse *StatisticDuration `protobuf:"bytes,5,opt,name=empty_response,json=emptyResponse,proto3" json:"empty_response,omitempty"` // @@ .. cpp:var:: StatisticDuration cancel // @@ // @@ The count and cumulative duration, for cleaning up resources held by // @@ a cancelled request, for cancelled responses. // @@ Cancel *StatisticDuration `protobuf:"bytes,6,opt,name=cancel,proto3" json:"cancel,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message InferResponseStatistics @@ @@ Statistics per response. @@
func (*InferResponseStatistics) Descriptor
deprecated
func (*InferResponseStatistics) Descriptor() ([]byte, []int)
Deprecated: Use InferResponseStatistics.ProtoReflect.Descriptor instead.
func (*InferResponseStatistics) GetCancel ¶
func (x *InferResponseStatistics) GetCancel() *StatisticDuration
func (*InferResponseStatistics) GetComputeInfer ¶
func (x *InferResponseStatistics) GetComputeInfer() *StatisticDuration
func (*InferResponseStatistics) GetComputeOutput ¶
func (x *InferResponseStatistics) GetComputeOutput() *StatisticDuration
func (*InferResponseStatistics) GetEmptyResponse ¶
func (x *InferResponseStatistics) GetEmptyResponse() *StatisticDuration
func (*InferResponseStatistics) GetFail ¶
func (x *InferResponseStatistics) GetFail() *StatisticDuration
func (*InferResponseStatistics) GetSuccess ¶
func (x *InferResponseStatistics) GetSuccess() *StatisticDuration
func (*InferResponseStatistics) ProtoMessage ¶
func (*InferResponseStatistics) ProtoMessage()
func (*InferResponseStatistics) ProtoReflect ¶
func (x *InferResponseStatistics) ProtoReflect() protoreflect.Message
func (*InferResponseStatistics) Reset ¶
func (x *InferResponseStatistics) Reset()
func (*InferResponseStatistics) String ¶
func (x *InferResponseStatistics) String() string
type InferStatistics ¶
type InferStatistics struct { // @@ .. cpp:var:: StatisticDuration success // @@ // @@ Cumulative count and duration for successful inference // @@ request. The "success" count and cumulative duration includes // @@ cache hits. // @@ Success *StatisticDuration `protobuf:"bytes,1,opt,name=success,proto3" json:"success,omitempty"` // @@ .. cpp:var:: StatisticDuration fail // @@ // @@ Cumulative count and duration for failed inference // @@ request. // @@ Fail *StatisticDuration `protobuf:"bytes,2,opt,name=fail,proto3" json:"fail,omitempty"` // @@ .. cpp:var:: StatisticDuration queue // @@ // @@ The count and cumulative duration that inference requests wait in // @@ scheduling or other queues. The "queue" count and cumulative // @@ duration includes cache hits. // @@ Queue *StatisticDuration `protobuf:"bytes,3,opt,name=queue,proto3" json:"queue,omitempty"` // @@ .. cpp:var:: StatisticDuration compute_input // @@ // @@ The count and cumulative duration to prepare input tensor data as // @@ required by the model framework / backend. For example, this duration // @@ should include the time to copy input tensor data to the GPU. // @@ The "compute_input" count and cumulative duration do not account for // @@ requests that were a cache hit. See the "cache_hit" field for more // @@ info. // @@ ComputeInput *StatisticDuration `protobuf:"bytes,4,opt,name=compute_input,json=computeInput,proto3" json:"compute_input,omitempty"` // @@ .. cpp:var:: StatisticDuration compute_infer // @@ // @@ The count and cumulative duration to execute the model. // @@ The "compute_infer" count and cumulative duration do not account for // @@ requests that were a cache hit. See the "cache_hit" field for more // @@ info. // @@ ComputeInfer *StatisticDuration `protobuf:"bytes,5,opt,name=compute_infer,json=computeInfer,proto3" json:"compute_infer,omitempty"` // @@ .. 
cpp:var:: StatisticDuration compute_output // @@ // @@ The count and cumulative duration to extract output tensor data // @@ produced by the model framework / backend. For example, this duration // @@ should include the time to copy output tensor data from the GPU. // @@ The "compute_output" count and cumulative duration do not account for // @@ requests that were a cache hit. See the "cache_hit" field for more // @@ info. // @@ ComputeOutput *StatisticDuration `protobuf:"bytes,6,opt,name=compute_output,json=computeOutput,proto3" json:"compute_output,omitempty"` // @@ .. cpp:var:: StatisticDuration cache_hit // @@ // @@ The count of response cache hits and cumulative duration to lookup // @@ and extract output tensor data from the Response Cache on a cache // @@ hit. For example, this duration should include the time to copy // @@ output tensor data from the Response Cache to the response object. // @@ On cache hits, triton does not need to go to the model/backend // @@ for the output tensor data, so the "compute_input", "compute_infer", // @@ and "compute_output" fields are not updated. Assuming the response // @@ cache is enabled for a given model, a cache hit occurs for a // @@ request to that model when the request metadata (model name, // @@ model version, model inputs) hashes to an existing entry in the // @@ cache. On a cache miss, the request hash and response output tensor // @@ data is added to the cache. See response cache docs for more info: // @@ // https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md // @@ CacheHit *StatisticDuration `protobuf:"bytes,7,opt,name=cache_hit,json=cacheHit,proto3" json:"cache_hit,omitempty"` // @@ .. cpp:var:: StatisticDuration cache_miss // @@ // @@ The count of response cache misses and cumulative duration to lookup // @@ and insert output tensor data from the computed response to the // cache. 
// @@ For example, this duration should include the time to copy // @@ output tensor data from the response object to the Response Cache. // @@ Assuming the response cache is enabled for a given model, a cache // @@ miss occurs for a request to that model when the request metadata // @@ does NOT hash to an existing entry in the cache. See the response // @@ cache docs for more info: // @@ // https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md // @@ CacheMiss *StatisticDuration `protobuf:"bytes,8,opt,name=cache_miss,json=cacheMiss,proto3" json:"cache_miss,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message InferStatistics @@ @@ Inference statistics. @@
func (*InferStatistics) Descriptor
deprecated
func (*InferStatistics) Descriptor() ([]byte, []int)
Deprecated: Use InferStatistics.ProtoReflect.Descriptor instead.
func (*InferStatistics) GetCacheHit ¶
func (x *InferStatistics) GetCacheHit() *StatisticDuration
func (*InferStatistics) GetCacheMiss ¶
func (x *InferStatistics) GetCacheMiss() *StatisticDuration
func (*InferStatistics) GetComputeInfer ¶
func (x *InferStatistics) GetComputeInfer() *StatisticDuration
func (*InferStatistics) GetComputeInput ¶
func (x *InferStatistics) GetComputeInput() *StatisticDuration
func (*InferStatistics) GetComputeOutput ¶
func (x *InferStatistics) GetComputeOutput() *StatisticDuration
func (*InferStatistics) GetFail ¶
func (x *InferStatistics) GetFail() *StatisticDuration
func (*InferStatistics) GetQueue ¶
func (x *InferStatistics) GetQueue() *StatisticDuration
func (*InferStatistics) GetSuccess ¶
func (x *InferStatistics) GetSuccess() *StatisticDuration
func (*InferStatistics) ProtoMessage ¶
func (*InferStatistics) ProtoMessage()
func (*InferStatistics) ProtoReflect ¶
func (x *InferStatistics) ProtoReflect() protoreflect.Message
func (*InferStatistics) Reset ¶
func (x *InferStatistics) Reset()
func (*InferStatistics) String ¶
func (x *InferStatistics) String() string
type InferTensorContents ¶
type InferTensorContents struct { // @@ // @@ .. cpp:var:: bool bool_contents (repeated) // @@ // @@ Representation for BOOL data type. The size must match what is // @@ expected by the tensor's shape. The contents must be the flattened, // @@ one-dimensional, row-major order of the tensor elements. // @@ BoolContents []bool `protobuf:"varint,1,rep,packed,name=bool_contents,json=boolContents,proto3" json:"bool_contents,omitempty"` // @@ // @@ .. cpp:var:: int32 int_contents (repeated) // @@ // @@ Representation for INT8, INT16, and INT32 data types. The size // @@ must match what is expected by the tensor's shape. The contents // @@ must be the flattened, one-dimensional, row-major order of the // @@ tensor elements. // @@ IntContents []int32 `protobuf:"varint,2,rep,packed,name=int_contents,json=intContents,proto3" json:"int_contents,omitempty"` // @@ // @@ .. cpp:var:: int64 int64_contents (repeated) // @@ // @@ Representation for INT64 data types. The size must match what // @@ is expected by the tensor's shape. The contents must be the // @@ flattened, one-dimensional, row-major order of the tensor elements. // @@ Int64Contents []int64 `protobuf:"varint,3,rep,packed,name=int64_contents,json=int64Contents,proto3" json:"int64_contents,omitempty"` // @@ // @@ .. cpp:var:: uint32 uint_contents (repeated) // @@ // @@ Representation for UINT8, UINT16, and UINT32 data types. The size // @@ must match what is expected by the tensor's shape. The contents // @@ must be the flattened, one-dimensional, row-major order of the // @@ tensor elements. // @@ UintContents []uint32 `protobuf:"varint,4,rep,packed,name=uint_contents,json=uintContents,proto3" json:"uint_contents,omitempty"` // @@ // @@ .. cpp:var:: uint64 uint64_contents (repeated) // @@ // @@ Representation for UINT64 data types. The size must match what // @@ is expected by the tensor's shape. The contents must be the // @@ flattened, one-dimensional, row-major order of the tensor elements. 
// @@ Uint64Contents []uint64 `protobuf:"varint,5,rep,packed,name=uint64_contents,json=uint64Contents,proto3" json:"uint64_contents,omitempty"` // @@ // @@ .. cpp:var:: float fp32_contents (repeated) // @@ // @@ Representation for FP32 data type. The size must match what is // @@ expected by the tensor's shape. The contents must be the flattened, // @@ one-dimensional, row-major order of the tensor elements. // @@ Fp32Contents []float32 `protobuf:"fixed32,6,rep,packed,name=fp32_contents,json=fp32Contents,proto3" json:"fp32_contents,omitempty"` // @@ // @@ .. cpp:var:: double fp64_contents (repeated) // @@ // @@ Representation for FP64 data type. The size must match what is // @@ expected by the tensor's shape. The contents must be the flattened, // @@ one-dimensional, row-major order of the tensor elements. // @@ Fp64Contents []float64 `protobuf:"fixed64,7,rep,packed,name=fp64_contents,json=fp64Contents,proto3" json:"fp64_contents,omitempty"` // @@ // @@ .. cpp:var:: bytes bytes_contents (repeated) // @@ // @@ Representation for BYTES data type. The size must match what is // @@ expected by the tensor's shape. The contents must be the flattened, // @@ one-dimensional, row-major order of the tensor elements. // @@ BytesContents [][]byte `protobuf:"bytes,8,rep,name=bytes_contents,json=bytesContents,proto3" json:"bytes_contents,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message InferTensorContents @@ @@ The data contained in a tensor represented by the repeated type @@ that matches the tensor's data type. Protobuf oneof is not used @@ because oneofs cannot contain repeated fields. @@
func (*InferTensorContents) Descriptor
deprecated
func (*InferTensorContents) Descriptor() ([]byte, []int)
Deprecated: Use InferTensorContents.ProtoReflect.Descriptor instead.
func (*InferTensorContents) GetBoolContents ¶
func (x *InferTensorContents) GetBoolContents() []bool
func (*InferTensorContents) GetBytesContents ¶
func (x *InferTensorContents) GetBytesContents() [][]byte
func (*InferTensorContents) GetFp32Contents ¶
func (x *InferTensorContents) GetFp32Contents() []float32
func (*InferTensorContents) GetFp64Contents ¶
func (x *InferTensorContents) GetFp64Contents() []float64
func (*InferTensorContents) GetInt64Contents ¶
func (x *InferTensorContents) GetInt64Contents() []int64
func (*InferTensorContents) GetIntContents ¶
func (x *InferTensorContents) GetIntContents() []int32
func (*InferTensorContents) GetUint64Contents ¶
func (x *InferTensorContents) GetUint64Contents() []uint64
func (*InferTensorContents) GetUintContents ¶
func (x *InferTensorContents) GetUintContents() []uint32
func (*InferTensorContents) ProtoMessage ¶
func (*InferTensorContents) ProtoMessage()
func (*InferTensorContents) ProtoReflect ¶
func (x *InferTensorContents) ProtoReflect() protoreflect.Message
func (*InferTensorContents) Reset ¶
func (x *InferTensorContents) Reset()
func (*InferTensorContents) String ¶
func (x *InferTensorContents) String() string
type LogSettingsRequest ¶
type LogSettingsRequest struct { // @@ .. cpp:var:: map<string,SettingValue> settings // @@ // @@ The current log settings. // @@ Settings map[string]*LogSettingsRequest_SettingValue `` /* 157-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message LogSettingsRequest @@ @@ Request message for LogSettings. @@
func (*LogSettingsRequest) Descriptor
deprecated
func (*LogSettingsRequest) Descriptor() ([]byte, []int)
Deprecated: Use LogSettingsRequest.ProtoReflect.Descriptor instead.
func (*LogSettingsRequest) GetSettings ¶
func (x *LogSettingsRequest) GetSettings() map[string]*LogSettingsRequest_SettingValue
func (*LogSettingsRequest) ProtoMessage ¶
func (*LogSettingsRequest) ProtoMessage()
func (*LogSettingsRequest) ProtoReflect ¶
func (x *LogSettingsRequest) ProtoReflect() protoreflect.Message
func (*LogSettingsRequest) Reset ¶
func (x *LogSettingsRequest) Reset()
func (*LogSettingsRequest) String ¶
func (x *LogSettingsRequest) String() string
type LogSettingsRequest_SettingValue ¶
type LogSettingsRequest_SettingValue struct { // Types that are assignable to ParameterChoice: // // *LogSettingsRequest_SettingValue_BoolParam // *LogSettingsRequest_SettingValue_Uint32Param // *LogSettingsRequest_SettingValue_StringParam ParameterChoice isLogSettingsRequest_SettingValue_ParameterChoice `protobuf_oneof:"parameter_choice"` // contains filtered or unexported fields }
func (*LogSettingsRequest_SettingValue) Descriptor
deprecated
func (*LogSettingsRequest_SettingValue) Descriptor() ([]byte, []int)
Deprecated: Use LogSettingsRequest_SettingValue.ProtoReflect.Descriptor instead.
func (*LogSettingsRequest_SettingValue) GetBoolParam ¶
func (x *LogSettingsRequest_SettingValue) GetBoolParam() bool
func (*LogSettingsRequest_SettingValue) GetParameterChoice ¶
func (m *LogSettingsRequest_SettingValue) GetParameterChoice() isLogSettingsRequest_SettingValue_ParameterChoice
func (*LogSettingsRequest_SettingValue) GetStringParam ¶
func (x *LogSettingsRequest_SettingValue) GetStringParam() string
func (*LogSettingsRequest_SettingValue) GetUint32Param ¶
func (x *LogSettingsRequest_SettingValue) GetUint32Param() uint32
func (*LogSettingsRequest_SettingValue) ProtoMessage ¶
func (*LogSettingsRequest_SettingValue) ProtoMessage()
func (*LogSettingsRequest_SettingValue) ProtoReflect ¶
func (x *LogSettingsRequest_SettingValue) ProtoReflect() protoreflect.Message
func (*LogSettingsRequest_SettingValue) Reset ¶
func (x *LogSettingsRequest_SettingValue) Reset()
func (*LogSettingsRequest_SettingValue) String ¶
func (x *LogSettingsRequest_SettingValue) String() string
type LogSettingsRequest_SettingValue_BoolParam ¶
type LogSettingsRequest_SettingValue_BoolParam struct { // @@ .. cpp:var:: bool bool_param // @@ // @@ A boolean parameter value. // @@ BoolParam bool `protobuf:"varint,1,opt,name=bool_param,json=boolParam,proto3,oneof"` }
type LogSettingsRequest_SettingValue_StringParam ¶
type LogSettingsRequest_SettingValue_StringParam struct { // @@ .. cpp:var:: string string_param // @@ // @@ A string parameter value. // @@ StringParam string `protobuf:"bytes,3,opt,name=string_param,json=stringParam,proto3,oneof"` }
type LogSettingsRequest_SettingValue_Uint32Param ¶
type LogSettingsRequest_SettingValue_Uint32Param struct { // @@ .. cpp:var:: uint32 uint32_param // @@ // @@ An uint32 parameter value. // @@ Uint32Param uint32 `protobuf:"varint,2,opt,name=uint32_param,json=uint32Param,proto3,oneof"` }
type LogSettingsResponse ¶
type LogSettingsResponse struct { // @@ .. cpp:var:: map<string,SettingValue> settings // @@ // @@ The current log settings. // @@ Settings map[string]*LogSettingsResponse_SettingValue `` /* 157-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message LogSettingsResponse @@ @@ Response message for LogSettings. @@
func (*LogSettingsResponse) Descriptor
deprecated
func (*LogSettingsResponse) Descriptor() ([]byte, []int)
Deprecated: Use LogSettingsResponse.ProtoReflect.Descriptor instead.
func (*LogSettingsResponse) GetSettings ¶
func (x *LogSettingsResponse) GetSettings() map[string]*LogSettingsResponse_SettingValue
func (*LogSettingsResponse) ProtoMessage ¶
func (*LogSettingsResponse) ProtoMessage()
func (*LogSettingsResponse) ProtoReflect ¶
func (x *LogSettingsResponse) ProtoReflect() protoreflect.Message
func (*LogSettingsResponse) Reset ¶
func (x *LogSettingsResponse) Reset()
func (*LogSettingsResponse) String ¶
func (x *LogSettingsResponse) String() string
type LogSettingsResponse_SettingValue ¶
type LogSettingsResponse_SettingValue struct { // Types that are assignable to ParameterChoice: // // *LogSettingsResponse_SettingValue_BoolParam // *LogSettingsResponse_SettingValue_Uint32Param // *LogSettingsResponse_SettingValue_StringParam ParameterChoice isLogSettingsResponse_SettingValue_ParameterChoice `protobuf_oneof:"parameter_choice"` // contains filtered or unexported fields }
func (*LogSettingsResponse_SettingValue) Descriptor
deprecated
func (*LogSettingsResponse_SettingValue) Descriptor() ([]byte, []int)
Deprecated: Use LogSettingsResponse_SettingValue.ProtoReflect.Descriptor instead.
func (*LogSettingsResponse_SettingValue) GetBoolParam ¶
func (x *LogSettingsResponse_SettingValue) GetBoolParam() bool
func (*LogSettingsResponse_SettingValue) GetParameterChoice ¶
func (m *LogSettingsResponse_SettingValue) GetParameterChoice() isLogSettingsResponse_SettingValue_ParameterChoice
func (*LogSettingsResponse_SettingValue) GetStringParam ¶
func (x *LogSettingsResponse_SettingValue) GetStringParam() string
func (*LogSettingsResponse_SettingValue) GetUint32Param ¶
func (x *LogSettingsResponse_SettingValue) GetUint32Param() uint32
func (*LogSettingsResponse_SettingValue) ProtoMessage ¶
func (*LogSettingsResponse_SettingValue) ProtoMessage()
func (*LogSettingsResponse_SettingValue) ProtoReflect ¶
func (x *LogSettingsResponse_SettingValue) ProtoReflect() protoreflect.Message
func (*LogSettingsResponse_SettingValue) Reset ¶
func (x *LogSettingsResponse_SettingValue) Reset()
func (*LogSettingsResponse_SettingValue) String ¶
func (x *LogSettingsResponse_SettingValue) String() string
type LogSettingsResponse_SettingValue_BoolParam ¶
type LogSettingsResponse_SettingValue_BoolParam struct { // @@ .. cpp:var:: bool bool_param // @@ // @@ A boolean parameter value. // @@ BoolParam bool `protobuf:"varint,1,opt,name=bool_param,json=boolParam,proto3,oneof"` }
type LogSettingsResponse_SettingValue_StringParam ¶
type LogSettingsResponse_SettingValue_StringParam struct { // @@ .. cpp:var:: string string_param // @@ // @@ A string parameter value. // @@ StringParam string `protobuf:"bytes,3,opt,name=string_param,json=stringParam,proto3,oneof"` }
type LogSettingsResponse_SettingValue_Uint32Param ¶
type LogSettingsResponse_SettingValue_Uint32Param struct { // @@ .. cpp:var:: uint32 uint32_param // @@ // @@ An uint32 parameter value. // @@ Uint32Param uint32 `protobuf:"varint,2,opt,name=uint32_param,json=uint32Param,proto3,oneof"` }
type MemoryUsage ¶
type MemoryUsage struct { // @@ .. cpp:var:: string type // @@ // @@ The type of memory, the value can be "CPU", "CPU_PINNED", "GPU". // @@ Type string `protobuf:"bytes,1,opt,name=type,proto3" json:"type,omitempty"` // @@ .. cpp:var:: int64 id // @@ // @@ The id of the memory, typically used with "type" to identify // @@ a device that hosts the memory. // @@ Id int64 `protobuf:"varint,2,opt,name=id,proto3" json:"id,omitempty"` // @@ .. cpp:var:: uint64 byte_size // @@ // @@ The byte size of the memory. // @@ ByteSize uint64 `protobuf:"varint,3,opt,name=byte_size,json=byteSize,proto3" json:"byte_size,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message MemoryUsage @@ @@ Memory usage. @@
func (*MemoryUsage) Descriptor
deprecated
func (*MemoryUsage) Descriptor() ([]byte, []int)
Deprecated: Use MemoryUsage.ProtoReflect.Descriptor instead.
func (*MemoryUsage) GetByteSize ¶
func (x *MemoryUsage) GetByteSize() uint64
func (*MemoryUsage) GetId ¶
func (x *MemoryUsage) GetId() int64
func (*MemoryUsage) GetType ¶
func (x *MemoryUsage) GetType() string
func (*MemoryUsage) ProtoMessage ¶
func (*MemoryUsage) ProtoMessage()
func (*MemoryUsage) ProtoReflect ¶
func (x *MemoryUsage) ProtoReflect() protoreflect.Message
func (*MemoryUsage) Reset ¶
func (x *MemoryUsage) Reset()
func (*MemoryUsage) String ¶
func (x *MemoryUsage) String() string
type ModelConfig ¶
type ModelConfig struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the model. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: string platform // @@ // @@ Additional backend-specific configuration for the model. // @@ Please refer to the backend documentation on whether this field // @@ should be specified. // @@ Platform string `protobuf:"bytes,2,opt,name=platform,proto3" json:"platform,omitempty"` // @@ .. cpp:var:: string backend // @@ // @@ The backend used by the model. // @@ Backend string `protobuf:"bytes,17,opt,name=backend,proto3" json:"backend,omitempty"` // @@ .. cpp:var:: string runtime // @@ // @@ The name of the backend library file used by the model. // @@ Runtime string `protobuf:"bytes,25,opt,name=runtime,proto3" json:"runtime,omitempty"` // @@ .. cpp:var:: ModelVersionPolicy version_policy // @@ // @@ Policy indicating which version(s) of the model will be served. // @@ VersionPolicy *ModelVersionPolicy `protobuf:"bytes,3,opt,name=version_policy,json=versionPolicy,proto3" json:"version_policy,omitempty"` // @@ .. cpp:var:: int32 max_batch_size // @@ // @@ Maximum batch size allowed for inference. This can only decrease // @@ what is allowed by the model itself. A max_batch_size value of 0 // @@ indicates that batching is not allowed for the model and the // @@ dimension/shape of the input and output tensors must exactly // @@ match what is specified in the input and output configuration. A // @@ max_batch_size value > 0 indicates that batching is allowed and // @@ so the model expects the input tensors to have an additional // @@ initial dimension for the batching that is not specified in the // @@ input (for example, if the model supports batched inputs of // @@ 2-dimensional tensors then the model configuration will specify // @@ the input shape as [ X, Y ] but the model will expect the actual // @@ input tensors to have shape [ N, X, Y ]). 
For max_batch_size > 0 // @@ returned outputs will also have an additional initial dimension // @@ for the batch. // @@ MaxBatchSize int32 `protobuf:"varint,4,opt,name=max_batch_size,json=maxBatchSize,proto3" json:"max_batch_size,omitempty"` // @@ .. cpp:var:: ModelInput input (repeated) // @@ // @@ The inputs requested by the model. // @@ Input []*ModelInput `protobuf:"bytes,5,rep,name=input,proto3" json:"input,omitempty"` // @@ .. cpp:var:: ModelOutput output (repeated) // @@ // @@ The outputs produced by the model. // @@ Output []*ModelOutput `protobuf:"bytes,6,rep,name=output,proto3" json:"output,omitempty"` // @@ .. cpp:var:: BatchInput batch_input (repeated) // @@ // @@ The model input(s) that the server should use to communicate // @@ batch related values to the model. // @@ BatchInput []*BatchInput `protobuf:"bytes,20,rep,name=batch_input,json=batchInput,proto3" json:"batch_input,omitempty"` // @@ .. cpp:var:: BatchOutput batch_output (repeated) // @@ // @@ The outputs produced by the model that require special handling // @@ by the model backend. // @@ BatchOutput []*BatchOutput `protobuf:"bytes,21,rep,name=batch_output,json=batchOutput,proto3" json:"batch_output,omitempty"` // @@ .. cpp:var:: ModelOptimizationPolicy optimization // @@ // @@ Optimization configuration for the model. If not specified // @@ then default optimization policy is used. // @@ Optimization *ModelOptimizationPolicy `protobuf:"bytes,12,opt,name=optimization,proto3" json:"optimization,omitempty"` // @@ .. cpp:var:: oneof scheduling_choice // @@ // @@ The scheduling policy for the model. If not specified the // @@ default scheduling policy is used for the model. The default // @@ policy is to execute each inference request independently. 
// @@ // // Types that are assignable to SchedulingChoice: // // *ModelConfig_DynamicBatching // *ModelConfig_SequenceBatching // *ModelConfig_EnsembleScheduling SchedulingChoice isModelConfig_SchedulingChoice `protobuf_oneof:"scheduling_choice"` // @@ .. cpp:var:: ModelInstanceGroup instance_group (repeated) // @@ // @@ Instances of this model. If not specified, one instance // @@ of the model will be instantiated on each available GPU. // @@ InstanceGroup []*ModelInstanceGroup `protobuf:"bytes,7,rep,name=instance_group,json=instanceGroup,proto3" json:"instance_group,omitempty"` // @@ .. cpp:var:: string default_model_filename // @@ // @@ Optional filename of the model file to use if a // @@ compute-capability specific model is not specified in // @@ :cpp:var:`cc_model_filenames`. If not specified the default name // @@ is 'model.graphdef', 'model.savedmodel', 'model.plan' or // @@ 'model.pt' depending on the model type. // @@ DefaultModelFilename string `protobuf:"bytes,8,opt,name=default_model_filename,json=defaultModelFilename,proto3" json:"default_model_filename,omitempty"` // @@ .. cpp:var:: map<string,string> cc_model_filenames // @@ // @@ Optional map from CUDA compute capability to the filename of // @@ the model that supports that compute capability. The filename // @@ refers to a file within the model version directory. // @@ CcModelFilenames map[string]string `` /* 199-byte string literal not displayed */ // @@ .. cpp:var:: map<string,string> metric_tags // @@ // @@ Optional metric tags. User-specific key-value pairs for metrics // @@ reported for this model. These tags are applied to the metrics // @@ reported on the HTTP metrics port. // @@ MetricTags map[string]string `` /* 180-byte string literal not displayed */ // @@ .. cpp:var:: map<string,ModelParameter> parameters // @@ // @@ Optional model parameters. User-specified parameter values. // @@ Parameters map[string]*ModelParameter `` /* 162-byte string literal not displayed */ // @@ .. 
cpp:var:: ModelWarmup model_warmup (repeated) // @@ // @@ Warmup setting of this model. If specified, all instances // @@ will be run with the request samples in sequence before // @@ serving the model. // @@ This field can only be specified if the model is not an ensemble // @@ model. // @@ ModelWarmup []*ModelWarmup `protobuf:"bytes,16,rep,name=model_warmup,json=modelWarmup,proto3" json:"model_warmup,omitempty"` // @@ .. cpp:var:: ModelOperations model_operations // @@ // @@ Optional metadata of the libraries providing custom operations for // @@ this model. // @@ ModelOperations *ModelOperations `protobuf:"bytes,18,opt,name=model_operations,json=modelOperations,proto3" json:"model_operations,omitempty"` // @@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy // @@ // @@ Optional specification that describes the nature of transactions // @@ to be expected from the model. // @@ ModelTransactionPolicy *ModelTransactionPolicy `` /* 130-byte string literal not displayed */ // @@ .. cpp:var:: ModelRepositoryAgents model_repository_agents // @@ // @@ Optional specification of the agent(s) that should be invoked // @@ when repository actions are performed for this model. // @@ ModelRepositoryAgents *ModelRepositoryAgents `` /* 127-byte string literal not displayed */ // @@ .. cpp:var:: ModelResponseCache response_cache // @@ // @@ Optional setting for utilizing the response cache for this // @@ model. // @@ ResponseCache *ModelResponseCache `protobuf:"bytes,24,opt,name=response_cache,json=responseCache,proto3" json:"response_cache,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelConfig @@ @@ A model configuration. @@
func (*ModelConfig) Descriptor
deprecated
func (*ModelConfig) Descriptor() ([]byte, []int)
Deprecated: Use ModelConfig.ProtoReflect.Descriptor instead.
func (*ModelConfig) GetBackend ¶
func (x *ModelConfig) GetBackend() string
func (*ModelConfig) GetBatchInput ¶
func (x *ModelConfig) GetBatchInput() []*BatchInput
func (*ModelConfig) GetBatchOutput ¶
func (x *ModelConfig) GetBatchOutput() []*BatchOutput
func (*ModelConfig) GetCcModelFilenames ¶
func (x *ModelConfig) GetCcModelFilenames() map[string]string
func (*ModelConfig) GetDefaultModelFilename ¶
func (x *ModelConfig) GetDefaultModelFilename() string
func (*ModelConfig) GetDynamicBatching ¶
func (x *ModelConfig) GetDynamicBatching() *ModelDynamicBatching
func (*ModelConfig) GetEnsembleScheduling ¶
func (x *ModelConfig) GetEnsembleScheduling() *ModelEnsembling
func (*ModelConfig) GetInput ¶
func (x *ModelConfig) GetInput() []*ModelInput
func (*ModelConfig) GetInstanceGroup ¶
func (x *ModelConfig) GetInstanceGroup() []*ModelInstanceGroup
func (*ModelConfig) GetMaxBatchSize ¶
func (x *ModelConfig) GetMaxBatchSize() int32
func (*ModelConfig) GetMetricTags ¶
func (x *ModelConfig) GetMetricTags() map[string]string
func (*ModelConfig) GetModelOperations ¶
func (x *ModelConfig) GetModelOperations() *ModelOperations
func (*ModelConfig) GetModelRepositoryAgents ¶
func (x *ModelConfig) GetModelRepositoryAgents() *ModelRepositoryAgents
func (*ModelConfig) GetModelTransactionPolicy ¶
func (x *ModelConfig) GetModelTransactionPolicy() *ModelTransactionPolicy
func (*ModelConfig) GetModelWarmup ¶
func (x *ModelConfig) GetModelWarmup() []*ModelWarmup
func (*ModelConfig) GetName ¶
func (x *ModelConfig) GetName() string
func (*ModelConfig) GetOptimization ¶
func (x *ModelConfig) GetOptimization() *ModelOptimizationPolicy
func (*ModelConfig) GetOutput ¶
func (x *ModelConfig) GetOutput() []*ModelOutput
func (*ModelConfig) GetParameters ¶
func (x *ModelConfig) GetParameters() map[string]*ModelParameter
func (*ModelConfig) GetPlatform ¶
func (x *ModelConfig) GetPlatform() string
func (*ModelConfig) GetResponseCache ¶
func (x *ModelConfig) GetResponseCache() *ModelResponseCache
func (*ModelConfig) GetRuntime ¶
func (x *ModelConfig) GetRuntime() string
func (*ModelConfig) GetSchedulingChoice ¶
func (m *ModelConfig) GetSchedulingChoice() isModelConfig_SchedulingChoice
func (*ModelConfig) GetSequenceBatching ¶
func (x *ModelConfig) GetSequenceBatching() *ModelSequenceBatching
func (*ModelConfig) GetVersionPolicy ¶
func (x *ModelConfig) GetVersionPolicy() *ModelVersionPolicy
func (*ModelConfig) ProtoMessage ¶
func (*ModelConfig) ProtoMessage()
func (*ModelConfig) ProtoReflect ¶
func (x *ModelConfig) ProtoReflect() protoreflect.Message
func (*ModelConfig) Reset ¶
func (x *ModelConfig) Reset()
func (*ModelConfig) String ¶
func (x *ModelConfig) String() string
type ModelConfigRequest ¶
type ModelConfigRequest struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The name of the model. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: string version // @@ // @@ The version of the model. If not given the model version // @@ is selected automatically based on the version policy. // @@ Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelConfigRequest @@ @@ Request message for ModelConfig. @@
func (*ModelConfigRequest) Descriptor
deprecated
func (*ModelConfigRequest) Descriptor() ([]byte, []int)
Deprecated: Use ModelConfigRequest.ProtoReflect.Descriptor instead.
func (*ModelConfigRequest) GetName ¶
func (x *ModelConfigRequest) GetName() string
func (*ModelConfigRequest) GetVersion ¶
func (x *ModelConfigRequest) GetVersion() string
func (*ModelConfigRequest) ProtoMessage ¶
func (*ModelConfigRequest) ProtoMessage()
func (*ModelConfigRequest) ProtoReflect ¶
func (x *ModelConfigRequest) ProtoReflect() protoreflect.Message
func (*ModelConfigRequest) Reset ¶
func (x *ModelConfigRequest) Reset()
func (*ModelConfigRequest) String ¶
func (x *ModelConfigRequest) String() string
type ModelConfigResponse ¶
type ModelConfigResponse struct { // @@ // @@ .. cpp:var:: ModelConfig config // @@ // @@ The model configuration. // @@ Config *ModelConfig `protobuf:"bytes,1,opt,name=config,proto3" json:"config,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelConfigResponse @@ @@ Response message for ModelConfig. @@
func (*ModelConfigResponse) Descriptor
deprecated
func (*ModelConfigResponse) Descriptor() ([]byte, []int)
Deprecated: Use ModelConfigResponse.ProtoReflect.Descriptor instead.
func (*ModelConfigResponse) GetConfig ¶
func (x *ModelConfigResponse) GetConfig() *ModelConfig
func (*ModelConfigResponse) ProtoMessage ¶
func (*ModelConfigResponse) ProtoMessage()
func (*ModelConfigResponse) ProtoReflect ¶
func (x *ModelConfigResponse) ProtoReflect() protoreflect.Message
func (*ModelConfigResponse) Reset ¶
func (x *ModelConfigResponse) Reset()
func (*ModelConfigResponse) String ¶
func (x *ModelConfigResponse) String() string
type ModelConfig_DynamicBatching ¶
type ModelConfig_DynamicBatching struct { // @@ .. cpp:var:: ModelDynamicBatching dynamic_batching // @@ // @@ If specified, enables the dynamic-batching scheduling // @@ policy. With dynamic-batching the scheduler may group // @@ together independent requests into a single batch to // @@ improve inference throughput. // @@ DynamicBatching *ModelDynamicBatching `protobuf:"bytes,11,opt,name=dynamic_batching,json=dynamicBatching,proto3,oneof"` }
type ModelConfig_EnsembleScheduling ¶
type ModelConfig_EnsembleScheduling struct { // @@ .. cpp:var:: ModelEnsembling ensemble_scheduling // @@ // @@ If specified, enables the model-ensembling scheduling // @@ policy. With model-ensembling, inference requests // @@ will be processed according to the specification, such as an // @@ execution sequence of models. The input specified in this model // @@ config will be the input for the ensemble, and the output // @@ specified will be the output of the ensemble. // @@ EnsembleScheduling *ModelEnsembling `protobuf:"bytes,15,opt,name=ensemble_scheduling,json=ensembleScheduling,proto3,oneof"` }
type ModelConfig_SequenceBatching ¶
type ModelConfig_SequenceBatching struct { // @@ .. cpp:var:: ModelSequenceBatching sequence_batching // @@ // @@ If specified, enables the sequence-batching scheduling // @@ policy. With sequence-batching, inference requests // @@ with the same correlation ID are routed to the same // @@ model instance. Multiple sequences of inference requests // @@ may be batched together into a single batch to // @@ improve inference throughput. // @@ SequenceBatching *ModelSequenceBatching `protobuf:"bytes,13,opt,name=sequence_batching,json=sequenceBatching,proto3,oneof"` }
type ModelDynamicBatching ¶
type ModelDynamicBatching struct { // @@ .. cpp:var:: int32 preferred_batch_size (repeated) // @@ // @@ Preferred batch sizes for dynamic batching. If a batch of one of // @@ these sizes can be formed it will be executed immediately. If // @@ not specified a preferred batch size will be chosen automatically // @@ based on model and GPU characteristics. // @@ PreferredBatchSize []int32 `protobuf:"varint,1,rep,packed,name=preferred_batch_size,json=preferredBatchSize,proto3" json:"preferred_batch_size,omitempty"` // @@ .. cpp:var:: uint64 max_queue_delay_microseconds // @@ // @@ The maximum time, in microseconds, a request will be delayed in // @@ the scheduling queue to wait for additional requests for // @@ batching. Default is 0. // @@ MaxQueueDelayMicroseconds uint64 `` /* 141-byte string literal not displayed */ // @@ .. cpp:var:: bool preserve_ordering // @@ // @@ Should the dynamic batcher preserve the ordering of responses to // @@ match the order of requests received by the scheduler. Default is // @@ false. If true, the responses will be returned in the same order as // @@ the order of requests sent to the scheduler. If false, the responses // @@ may be returned in arbitrary order. This option is specifically // @@ needed when a sequence of related inference requests (i.e. inference // @@ requests with the same correlation ID) are sent to the dynamic // @@ batcher to ensure that the sequence responses are in the correct // @@ order. // @@ PreserveOrdering bool `protobuf:"varint,3,opt,name=preserve_ordering,json=preserveOrdering,proto3" json:"preserve_ordering,omitempty"` // @@ .. cpp:var:: uint64 priority_levels // @@ // @@ The number of priority levels to be enabled for the model, // @@ the priority level starts from 1 and 1 is the highest priority. // @@ Requests are handled in priority order with all priority 1 requests // @@ processed before priority 2, all priority 2 requests processed before // @@ priority 3, etc. 
Requests with the same priority level will be // @@ handled in the order that they are received. // @@ PriorityLevels uint64 `protobuf:"varint,4,opt,name=priority_levels,json=priorityLevels,proto3" json:"priority_levels,omitempty"` // @@ .. cpp:var:: uint64 default_priority_level // @@ // @@ The priority level used for requests that don't specify their // @@ priority. The value must be in the range [ 1, 'priority_levels' ]. // @@ DefaultPriorityLevel uint64 `protobuf:"varint,5,opt,name=default_priority_level,json=defaultPriorityLevel,proto3" json:"default_priority_level,omitempty"` // @@ .. cpp:var:: ModelQueuePolicy default_queue_policy // @@ // @@ The default queue policy used for requests that don't require // @@ priority handling and requests that specify priority levels where // @@ there is no specific policy given. If not specified, a policy with // @@ default field values will be used. // @@ DefaultQueuePolicy *ModelQueuePolicy `protobuf:"bytes,6,opt,name=default_queue_policy,json=defaultQueuePolicy,proto3" json:"default_queue_policy,omitempty"` // @@ .. cpp:var:: map<uint64, ModelQueuePolicy> priority_queue_policy // @@ // @@ Specify the queue policy for the priority level. The default queue // @@ policy will be used if a priority level doesn't specify a queue // @@ policy. // @@ PriorityQueuePolicy map[uint64]*ModelQueuePolicy `` /* 209-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelDynamicBatching @@ @@ Dynamic batching configuration. These settings control how dynamic @@ batching operates for the model. @@
func (*ModelDynamicBatching) Descriptor
deprecated
func (*ModelDynamicBatching) Descriptor() ([]byte, []int)
Deprecated: Use ModelDynamicBatching.ProtoReflect.Descriptor instead.
func (*ModelDynamicBatching) GetDefaultPriorityLevel ¶
func (x *ModelDynamicBatching) GetDefaultPriorityLevel() uint64
func (*ModelDynamicBatching) GetDefaultQueuePolicy ¶
func (x *ModelDynamicBatching) GetDefaultQueuePolicy() *ModelQueuePolicy
func (*ModelDynamicBatching) GetMaxQueueDelayMicroseconds ¶
func (x *ModelDynamicBatching) GetMaxQueueDelayMicroseconds() uint64
func (*ModelDynamicBatching) GetPreferredBatchSize ¶
func (x *ModelDynamicBatching) GetPreferredBatchSize() []int32
func (*ModelDynamicBatching) GetPreserveOrdering ¶
func (x *ModelDynamicBatching) GetPreserveOrdering() bool
func (*ModelDynamicBatching) GetPriorityLevels ¶
func (x *ModelDynamicBatching) GetPriorityLevels() uint64
func (*ModelDynamicBatching) GetPriorityQueuePolicy ¶
func (x *ModelDynamicBatching) GetPriorityQueuePolicy() map[uint64]*ModelQueuePolicy
func (*ModelDynamicBatching) ProtoMessage ¶
func (*ModelDynamicBatching) ProtoMessage()
func (*ModelDynamicBatching) ProtoReflect ¶
func (x *ModelDynamicBatching) ProtoReflect() protoreflect.Message
func (*ModelDynamicBatching) Reset ¶
func (x *ModelDynamicBatching) Reset()
func (*ModelDynamicBatching) String ¶
func (x *ModelDynamicBatching) String() string
type ModelEnsembling ¶
type ModelEnsembling struct { // @@ .. cpp:var:: Step step (repeated) // @@ // @@ The models and the input / output mappings used within the ensemble. // @@ Step []*ModelEnsembling_Step `protobuf:"bytes,1,rep,name=step,proto3" json:"step,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelEnsembling @@ @@ Model ensembling configuration. These settings specify the models that @@ compose the ensemble and how data flows between the models. @@
func (*ModelEnsembling) Descriptor
deprecated
func (*ModelEnsembling) Descriptor() ([]byte, []int)
Deprecated: Use ModelEnsembling.ProtoReflect.Descriptor instead.
func (*ModelEnsembling) GetStep ¶
func (x *ModelEnsembling) GetStep() []*ModelEnsembling_Step
func (*ModelEnsembling) ProtoMessage ¶
func (*ModelEnsembling) ProtoMessage()
func (*ModelEnsembling) ProtoReflect ¶
func (x *ModelEnsembling) ProtoReflect() protoreflect.Message
func (*ModelEnsembling) Reset ¶
func (x *ModelEnsembling) Reset()
func (*ModelEnsembling) String ¶
func (x *ModelEnsembling) String() string
type ModelEnsembling_Step ¶
type ModelEnsembling_Step struct { // @@ .. cpp:var:: string model_name // @@ // @@ The name of the model to execute for this step of the ensemble. // @@ ModelName string `protobuf:"bytes,1,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"` // @@ .. cpp:var:: int64 model_version // @@ // @@ The version of the model to use for inference. If -1 // @@ the latest/most-recent version of the model is used. // @@ ModelVersion int64 `protobuf:"varint,2,opt,name=model_version,json=modelVersion,proto3" json:"model_version,omitempty"` // @@ .. cpp:var:: map<string,string> input_map // @@ // @@ Map from name of an input tensor on this step's model to ensemble // @@ tensor name. The ensemble tensor must have the same data type and // @@ shape as the model input. Each model input must be assigned to // @@ one ensemble tensor, but the same ensemble tensor can be assigned // @@ to multiple model inputs. // @@ InputMap map[string]string `` /* 173-byte string literal not displayed */ // @@ .. cpp:var:: map<string,string> output_map // @@ // @@ Map from name of an output tensor on this step's model to ensemble // @@ tensor name. The data type and shape of the ensemble tensor will // @@ be inferred from the model output. It is optional to assign all // @@ model outputs to ensemble tensors. One ensemble tensor name // @@ can appear in an output map only once. // @@ OutputMap map[string]string `` /* 176-byte string literal not displayed */ // @@ .. cpp:var:: string model_namespace // @@ // @@ [RESERVED] currently this field is reserved for internal use, users // @@ must not set any value to this field to avoid unexpected behavior. // @@ ModelNamespace string `protobuf:"bytes,5,opt,name=model_namespace,json=modelNamespace,proto3" json:"model_namespace,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message Step @@ @@ Each step specifies a model included in the ensemble, @@ maps ensemble tensor names to the model input tensors, @@ and maps model output tensors to ensemble tensor names @@
func (*ModelEnsembling_Step) Descriptor
deprecated
func (*ModelEnsembling_Step) Descriptor() ([]byte, []int)
Deprecated: Use ModelEnsembling_Step.ProtoReflect.Descriptor instead.
func (*ModelEnsembling_Step) GetInputMap ¶
func (x *ModelEnsembling_Step) GetInputMap() map[string]string
func (*ModelEnsembling_Step) GetModelName ¶
func (x *ModelEnsembling_Step) GetModelName() string
func (*ModelEnsembling_Step) GetModelNamespace ¶
func (x *ModelEnsembling_Step) GetModelNamespace() string
func (*ModelEnsembling_Step) GetModelVersion ¶
func (x *ModelEnsembling_Step) GetModelVersion() int64
func (*ModelEnsembling_Step) GetOutputMap ¶
func (x *ModelEnsembling_Step) GetOutputMap() map[string]string
func (*ModelEnsembling_Step) ProtoMessage ¶
func (*ModelEnsembling_Step) ProtoMessage()
func (*ModelEnsembling_Step) ProtoReflect ¶
func (x *ModelEnsembling_Step) ProtoReflect() protoreflect.Message
func (*ModelEnsembling_Step) Reset ¶
func (x *ModelEnsembling_Step) Reset()
func (*ModelEnsembling_Step) String ¶
func (x *ModelEnsembling_Step) String() string
type ModelInferRequest ¶
type ModelInferRequest struct { // @@ .. cpp:var:: string model_name // @@ // @@ The name of the model to use for inferencing. // @@ ModelName string `protobuf:"bytes,1,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"` // @@ .. cpp:var:: string model_version // @@ // @@ The version of the model to use for inference. If not // @@ given the latest/most-recent version of the model is used. // @@ ModelVersion string `protobuf:"bytes,2,opt,name=model_version,json=modelVersion,proto3" json:"model_version,omitempty"` // @@ .. cpp:var:: string id // @@ // @@ Optional identifier for the request. If specified will be // @@ returned in the response. // @@ Id string `protobuf:"bytes,3,opt,name=id,proto3" json:"id,omitempty"` // @@ .. cpp:var:: map<string,InferParameter> parameters // @@ // @@ Optional inference parameters. // @@ Parameters map[string]*InferParameter `` /* 161-byte string literal not displayed */ // @@ // @@ .. cpp:var:: InferInputTensor inputs (repeated) // @@ // @@ The input tensors for the inference. // @@ Inputs []*ModelInferRequest_InferInputTensor `protobuf:"bytes,5,rep,name=inputs,proto3" json:"inputs,omitempty"` // @@ // @@ .. cpp:var:: InferRequestedOutputTensor outputs (repeated) // @@ // @@ The requested output tensors for the inference. Optional, if not // @@ specified all outputs specified in the model config will be // @@ returned. // @@ Outputs []*ModelInferRequest_InferRequestedOutputTensor `protobuf:"bytes,6,rep,name=outputs,proto3" json:"outputs,omitempty"` // @@ // @@ .. cpp:var:: bytes raw_input_contents // @@ // @@ The data contained in an input tensor can be represented in // @@ "raw" bytes form or in the repeated type that matches the // @@ tensor's data type. Using the "raw" bytes form will // @@ typically allow higher performance due to the way protobuf // @@ allocation and reuse interacts with GRPC. For example, see // @@ https://github.com/grpc/grpc/issues/23231. 
// @@ // @@ To use the raw representation 'raw_input_contents' must be // @@ initialized with data for each tensor in the same order as // @@ 'inputs'. For each tensor, the size of this content must // @@ match what is expected by the tensor's shape and data // @@ type. The raw data must be the flattened, one-dimensional, // @@ row-major order of the tensor elements without any stride // @@ or padding between the elements. Note that the FP16 and BF16 data // @@ types must be represented as raw content as there is no // @@ specific data type for a 16-bit float type. // @@ // @@ If this field is specified then InferInputTensor::contents // @@ must not be specified for any input tensor. // @@ RawInputContents [][]byte `protobuf:"bytes,7,rep,name=raw_input_contents,json=rawInputContents,proto3" json:"raw_input_contents,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelInferRequest @@ @@ Request message for ModelInfer. @@
func (*ModelInferRequest) Descriptor
deprecated
func (*ModelInferRequest) Descriptor() ([]byte, []int)
Deprecated: Use ModelInferRequest.ProtoReflect.Descriptor instead.
func (*ModelInferRequest) GetId ¶
func (x *ModelInferRequest) GetId() string
func (*ModelInferRequest) GetInputs ¶
func (x *ModelInferRequest) GetInputs() []*ModelInferRequest_InferInputTensor
func (*ModelInferRequest) GetModelName ¶
func (x *ModelInferRequest) GetModelName() string
func (*ModelInferRequest) GetModelVersion ¶
func (x *ModelInferRequest) GetModelVersion() string
func (*ModelInferRequest) GetOutputs ¶
func (x *ModelInferRequest) GetOutputs() []*ModelInferRequest_InferRequestedOutputTensor
func (*ModelInferRequest) GetParameters ¶
func (x *ModelInferRequest) GetParameters() map[string]*InferParameter
func (*ModelInferRequest) GetRawInputContents ¶
func (x *ModelInferRequest) GetRawInputContents() [][]byte
func (*ModelInferRequest) ProtoMessage ¶
func (*ModelInferRequest) ProtoMessage()
func (*ModelInferRequest) ProtoReflect ¶
func (x *ModelInferRequest) ProtoReflect() protoreflect.Message
func (*ModelInferRequest) Reset ¶
func (x *ModelInferRequest) Reset()
func (*ModelInferRequest) String ¶
func (x *ModelInferRequest) String() string
type ModelInferRequest_InferInputTensor ¶
type ModelInferRequest_InferInputTensor struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The tensor name. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ // @@ .. cpp:var:: string datatype // @@ // @@ The tensor data type. // @@ Datatype string `protobuf:"bytes,2,opt,name=datatype,proto3" json:"datatype,omitempty"` // @@ // @@ .. cpp:var:: int64 shape (repeated) // @@ // @@ The tensor shape. // @@ Shape []int64 `protobuf:"varint,3,rep,packed,name=shape,proto3" json:"shape,omitempty"` // @@ .. cpp:var:: map<string,InferParameter> parameters // @@ // @@ Optional inference input tensor parameters. // @@ Parameters map[string]*InferParameter `` /* 161-byte string literal not displayed */ // @@ .. cpp:var:: InferTensorContents contents // @@ // @@ The tensor contents using a data-type format. This field // @@ must not be specified if tensor contents are being specified // @@ in ModelInferRequest.raw_input_contents. // @@ Contents *InferTensorContents `protobuf:"bytes,5,opt,name=contents,proto3" json:"contents,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message InferInputTensor @@ @@ An input tensor for an inference request. @@
func (*ModelInferRequest_InferInputTensor) Descriptor
deprecated
func (*ModelInferRequest_InferInputTensor) Descriptor() ([]byte, []int)
Deprecated: Use ModelInferRequest_InferInputTensor.ProtoReflect.Descriptor instead.
func (*ModelInferRequest_InferInputTensor) GetContents ¶
func (x *ModelInferRequest_InferInputTensor) GetContents() *InferTensorContents
func (*ModelInferRequest_InferInputTensor) GetDatatype ¶
func (x *ModelInferRequest_InferInputTensor) GetDatatype() string
func (*ModelInferRequest_InferInputTensor) GetName ¶
func (x *ModelInferRequest_InferInputTensor) GetName() string
func (*ModelInferRequest_InferInputTensor) GetParameters ¶
func (x *ModelInferRequest_InferInputTensor) GetParameters() map[string]*InferParameter
func (*ModelInferRequest_InferInputTensor) GetShape ¶
func (x *ModelInferRequest_InferInputTensor) GetShape() []int64
func (*ModelInferRequest_InferInputTensor) ProtoMessage ¶
func (*ModelInferRequest_InferInputTensor) ProtoMessage()
func (*ModelInferRequest_InferInputTensor) ProtoReflect ¶
func (x *ModelInferRequest_InferInputTensor) ProtoReflect() protoreflect.Message
func (*ModelInferRequest_InferInputTensor) Reset ¶
func (x *ModelInferRequest_InferInputTensor) Reset()
func (*ModelInferRequest_InferInputTensor) String ¶
func (x *ModelInferRequest_InferInputTensor) String() string
type ModelInferRequest_InferRequestedOutputTensor ¶
type ModelInferRequest_InferRequestedOutputTensor struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The tensor name. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: map<string,InferParameter> parameters // @@ // @@ Optional requested output tensor parameters. // @@ Parameters map[string]*InferParameter `` /* 161-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message InferRequestedOutputTensor @@ @@ An output tensor requested for an inference request. @@
func (*ModelInferRequest_InferRequestedOutputTensor) Descriptor
deprecated
func (*ModelInferRequest_InferRequestedOutputTensor) Descriptor() ([]byte, []int)
Deprecated: Use ModelInferRequest_InferRequestedOutputTensor.ProtoReflect.Descriptor instead.
func (*ModelInferRequest_InferRequestedOutputTensor) GetName ¶
func (x *ModelInferRequest_InferRequestedOutputTensor) GetName() string
func (*ModelInferRequest_InferRequestedOutputTensor) GetParameters ¶
func (x *ModelInferRequest_InferRequestedOutputTensor) GetParameters() map[string]*InferParameter
func (*ModelInferRequest_InferRequestedOutputTensor) ProtoMessage ¶
func (*ModelInferRequest_InferRequestedOutputTensor) ProtoMessage()
func (*ModelInferRequest_InferRequestedOutputTensor) ProtoReflect ¶
func (x *ModelInferRequest_InferRequestedOutputTensor) ProtoReflect() protoreflect.Message
func (*ModelInferRequest_InferRequestedOutputTensor) Reset ¶
func (x *ModelInferRequest_InferRequestedOutputTensor) Reset()
func (*ModelInferRequest_InferRequestedOutputTensor) String ¶
func (x *ModelInferRequest_InferRequestedOutputTensor) String() string
type ModelInferResponse ¶
type ModelInferResponse struct { // @@ .. cpp:var:: string model_name // @@ // @@ The name of the model used for inference. // @@ ModelName string `protobuf:"bytes,1,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"` // @@ .. cpp:var:: string model_version // @@ // @@ The version of the model used for inference. // @@ ModelVersion string `protobuf:"bytes,2,opt,name=model_version,json=modelVersion,proto3" json:"model_version,omitempty"` // @@ .. cpp:var:: string id // @@ // @@ The id of the inference request if one was specified. // @@ Id string `protobuf:"bytes,3,opt,name=id,proto3" json:"id,omitempty"` // @@ .. cpp:var:: map<string,InferParameter> parameters // @@ // @@ Optional inference response parameters. // @@ Parameters map[string]*InferParameter `` /* 161-byte string literal not displayed */ // @@ // @@ .. cpp:var:: InferOutputTensor outputs (repeated) // @@ // @@ The output tensors holding inference results. // @@ Outputs []*ModelInferResponse_InferOutputTensor `protobuf:"bytes,5,rep,name=outputs,proto3" json:"outputs,omitempty"` // @@ // @@ .. cpp:var:: bytes raw_output_contents // @@ // @@ The data contained in an output tensor can be represented in // @@ "raw" bytes form or in the repeated type that matches the // @@ tensor's data type. Using the "raw" bytes form will // @@ typically allow higher performance due to the way protobuf // @@ allocation and reuse interacts with GRPC. For example, see // @@ https://github.com/grpc/grpc/issues/23231. // @@ // @@ To use the raw representation 'raw_output_contents' must be // @@ initialized with data for each tensor in the same order as // @@ 'outputs'. For each tensor, the size of this content must // @@ match what is expected by the tensor's shape and data // @@ type. The raw data must be the flattened, one-dimensional, // @@ row-major order of the tensor elements without any stride // @@ or padding between the elements. Note that the FP16 and BF16 data // @@ types must be represented as raw content as there is no // @@ specific data type for a 16-bit float type. // @@ // @@ If this field is specified then InferOutputTensor::contents // @@ must not be specified for any output tensor. // @@ RawOutputContents [][]byte `protobuf:"bytes,6,rep,name=raw_output_contents,json=rawOutputContents,proto3" json:"raw_output_contents,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message ModelInferResponse @@ @@ Response message for ModelInfer. @@
func (*ModelInferResponse) Descriptor
deprecated
func (*ModelInferResponse) Descriptor() ([]byte, []int)
Deprecated: Use ModelInferResponse.ProtoReflect.Descriptor instead.
func (*ModelInferResponse) GetId ¶
func (x *ModelInferResponse) GetId() string
func (*ModelInferResponse) GetModelName ¶
func (x *ModelInferResponse) GetModelName() string
func (*ModelInferResponse) GetModelVersion ¶
func (x *ModelInferResponse) GetModelVersion() string
func (*ModelInferResponse) GetOutputs ¶
func (x *ModelInferResponse) GetOutputs() []*ModelInferResponse_InferOutputTensor
func (*ModelInferResponse) GetParameters ¶
func (x *ModelInferResponse) GetParameters() map[string]*InferParameter
func (*ModelInferResponse) GetRawOutputContents ¶
func (x *ModelInferResponse) GetRawOutputContents() [][]byte
func (*ModelInferResponse) ProtoMessage ¶
func (*ModelInferResponse) ProtoMessage()
func (*ModelInferResponse) ProtoReflect ¶
func (x *ModelInferResponse) ProtoReflect() protoreflect.Message
func (*ModelInferResponse) Reset ¶
func (x *ModelInferResponse) Reset()
func (*ModelInferResponse) String ¶
func (x *ModelInferResponse) String() string
type ModelInferResponse_InferOutputTensor ¶
type ModelInferResponse_InferOutputTensor struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The tensor name. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ // @@ .. cpp:var:: string datatype // @@ // @@ The tensor data type. // @@ Datatype string `protobuf:"bytes,2,opt,name=datatype,proto3" json:"datatype,omitempty"` // @@ // @@ .. cpp:var:: int64 shape (repeated) // @@ // @@ The tensor shape. // @@ Shape []int64 `protobuf:"varint,3,rep,packed,name=shape,proto3" json:"shape,omitempty"` // @@ .. cpp:var:: map<string,InferParameter> parameters // @@ // @@ Optional output tensor parameters. // @@ Parameters map[string]*InferParameter `` /* 161-byte string literal not displayed */ // @@ .. cpp:var:: InferTensorContents contents // @@ // @@ The tensor contents using a data-type format. This field // @@ must not be specified if tensor contents are being specified // @@ in ModelInferResponse.raw_output_contents. // @@ Contents *InferTensorContents `protobuf:"bytes,5,opt,name=contents,proto3" json:"contents,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message InferOutputTensor @@ @@ An output tensor returned for an inference request. @@
func (*ModelInferResponse_InferOutputTensor) Descriptor
deprecated
func (*ModelInferResponse_InferOutputTensor) Descriptor() ([]byte, []int)
Deprecated: Use ModelInferResponse_InferOutputTensor.ProtoReflect.Descriptor instead.
func (*ModelInferResponse_InferOutputTensor) GetContents ¶
func (x *ModelInferResponse_InferOutputTensor) GetContents() *InferTensorContents
func (*ModelInferResponse_InferOutputTensor) GetDatatype ¶
func (x *ModelInferResponse_InferOutputTensor) GetDatatype() string
func (*ModelInferResponse_InferOutputTensor) GetName ¶
func (x *ModelInferResponse_InferOutputTensor) GetName() string
func (*ModelInferResponse_InferOutputTensor) GetParameters ¶
func (x *ModelInferResponse_InferOutputTensor) GetParameters() map[string]*InferParameter
func (*ModelInferResponse_InferOutputTensor) GetShape ¶
func (x *ModelInferResponse_InferOutputTensor) GetShape() []int64
func (*ModelInferResponse_InferOutputTensor) ProtoMessage ¶
func (*ModelInferResponse_InferOutputTensor) ProtoMessage()
func (*ModelInferResponse_InferOutputTensor) ProtoReflect ¶
func (x *ModelInferResponse_InferOutputTensor) ProtoReflect() protoreflect.Message
func (*ModelInferResponse_InferOutputTensor) Reset ¶
func (x *ModelInferResponse_InferOutputTensor) Reset()
func (*ModelInferResponse_InferOutputTensor) String ¶
func (x *ModelInferResponse_InferOutputTensor) String() string
type ModelInput ¶
type ModelInput struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the input. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: DataType data_type // @@ // @@ The data-type of the input. // @@ DataType DataType `protobuf:"varint,2,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"` // @@ .. cpp:var:: Format format // @@ // @@ The format of the input. Optional. // @@ Format ModelInput_Format `protobuf:"varint,3,opt,name=format,proto3,enum=inference.ModelInput_Format" json:"format,omitempty"` // @@ .. cpp:var:: int64 dims (repeated) // @@ // @@ The dimensions/shape of the input tensor that must be provided // @@ when invoking the inference API for this model. // @@ Dims []int64 `protobuf:"varint,4,rep,packed,name=dims,proto3" json:"dims,omitempty"` // @@ .. cpp:var:: ModelTensorReshape reshape // @@ // @@ The shape expected for this input by the backend. The input will // @@ be reshaped to this before being presented to the backend. The // @@ reshape must have the same number of elements as the input shape // @@ specified by 'dims'. Optional. // @@ Reshape *ModelTensorReshape `protobuf:"bytes,5,opt,name=reshape,proto3" json:"reshape,omitempty"` // @@ .. cpp:var:: bool is_shape_tensor // @@ // @@ Whether or not the input is a shape tensor to the model. This field // @@ is currently supported only for the TensorRT model. An error will be // @@ generated if this specification does not comply with underlying // @@ model. // @@ IsShapeTensor bool `protobuf:"varint,6,opt,name=is_shape_tensor,json=isShapeTensor,proto3" json:"is_shape_tensor,omitempty"` // @@ .. cpp:var:: bool allow_ragged_batch // @@ // @@ Whether or not the input is allowed to be "ragged" in a dynamically // @@ created batch. Default is false indicating that two requests will // @@ only be batched if this tensor has the same shape in both requests. // @@ True indicates that two requests can be batched even if this tensor // @@ has a different shape in each request. // @@ AllowRaggedBatch bool `protobuf:"varint,7,opt,name=allow_ragged_batch,json=allowRaggedBatch,proto3" json:"allow_ragged_batch,omitempty"` // @@ .. cpp:var:: bool optional // @@ // @@ Whether or not the input is optional for the model execution. // @@ If true, the input is not required in the inference request. // @@ Default value is false. // @@ Optional bool `protobuf:"varint,8,opt,name=optional,proto3" json:"optional,omitempty"` // @@ .. cpp:var:: bool is_non_linear_format_io // @@ // @@ Indicates whether the input tensor uses a non-linear IO format. This // @@ field is currently supported only for TensorRT models. An error will // @@ be generated if this specification does not comply with the // @@ underlying model. // @@ IsNonLinearFormatIo bool `protobuf:"varint,9,opt,name=is_non_linear_format_io,json=isNonLinearFormatIo,proto3" json:"is_non_linear_format_io,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message ModelInput @@ @@ An input required by the model. @@
func (*ModelInput) Descriptor
deprecated
func (*ModelInput) Descriptor() ([]byte, []int)
Deprecated: Use ModelInput.ProtoReflect.Descriptor instead.
func (*ModelInput) GetAllowRaggedBatch ¶
func (x *ModelInput) GetAllowRaggedBatch() bool
func (*ModelInput) GetDataType ¶
func (x *ModelInput) GetDataType() DataType
func (*ModelInput) GetDims ¶
func (x *ModelInput) GetDims() []int64
func (*ModelInput) GetFormat ¶
func (x *ModelInput) GetFormat() ModelInput_Format
func (*ModelInput) GetIsNonLinearFormatIo ¶
func (x *ModelInput) GetIsNonLinearFormatIo() bool
func (*ModelInput) GetIsShapeTensor ¶
func (x *ModelInput) GetIsShapeTensor() bool
func (*ModelInput) GetName ¶
func (x *ModelInput) GetName() string
func (*ModelInput) GetOptional ¶
func (x *ModelInput) GetOptional() bool
func (*ModelInput) GetReshape ¶
func (x *ModelInput) GetReshape() *ModelTensorReshape
func (*ModelInput) ProtoMessage ¶
func (*ModelInput) ProtoMessage()
func (*ModelInput) ProtoReflect ¶
func (x *ModelInput) ProtoReflect() protoreflect.Message
func (*ModelInput) Reset ¶
func (x *ModelInput) Reset()
func (*ModelInput) String ¶
func (x *ModelInput) String() string
type ModelInput_Format ¶
type ModelInput_Format int32
@@ @@ .. cpp:enum:: Format @@ @@ The format for the input. @@
const ( // @@ .. cpp:enumerator:: Format::FORMAT_NONE = 0 // @@ // @@ The input has no specific format. This is the default. // @@ ModelInput_FORMAT_NONE ModelInput_Format = 0 // @@ .. cpp:enumerator:: Format::FORMAT_NHWC = 1 // @@ // @@ HWC image format. Tensors with this format require 3 dimensions // @@ if the model does not support batching (max_batch_size = 0) or 4 // @@ dimensions if the model does support batching (max_batch_size // @@ >= 1). In either case the 'dims' below should only specify the // @@ 3 non-batch dimensions (i.e. HWC or CHW). // @@ ModelInput_FORMAT_NHWC ModelInput_Format = 1 // @@ .. cpp:enumerator:: Format::FORMAT_NCHW = 2 // @@ // @@ CHW image format. Tensors with this format require 3 dimensions // @@ if the model does not support batching (max_batch_size = 0) or 4 // @@ dimensions if the model does support batching (max_batch_size // @@ >= 1). In either case the 'dims' below should only specify the // @@ 3 non-batch dimensions (i.e. HWC or CHW). // @@ ModelInput_FORMAT_NCHW ModelInput_Format = 2 )
func (ModelInput_Format) Descriptor ¶
func (ModelInput_Format) Descriptor() protoreflect.EnumDescriptor
func (ModelInput_Format) Enum ¶
func (x ModelInput_Format) Enum() *ModelInput_Format
func (ModelInput_Format) EnumDescriptor
deprecated
func (ModelInput_Format) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelInput_Format.Descriptor instead.
func (ModelInput_Format) Number ¶
func (x ModelInput_Format) Number() protoreflect.EnumNumber
func (ModelInput_Format) String ¶
func (x ModelInput_Format) String() string
func (ModelInput_Format) Type ¶
func (ModelInput_Format) Type() protoreflect.EnumType
type ModelInstanceGroup ¶
type ModelInstanceGroup struct { // @@ .. cpp:var:: string name // @@ // @@ Optional name of this group of instances. If not specified the // @@ name will be formed as <model name>_<group number>. The name of // @@ individual instances will be further formed by a unique instance // @@ number and GPU index: // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: Kind kind // @@ // @@ The kind of this instance group. Default is KIND_AUTO. If // @@ KIND_AUTO or KIND_GPU then both 'count' and 'gpu' are valid and // @@ may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid // @@ and 'gpu' cannot be specified. // @@ Kind ModelInstanceGroup_Kind `protobuf:"varint,4,opt,name=kind,proto3,enum=inference.ModelInstanceGroup_Kind" json:"kind,omitempty"` // @@ .. cpp:var:: int32 count // @@ // @@ For a group assigned to GPU, the number of instances created for // @@ each GPU listed in 'gpus'. For a group assigned to CPU the number // @@ of instances created. Default is 1. Count int32 `protobuf:"varint,2,opt,name=count,proto3" json:"count,omitempty"` // @@ .. cpp:var:: ModelRateLimiter rate_limiter // @@ // @@ The rate limiter specific settings to be associated with this // @@ instance group. Optional, if not specified no rate limiting // @@ will be applied to this instance group. // @@ RateLimiter *ModelRateLimiter `protobuf:"bytes,6,opt,name=rate_limiter,json=rateLimiter,proto3" json:"rate_limiter,omitempty"` // @@ .. cpp:var:: int32 gpus (repeated) // @@ // @@ GPU(s) where instances should be available. For each GPU listed, // @@ 'count' instances of the model will be available. Setting 'gpus' // @@ to empty (or not specifying at all) is equivalent to listing all // @@ available GPUs. // @@ Gpus []int32 `protobuf:"varint,3,rep,packed,name=gpus,proto3" json:"gpus,omitempty"` // @@ .. cpp:var:: SecondaryDevice secondary_devices (repeated) // @@ // @@ Secondary devices that are required by instances specified by this // @@ instance group. Optional. // @@ SecondaryDevices []*ModelInstanceGroup_SecondaryDevice `protobuf:"bytes,8,rep,name=secondary_devices,json=secondaryDevices,proto3" json:"secondary_devices,omitempty"` // @@ .. cpp:var:: string profile (repeated) // @@ // @@ For TensorRT models containing multiple optimization profile, this // @@ parameter specifies a set of optimization profiles available to this // @@ instance group. The inference server will choose the optimal profile // @@ based on the shapes of the input tensors. This field should lie // @@ between 0 and <TotalNumberOfOptimizationProfilesInPlanModel> - 1 // @@ and be specified only for TensorRT backend, otherwise an error will // @@ be generated. If not specified, the server will select the first // @@ optimization profile by default. // @@ Profile []string `protobuf:"bytes,5,rep,name=profile,proto3" json:"profile,omitempty"` // @@ .. cpp:var:: bool passive // @@ // @@ Whether the instances within this instance group will be accepting // @@ inference requests from the scheduler. If true, the instances will // @@ not be added to the scheduler. Default value is false. // @@ Passive bool `protobuf:"varint,7,opt,name=passive,proto3" json:"passive,omitempty"` // @@ .. cpp:var:: string host_policy // @@ // @@ The host policy name that the instance to be associated with. // @@ The default value is set to reflect the device kind of the instance, // @@ for instance, KIND_CPU is "cpu", KIND_MODEL is "model" and // @@ KIND_GPU is "gpu_<gpu_id>". // @@ HostPolicy string `protobuf:"bytes,9,opt,name=host_policy,json=hostPolicy,proto3" json:"host_policy,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message ModelInstanceGroup @@ @@ A group of one or more instances of a model and resources made @@ available for those instances. @@
func (*ModelInstanceGroup) Descriptor
deprecated
func (*ModelInstanceGroup) Descriptor() ([]byte, []int)
Deprecated: Use ModelInstanceGroup.ProtoReflect.Descriptor instead.
func (*ModelInstanceGroup) GetCount ¶
func (x *ModelInstanceGroup) GetCount() int32
func (*ModelInstanceGroup) GetGpus ¶
func (x *ModelInstanceGroup) GetGpus() []int32
func (*ModelInstanceGroup) GetHostPolicy ¶
func (x *ModelInstanceGroup) GetHostPolicy() string
func (*ModelInstanceGroup) GetKind ¶
func (x *ModelInstanceGroup) GetKind() ModelInstanceGroup_Kind
func (*ModelInstanceGroup) GetName ¶
func (x *ModelInstanceGroup) GetName() string
func (*ModelInstanceGroup) GetPassive ¶
func (x *ModelInstanceGroup) GetPassive() bool
func (*ModelInstanceGroup) GetProfile ¶
func (x *ModelInstanceGroup) GetProfile() []string
func (*ModelInstanceGroup) GetRateLimiter ¶
func (x *ModelInstanceGroup) GetRateLimiter() *ModelRateLimiter
func (*ModelInstanceGroup) GetSecondaryDevices ¶
func (x *ModelInstanceGroup) GetSecondaryDevices() []*ModelInstanceGroup_SecondaryDevice
func (*ModelInstanceGroup) ProtoMessage ¶
func (*ModelInstanceGroup) ProtoMessage()
func (*ModelInstanceGroup) ProtoReflect ¶
func (x *ModelInstanceGroup) ProtoReflect() protoreflect.Message
func (*ModelInstanceGroup) Reset ¶
func (x *ModelInstanceGroup) Reset()
func (*ModelInstanceGroup) String ¶
func (x *ModelInstanceGroup) String() string
type ModelInstanceGroup_Kind ¶
type ModelInstanceGroup_Kind int32
@@ @@ .. cpp:enum:: Kind @@ @@ Kind of this instance group. @@
const ( // @@ .. cpp:enumerator:: Kind::KIND_AUTO = 0 // @@ // @@ This instance group represents instances that can run on either // @@ CPU or GPU. If all GPUs listed in 'gpus' are available then // @@ instances will be created on GPU(s), otherwise instances will // @@ be created on CPU. // @@ ModelInstanceGroup_KIND_AUTO ModelInstanceGroup_Kind = 0 // @@ .. cpp:enumerator:: Kind::KIND_GPU = 1 // @@ // @@ This instance group represents instances that must run on the // @@ GPU. // @@ ModelInstanceGroup_KIND_GPU ModelInstanceGroup_Kind = 1 // @@ .. cpp:enumerator:: Kind::KIND_CPU = 2 // @@ // @@ This instance group represents instances that must run on the // @@ CPU. // @@ ModelInstanceGroup_KIND_CPU ModelInstanceGroup_Kind = 2 // @@ .. cpp:enumerator:: Kind::KIND_MODEL = 3 // @@ // @@ This instance group represents instances that should run on the // @@ CPU and/or GPU(s) as specified by the model or backend itself. // @@ The inference server will not override the model/backend // @@ settings. // @@ ModelInstanceGroup_KIND_MODEL ModelInstanceGroup_Kind = 3 )
func (ModelInstanceGroup_Kind) Descriptor ¶
func (ModelInstanceGroup_Kind) Descriptor() protoreflect.EnumDescriptor
func (ModelInstanceGroup_Kind) Enum ¶
func (x ModelInstanceGroup_Kind) Enum() *ModelInstanceGroup_Kind
func (ModelInstanceGroup_Kind) EnumDescriptor
deprecated
func (ModelInstanceGroup_Kind) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelInstanceGroup_Kind.Descriptor instead.
func (ModelInstanceGroup_Kind) Number ¶
func (x ModelInstanceGroup_Kind) Number() protoreflect.EnumNumber
func (ModelInstanceGroup_Kind) String ¶
func (x ModelInstanceGroup_Kind) String() string
func (ModelInstanceGroup_Kind) Type ¶
func (ModelInstanceGroup_Kind) Type() protoreflect.EnumType
type ModelInstanceGroup_SecondaryDevice ¶
type ModelInstanceGroup_SecondaryDevice struct { // @@ .. cpp:var:: SecondaryDeviceKind kind // @@ // @@ The secondary device kind. // @@ Kind ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind `` /* 132-byte string literal not displayed */ // @@ .. cpp:var:: int64 device_id // @@ // @@ Identifier for the secondary device. // @@ DeviceId int64 `protobuf:"varint,2,opt,name=device_id,json=deviceId,proto3" json:"device_id,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message SecondaryDevice @@ @@ A secondary device required for a model instance. @@
func (*ModelInstanceGroup_SecondaryDevice) Descriptor
deprecated
func (*ModelInstanceGroup_SecondaryDevice) Descriptor() ([]byte, []int)
Deprecated: Use ModelInstanceGroup_SecondaryDevice.ProtoReflect.Descriptor instead.
func (*ModelInstanceGroup_SecondaryDevice) GetDeviceId ¶
func (x *ModelInstanceGroup_SecondaryDevice) GetDeviceId() int64
func (*ModelInstanceGroup_SecondaryDevice) ProtoMessage ¶
func (*ModelInstanceGroup_SecondaryDevice) ProtoMessage()
func (*ModelInstanceGroup_SecondaryDevice) ProtoReflect ¶
func (x *ModelInstanceGroup_SecondaryDevice) ProtoReflect() protoreflect.Message
func (*ModelInstanceGroup_SecondaryDevice) Reset ¶
func (x *ModelInstanceGroup_SecondaryDevice) Reset()
func (*ModelInstanceGroup_SecondaryDevice) String ¶
func (x *ModelInstanceGroup_SecondaryDevice) String() string
type ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind ¶
type ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind int32
@@ @@ .. cpp:enum:: SecondaryDeviceKind @@ @@ The kind of the secondary device. @@
const ( // @@ .. cpp:enumerator:: SecondaryDeviceKind::KIND_NVDLA = 0 // @@ // @@ An NVDLA core. http://nvdla.org // @@ Currently KIND_NVDLA is only supported by the TensorRT backend. // @@ ModelInstanceGroup_SecondaryDevice_KIND_NVDLA ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind = 0 )
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Descriptor ¶
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Descriptor() protoreflect.EnumDescriptor
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) EnumDescriptor
deprecated
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind.Descriptor instead.
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Number ¶
func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Number() protoreflect.EnumNumber
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) String ¶
func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) String() string
type ModelMetadataRequest ¶
type ModelMetadataRequest struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The name of the model. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: string version // @@ // @@ The version of the model to check for readiness. If not // @@ given the server will choose a version based on the // @@ model and internal policy. // @@ Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message ModelMetadataRequest @@ @@ Request message for ModelMetadata. @@
func (*ModelMetadataRequest) Descriptor
deprecated
func (*ModelMetadataRequest) Descriptor() ([]byte, []int)
Deprecated: Use ModelMetadataRequest.ProtoReflect.Descriptor instead.
func (*ModelMetadataRequest) GetName ¶
func (x *ModelMetadataRequest) GetName() string
func (*ModelMetadataRequest) GetVersion ¶
func (x *ModelMetadataRequest) GetVersion() string
func (*ModelMetadataRequest) ProtoMessage ¶
func (*ModelMetadataRequest) ProtoMessage()
func (*ModelMetadataRequest) ProtoReflect ¶
func (x *ModelMetadataRequest) ProtoReflect() protoreflect.Message
func (*ModelMetadataRequest) Reset ¶
func (x *ModelMetadataRequest) Reset()
func (*ModelMetadataRequest) String ¶
func (x *ModelMetadataRequest) String() string
type ModelMetadataResponse ¶
type ModelMetadataResponse struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The model name. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ // @@ .. cpp:var:: string versions (repeated) // @@ // @@ The versions of the model. // @@ Versions []string `protobuf:"bytes,2,rep,name=versions,proto3" json:"versions,omitempty"` // @@ // @@ .. cpp:var:: string platform // @@ // @@ The model's platform. // @@ Platform string `protobuf:"bytes,3,opt,name=platform,proto3" json:"platform,omitempty"` // @@ // @@ .. cpp:var:: TensorMetadata inputs (repeated) // @@ // @@ The model's inputs. // @@ Inputs []*ModelMetadataResponse_TensorMetadata `protobuf:"bytes,4,rep,name=inputs,proto3" json:"inputs,omitempty"` // @@ // @@ .. cpp:var:: TensorMetadata outputs (repeated) // @@ // @@ The model's outputs. // @@ Outputs []*ModelMetadataResponse_TensorMetadata `protobuf:"bytes,5,rep,name=outputs,proto3" json:"outputs,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message ModelMetadataResponse @@ @@ Response message for ModelMetadata. @@
func (*ModelMetadataResponse) Descriptor
deprecated
func (*ModelMetadataResponse) Descriptor() ([]byte, []int)
Deprecated: Use ModelMetadataResponse.ProtoReflect.Descriptor instead.
func (*ModelMetadataResponse) GetInputs ¶
func (x *ModelMetadataResponse) GetInputs() []*ModelMetadataResponse_TensorMetadata
func (*ModelMetadataResponse) GetName ¶
func (x *ModelMetadataResponse) GetName() string
func (*ModelMetadataResponse) GetOutputs ¶
func (x *ModelMetadataResponse) GetOutputs() []*ModelMetadataResponse_TensorMetadata
func (*ModelMetadataResponse) GetPlatform ¶
func (x *ModelMetadataResponse) GetPlatform() string
func (*ModelMetadataResponse) GetVersions ¶
func (x *ModelMetadataResponse) GetVersions() []string
func (*ModelMetadataResponse) ProtoMessage ¶
func (*ModelMetadataResponse) ProtoMessage()
func (*ModelMetadataResponse) ProtoReflect ¶
func (x *ModelMetadataResponse) ProtoReflect() protoreflect.Message
func (*ModelMetadataResponse) Reset ¶
func (x *ModelMetadataResponse) Reset()
func (*ModelMetadataResponse) String ¶
func (x *ModelMetadataResponse) String() string
type ModelMetadataResponse_TensorMetadata ¶
type ModelMetadataResponse_TensorMetadata struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The tensor name. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ // @@ .. cpp:var:: string datatype // @@ // @@ The tensor data type. // @@ Datatype string `protobuf:"bytes,2,opt,name=datatype,proto3" json:"datatype,omitempty"` // @@ // @@ .. cpp:var:: int64 shape (repeated) // @@ // @@ The tensor shape. A variable-size dimension is represented // @@ by a -1 value. // @@ Shape []int64 `protobuf:"varint,3,rep,packed,name=shape,proto3" json:"shape,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message TensorMetadata @@ @@ Metadata for a tensor. @@
func (*ModelMetadataResponse_TensorMetadata) Descriptor
deprecated
func (*ModelMetadataResponse_TensorMetadata) Descriptor() ([]byte, []int)
Deprecated: Use ModelMetadataResponse_TensorMetadata.ProtoReflect.Descriptor instead.
func (*ModelMetadataResponse_TensorMetadata) GetDatatype ¶
func (x *ModelMetadataResponse_TensorMetadata) GetDatatype() string
func (*ModelMetadataResponse_TensorMetadata) GetName ¶
func (x *ModelMetadataResponse_TensorMetadata) GetName() string
func (*ModelMetadataResponse_TensorMetadata) GetShape ¶
func (x *ModelMetadataResponse_TensorMetadata) GetShape() []int64
func (*ModelMetadataResponse_TensorMetadata) ProtoMessage ¶
func (*ModelMetadataResponse_TensorMetadata) ProtoMessage()
func (*ModelMetadataResponse_TensorMetadata) ProtoReflect ¶
func (x *ModelMetadataResponse_TensorMetadata) ProtoReflect() protoreflect.Message
func (*ModelMetadataResponse_TensorMetadata) Reset ¶
func (x *ModelMetadataResponse_TensorMetadata) Reset()
func (*ModelMetadataResponse_TensorMetadata) String ¶
func (x *ModelMetadataResponse_TensorMetadata) String() string
type ModelOperations ¶
type ModelOperations struct { // @@ .. cpp:var:: string op_library_filename (repeated) // @@ // @@ Optional paths of the libraries providing custom operations for // @@ this model. Valid only for ONNX models. // @@ OpLibraryFilename []string `protobuf:"bytes,1,rep,name=op_library_filename,json=opLibraryFilename,proto3" json:"op_library_filename,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message ModelOperations @@ @@ The metadata of libraries providing custom operations for this model. @@
func (*ModelOperations) Descriptor
deprecated
func (*ModelOperations) Descriptor() ([]byte, []int)
Deprecated: Use ModelOperations.ProtoReflect.Descriptor instead.
func (*ModelOperations) GetOpLibraryFilename ¶
func (x *ModelOperations) GetOpLibraryFilename() []string
func (*ModelOperations) ProtoMessage ¶
func (*ModelOperations) ProtoMessage()
func (*ModelOperations) ProtoReflect ¶
func (x *ModelOperations) ProtoReflect() protoreflect.Message
func (*ModelOperations) Reset ¶
func (x *ModelOperations) Reset()
func (*ModelOperations) String ¶
func (x *ModelOperations) String() string
type ModelOptimizationPolicy ¶
type ModelOptimizationPolicy struct { // @@ .. cpp:var:: Graph graph // @@ // @@ The graph optimization setting for the model. Optional. // @@ Graph *ModelOptimizationPolicy_Graph `protobuf:"bytes,1,opt,name=graph,proto3" json:"graph,omitempty"` // @@ .. cpp:var:: ModelPriority priority // @@ // @@ The priority setting for the model. Optional. // @@ Priority ModelOptimizationPolicy_ModelPriority `protobuf:"varint,2,opt,name=priority,proto3,enum=inference.ModelOptimizationPolicy_ModelPriority" json:"priority,omitempty"` // @@ .. cpp:var:: Cuda cuda // @@ // @@ CUDA-specific optimization settings. Optional. // @@ Cuda *ModelOptimizationPolicy_Cuda `protobuf:"bytes,3,opt,name=cuda,proto3" json:"cuda,omitempty"` // @@ .. cpp:var:: ExecutionAccelerators execution_accelerators // @@ // @@ The accelerators used for the model. Optional. // @@ ExecutionAccelerators *ModelOptimizationPolicy_ExecutionAccelerators `protobuf:"bytes,4,opt,name=execution_accelerators,json=executionAccelerators,proto3" json:"execution_accelerators,omitempty"` // @@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory // @@ // @@ Use pinned memory buffer when the data transfer for inputs // @@ is between GPU memory and non-pinned system memory. // @@ Default is true. // @@ InputPinnedMemory *ModelOptimizationPolicy_PinnedMemoryBuffer `protobuf:"bytes,5,opt,name=input_pinned_memory,json=inputPinnedMemory,proto3" json:"input_pinned_memory,omitempty"` // @@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory // @@ // @@ Use pinned memory buffer when the data transfer for outputs // @@ is between GPU memory and non-pinned system memory. // @@ Default is true. // @@ OutputPinnedMemory *ModelOptimizationPolicy_PinnedMemoryBuffer `protobuf:"bytes,6,opt,name=output_pinned_memory,json=outputPinnedMemory,proto3" json:"output_pinned_memory,omitempty"` // @@ .. cpp:var:: uint32 gather_kernel_buffer_threshold // @@ // @@ The backend may use a gather kernel to gather input data if the // @@ device has direct access to the source buffer and the destination // @@ buffer. In such case, the gather kernel will be used only if the // @@ number of buffers to be gathered is greater or equal to // @@ the specified value. If 0, the gather kernel will be disabled. // @@ Default value is 0. // @@ Currently only recognized by TensorRT backend. // @@ GatherKernelBufferThreshold uint32 `` /* 147-byte string literal not displayed */ // @@ .. cpp:var:: bool eager_batching // @@ // @@ Start preparing the next batch before the model instance is ready // @@ for the next inference. This option can be used to overlap the // @@ batch preparation with model execution, with the trade-off that // @@ the next batch might be smaller than what it could have been. // @@ Default value is false. // @@ Currently only recognized by TensorRT backend. // @@ EagerBatching bool `protobuf:"varint,8,opt,name=eager_batching,json=eagerBatching,proto3" json:"eager_batching,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message ModelOptimizationPolicy @@ @@ Optimization settings for a model. These settings control if/how a @@ model is optimized and prioritized by the backend framework when @@ it is loaded. @@
func (*ModelOptimizationPolicy) Descriptor
deprecated
func (*ModelOptimizationPolicy) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy) GetCuda ¶
func (x *ModelOptimizationPolicy) GetCuda() *ModelOptimizationPolicy_Cuda
func (*ModelOptimizationPolicy) GetEagerBatching ¶
func (x *ModelOptimizationPolicy) GetEagerBatching() bool
func (*ModelOptimizationPolicy) GetExecutionAccelerators ¶
func (x *ModelOptimizationPolicy) GetExecutionAccelerators() *ModelOptimizationPolicy_ExecutionAccelerators
func (*ModelOptimizationPolicy) GetGatherKernelBufferThreshold ¶
func (x *ModelOptimizationPolicy) GetGatherKernelBufferThreshold() uint32
func (*ModelOptimizationPolicy) GetGraph ¶
func (x *ModelOptimizationPolicy) GetGraph() *ModelOptimizationPolicy_Graph
func (*ModelOptimizationPolicy) GetInputPinnedMemory ¶
func (x *ModelOptimizationPolicy) GetInputPinnedMemory() *ModelOptimizationPolicy_PinnedMemoryBuffer
func (*ModelOptimizationPolicy) GetOutputPinnedMemory ¶
func (x *ModelOptimizationPolicy) GetOutputPinnedMemory() *ModelOptimizationPolicy_PinnedMemoryBuffer
func (*ModelOptimizationPolicy) GetPriority ¶
func (x *ModelOptimizationPolicy) GetPriority() ModelOptimizationPolicy_ModelPriority
func (*ModelOptimizationPolicy) ProtoMessage ¶
func (*ModelOptimizationPolicy) ProtoMessage()
func (*ModelOptimizationPolicy) ProtoReflect ¶
func (x *ModelOptimizationPolicy) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy) Reset ¶
func (x *ModelOptimizationPolicy) Reset()
func (*ModelOptimizationPolicy) String ¶
func (x *ModelOptimizationPolicy) String() string
type ModelOptimizationPolicy_Cuda ¶
type ModelOptimizationPolicy_Cuda struct { // @@ .. cpp:var:: bool graphs // @@ // @@ Use CUDA graphs API to capture model operations and execute // @@ them more efficiently. Default value is false. // @@ Currently only recognized by TensorRT backend. // @@ Graphs bool `protobuf:"varint,1,opt,name=graphs,proto3" json:"graphs,omitempty"` // @@ .. cpp:var:: bool busy_wait_events // @@ // @@ Use busy-waiting to synchronize CUDA events to achieve minimum // @@ latency from event complete to host thread to be notified, with // @@ the cost of high CPU load. Default value is false. // @@ Currently only recognized by TensorRT backend. // @@ BusyWaitEvents bool `protobuf:"varint,2,opt,name=busy_wait_events,json=busyWaitEvents,proto3" json:"busy_wait_events,omitempty"` // @@ .. cpp:var:: GraphSpec graph_spec (repeated) // @@ // @@ Specification of the CUDA graph to be captured. If not specified // @@ and 'graphs' is true, the default CUDA graphs will be captured // @@ based on model settings. // @@ Currently only recognized by TensorRT backend. // @@ GraphSpec []*ModelOptimizationPolicy_Cuda_GraphSpec `protobuf:"bytes,3,rep,name=graph_spec,json=graphSpec,proto3" json:"graph_spec,omitempty"` // @@ .. cpp:var:: bool output_copy_stream // @@ // @@ Uses a CUDA stream separate from the inference stream to copy the // @@ output to host. However, be aware that setting this option to // @@ true will lead to an increase in the memory consumption of the // @@ model as Triton will allocate twice as much GPU memory for its // @@ I/O tensor buffers. Default value is false. // @@ Currently only recognized by TensorRT backend. // @@ OutputCopyStream bool `protobuf:"varint,4,opt,name=output_copy_stream,json=outputCopyStream,proto3" json:"output_copy_stream,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message Cuda @@ @@ CUDA-specific optimization settings. @@
func (*ModelOptimizationPolicy_Cuda) Descriptor
deprecated
func (*ModelOptimizationPolicy_Cuda) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Cuda.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Cuda) GetBusyWaitEvents ¶
func (x *ModelOptimizationPolicy_Cuda) GetBusyWaitEvents() bool
func (*ModelOptimizationPolicy_Cuda) GetGraphSpec ¶
func (x *ModelOptimizationPolicy_Cuda) GetGraphSpec() []*ModelOptimizationPolicy_Cuda_GraphSpec
func (*ModelOptimizationPolicy_Cuda) GetGraphs ¶
func (x *ModelOptimizationPolicy_Cuda) GetGraphs() bool
func (*ModelOptimizationPolicy_Cuda) GetOutputCopyStream ¶
func (x *ModelOptimizationPolicy_Cuda) GetOutputCopyStream() bool
func (*ModelOptimizationPolicy_Cuda) ProtoMessage ¶
func (*ModelOptimizationPolicy_Cuda) ProtoMessage()
func (*ModelOptimizationPolicy_Cuda) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Cuda) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Cuda) Reset ¶
func (x *ModelOptimizationPolicy_Cuda) Reset()
func (*ModelOptimizationPolicy_Cuda) String ¶
func (x *ModelOptimizationPolicy_Cuda) String() string
type ModelOptimizationPolicy_Cuda_GraphSpec ¶
type ModelOptimizationPolicy_Cuda_GraphSpec struct { // @@ .. cpp:var:: int32 batch_size // @@ // @@ The batch size of the CUDA graph. If 'max_batch_size' is 0, // @@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must // @@ be set to value between 1 and 'max_batch_size'. // @@ BatchSize int32 `protobuf:"varint,1,opt,name=batch_size,json=batchSize,proto3" json:"batch_size,omitempty"` // @@ .. cpp:var:: map<string, Shape> input // @@ // @@ The specification of the inputs. 'Shape' is the shape of the // @@ input without batching dimension. // @@ Input map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape `` /* 151-byte string literal not displayed */ // @@ .. cpp:var:: LowerBound graph_lower_bound // @@ // @@ Specify the lower bound of the CUDA graph. Optional. // @@ If specified, the graph can be used for input shapes and // @@ batch sizes that are in closed interval between the lower // @@ bound specification and graph specification. For dynamic // @@ shape model, this allows CUDA graphs to be launched // @@ frequently without capturing all possible shape combinations. // @@ However, using graph for shape combinations different from // @@ the one used for capturing introduces uninitialized data for // @@ execution and it may distort the inference result if // @@ the model is sensitive to uninitialized data. // @@ GraphLowerBound *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound `protobuf:"bytes,3,opt,name=graph_lower_bound,json=graphLowerBound,proto3" json:"graph_lower_bound,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message GraphSpec @@ @@ Specification of the CUDA graph to be captured. @@
func (*ModelOptimizationPolicy_Cuda_GraphSpec) Descriptor
deprecated
func (*ModelOptimizationPolicy_Cuda_GraphSpec) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Cuda_GraphSpec.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Cuda_GraphSpec) GetBatchSize ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetBatchSize() int32
func (*ModelOptimizationPolicy_Cuda_GraphSpec) GetGraphLowerBound ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetGraphLowerBound() *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound
func (*ModelOptimizationPolicy_Cuda_GraphSpec) GetInput ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetInput() map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape
func (*ModelOptimizationPolicy_Cuda_GraphSpec) ProtoMessage ¶
func (*ModelOptimizationPolicy_Cuda_GraphSpec) ProtoMessage()
func (*ModelOptimizationPolicy_Cuda_GraphSpec) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Cuda_GraphSpec) Reset ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) Reset()
func (*ModelOptimizationPolicy_Cuda_GraphSpec) String ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) String() string
type ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound ¶
type ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound struct { // @@ .. cpp:var:: int32 batch_size // @@ // @@ The batch size of the CUDA graph. If 'max_batch_size' is 0, // @@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must // @@ be set to value between 1 and 'max_batch_size'. // @@ BatchSize int32 `protobuf:"varint,1,opt,name=batch_size,json=batchSize,proto3" json:"batch_size,omitempty"` // @@ .. cpp:var:: map<string, Shape> input // @@ // @@ The specification of the inputs. 'Shape' is the shape of // @@ the input without batching dimension. // @@ Input map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape `` /* 151-byte string literal not displayed */ // contains filtered or unexported fields }
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Descriptor
deprecated
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetBatchSize ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetBatchSize() int32
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetInput ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetInput() map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoMessage ¶
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoMessage()
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Reset ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Reset()
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) String ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) String() string
type ModelOptimizationPolicy_Cuda_GraphSpec_Shape ¶
type ModelOptimizationPolicy_Cuda_GraphSpec_Shape struct { // @@ .. cpp:var:: int64 dim (repeated) // @@ // @@ The dimension. // @@ Dim []int64 `protobuf:"varint,1,rep,packed,name=dim,proto3" json:"dim,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message Shape @@ @@ Specification of tensor dimension. @@
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Descriptor
deprecated
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Cuda_GraphSpec_Shape.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) GetDim ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) GetDim() []int64
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoMessage ¶
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoMessage()
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Reset ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Reset()
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) String ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) String() string
type ModelOptimizationPolicy_ExecutionAccelerators ¶
type ModelOptimizationPolicy_ExecutionAccelerators struct { // @@ .. cpp:var:: Accelerator gpu_execution_accelerator (repeated) // @@ // @@ The preferred execution provider to be used if the model instance // @@ is deployed on GPU. // @@ // @@ For ONNX Runtime backend, possible value is "tensorrt" as name, // @@ and no parameters are required. // @@ // @@ For TensorFlow backend, possible values are "tensorrt", // @@ "auto_mixed_precision", "gpu_io". // @@ // @@ For "tensorrt", the following parameters can be specified: // @@ "precision_mode": The precision used for optimization. // @@ Allowed values are "FP32" and "FP16". Default value is "FP32". // @@ // @@ "max_cached_engines": The maximum number of cached TensorRT // @@ engines in dynamic TensorRT ops. Default value is 100. // @@ // @@ "minimum_segment_size": The smallest model subgraph that will // @@ be considered for optimization by TensorRT. Default value is 3. // @@ // @@ "max_workspace_size_bytes": The maximum GPU memory the model // @@ can use temporarily during execution. Default value is 1GB. // @@ // @@ For "auto_mixed_precision", no parameters are required. If set, // @@ the model will try to use FP16 for better performance. // @@ This optimization can not be set with "tensorrt". // @@ // @@ For "gpu_io", no parameters are required. If set, the model will // @@ be executed using TensorFlow Callable API to set input and output // @@ tensors in GPU memory if possible, which can reduce data transfer // @@ overhead if the model is used in ensemble. However, the Callable // @@ object will be created on model creation and it will request all // @@ outputs for every model execution, which may impact the // @@ performance if a request does not require all outputs. This // @@ optimization will only take effect if the model instance is // @@ created with KIND_GPU. // @@ GpuExecutionAccelerator []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator `` /* 132-byte string literal not displayed */ // @@ .. 
cpp:var:: Accelerator cpu_execution_accelerator (repeated) // @@ // @@ The preferred execution provider to be used if the model instance // @@ is deployed on CPU. // @@ // @@ For ONNX Runtime backend, possible value is "openvino" as name, // @@ and no parameters are required. // @@ CpuExecutionAccelerator []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator `` /* 132-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message ExecutionAccelerators @@ @@ Specify the preferred execution accelerators to be used to execute @@ the model. Currently only recognized by ONNX Runtime backend and @@ TensorFlow backend. @@ @@ For ONNX Runtime backend, it will deploy the model with the execution @@ accelerators by priority, the priority is determined based on the @@ order that they are set, i.e. the provider at the front has highest @@ priority. Overall, the priority will be in the following order: @@ <gpu_execution_accelerator> (if instance is on GPU) @@ CUDA Execution Provider (if instance is on GPU) @@ <cpu_execution_accelerator> @@ Default CPU Execution Provider @@
func (*ModelOptimizationPolicy_ExecutionAccelerators) Descriptor
deprecated
func (*ModelOptimizationPolicy_ExecutionAccelerators) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_ExecutionAccelerators.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_ExecutionAccelerators) GetCpuExecutionAccelerator ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) GetCpuExecutionAccelerator() []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
func (*ModelOptimizationPolicy_ExecutionAccelerators) GetGpuExecutionAccelerator ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) GetGpuExecutionAccelerator() []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
func (*ModelOptimizationPolicy_ExecutionAccelerators) ProtoMessage ¶
func (*ModelOptimizationPolicy_ExecutionAccelerators) ProtoMessage()
func (*ModelOptimizationPolicy_ExecutionAccelerators) ProtoReflect ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_ExecutionAccelerators) Reset ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) Reset()
func (*ModelOptimizationPolicy_ExecutionAccelerators) String ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) String() string
type ModelOptimizationPolicy_ExecutionAccelerators_Accelerator ¶
type ModelOptimizationPolicy_ExecutionAccelerators_Accelerator struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the execution accelerator. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: map<string, string> parameters // @@ // @@ Additional parameters used to configure the accelerator. // @@ Parameters map[string]string `` /* 161-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message Accelerator @@ @@ Specify the accelerator to be used to execute the model. @@ Accelerator with the same name may accept different parameters @@ depending on the backends. @@
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Descriptor
deprecated
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_ExecutionAccelerators_Accelerator.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetName ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetName() string
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetParameters ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetParameters() map[string]string
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoMessage ¶
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoMessage()
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoReflect ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Reset ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Reset()
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) String ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) String() string
type ModelOptimizationPolicy_Graph ¶
type ModelOptimizationPolicy_Graph struct { // @@ .. cpp:var:: int32 level // @@ // @@ The optimization level. Defaults to 0 (zero) if not specified. // @@ // @@ - -1: Disabled // @@ - 0: Framework default // @@ - 1+: Enable optimization level (greater values indicate // @@ higher optimization levels) // @@ Level int32 `protobuf:"varint,1,opt,name=level,proto3" json:"level,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message Graph @@ @@ Enable generic graph optimization of the model. If not specified @@ the framework's default level of optimization is used. Supports @@ TensorFlow graphdef and savedmodel and Onnx models. For TensorFlow @@ causes XLA to be enabled/disabled for the model. For Onnx defaults @@ to enabling all optimizations, -1 enables only basic optimizations, @@ +1 enables only basic and extended optimizations. @@
func (*ModelOptimizationPolicy_Graph) Descriptor
deprecated
func (*ModelOptimizationPolicy_Graph) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Graph.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Graph) GetLevel ¶
func (x *ModelOptimizationPolicy_Graph) GetLevel() int32
func (*ModelOptimizationPolicy_Graph) ProtoMessage ¶
func (*ModelOptimizationPolicy_Graph) ProtoMessage()
func (*ModelOptimizationPolicy_Graph) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Graph) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Graph) Reset ¶
func (x *ModelOptimizationPolicy_Graph) Reset()
func (*ModelOptimizationPolicy_Graph) String ¶
func (x *ModelOptimizationPolicy_Graph) String() string
type ModelOptimizationPolicy_ModelPriority ¶
type ModelOptimizationPolicy_ModelPriority int32
@@ @@ .. cpp:enum:: ModelPriority @@ @@ Model priorities. A model will be given scheduling and execution @@ preference over models at lower priorities. Current model @@ priorities only work for TensorRT models. @@
const ( // @@ .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0 // @@ // @@ The default model priority. // @@ ModelOptimizationPolicy_PRIORITY_DEFAULT ModelOptimizationPolicy_ModelPriority = 0 // @@ .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1 // @@ // @@ The maximum model priority. // @@ ModelOptimizationPolicy_PRIORITY_MAX ModelOptimizationPolicy_ModelPriority = 1 // @@ .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2 // @@ // @@ The minimum model priority. // @@ ModelOptimizationPolicy_PRIORITY_MIN ModelOptimizationPolicy_ModelPriority = 2 )
func (ModelOptimizationPolicy_ModelPriority) Descriptor ¶
func (ModelOptimizationPolicy_ModelPriority) Descriptor() protoreflect.EnumDescriptor
func (ModelOptimizationPolicy_ModelPriority) EnumDescriptor
deprecated
func (ModelOptimizationPolicy_ModelPriority) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_ModelPriority.Descriptor instead.
func (ModelOptimizationPolicy_ModelPriority) Number ¶
func (x ModelOptimizationPolicy_ModelPriority) Number() protoreflect.EnumNumber
func (ModelOptimizationPolicy_ModelPriority) String ¶
func (x ModelOptimizationPolicy_ModelPriority) String() string
func (ModelOptimizationPolicy_ModelPriority) Type ¶
func (ModelOptimizationPolicy_ModelPriority) Type() protoreflect.EnumType
type ModelOptimizationPolicy_PinnedMemoryBuffer ¶
type ModelOptimizationPolicy_PinnedMemoryBuffer struct { // @@ .. cpp:var:: bool enable // @@ // @@ Use pinned memory buffer. Default is true. // @@ Enable bool `protobuf:"varint,1,opt,name=enable,proto3" json:"enable,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message PinnedMemoryBuffer @@ @@ Specify whether to use a pinned memory buffer when transferring data @@ between non-pinned system memory and GPU memory. Using a pinned @@ memory buffer for system from/to GPU transfers will typically provide @@ increased performance. For example, in the common use case where the @@ request provides inputs and delivers outputs via non-pinned system @@ memory, if the model instance accepts GPU IOs, the inputs will be @@ processed by two copies: from non-pinned system memory to pinned @@ memory, and from pinned memory to GPU memory. Similarly, pinned @@ memory will be used for delivering the outputs. @@
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) Descriptor
deprecated
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_PinnedMemoryBuffer.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) GetEnable ¶
func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) GetEnable() bool
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoMessage ¶
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoMessage()
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoReflect ¶
func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) Reset ¶
func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) Reset()
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) String ¶
func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) String() string
type ModelOutput ¶
type ModelOutput struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the output. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: DataType data_type // @@ // @@ The data-type of the output. // @@ DataType DataType `protobuf:"varint,2,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"` // @@ .. cpp:var:: int64 dims (repeated) // @@ // @@ The dimensions/shape of the output tensor. // @@ Dims []int64 `protobuf:"varint,3,rep,packed,name=dims,proto3" json:"dims,omitempty"` // @@ .. cpp:var:: ModelTensorReshape reshape // @@ // @@ The shape produced for this output by the backend. The output will // @@ be reshaped from this to the shape specified in 'dims' before being // @@ returned in the inference response. The reshape must have the same // @@ number of elements as the output shape specified by 'dims'. Optional. // @@ Reshape *ModelTensorReshape `protobuf:"bytes,5,opt,name=reshape,proto3" json:"reshape,omitempty"` // @@ .. cpp:var:: string label_filename // @@ // @@ The label file associated with this output. Should be specified only // @@ for outputs that represent classifications. Optional. // @@ LabelFilename string `protobuf:"bytes,4,opt,name=label_filename,json=labelFilename,proto3" json:"label_filename,omitempty"` // @@ .. cpp:var:: bool is_shape_tensor // @@ // @@ Whether or not the output is a shape tensor to the model. This field // @@ is currently supported only for the TensorRT model. An error will be // @@ generated if this specification does not comply with underlying // @@ model. // @@ IsShapeTensor bool `protobuf:"varint,6,opt,name=is_shape_tensor,json=isShapeTensor,proto3" json:"is_shape_tensor,omitempty"` // @@ .. cpp:var:: bool is_non_linear_format_io // @@ // @@ Indicates whether the output tensor uses a non-linear IO format. This // @@ field is currently supported only for TensorRT models. 
An error will // @@ be generated if this specification does not comply with the // @@ underlying model. // @@ IsNonLinearFormatIo bool `protobuf:"varint,7,opt,name=is_non_linear_format_io,json=isNonLinearFormatIo,proto3" json:"is_non_linear_format_io,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelOutput @@ @@ An output produced by the model. @@
func (*ModelOutput) Descriptor
deprecated
func (*ModelOutput) Descriptor() ([]byte, []int)
Deprecated: Use ModelOutput.ProtoReflect.Descriptor instead.
func (*ModelOutput) GetDataType ¶
func (x *ModelOutput) GetDataType() DataType
func (*ModelOutput) GetDims ¶
func (x *ModelOutput) GetDims() []int64
func (*ModelOutput) GetIsNonLinearFormatIo ¶
func (x *ModelOutput) GetIsNonLinearFormatIo() bool
func (*ModelOutput) GetIsShapeTensor ¶
func (x *ModelOutput) GetIsShapeTensor() bool
func (*ModelOutput) GetLabelFilename ¶
func (x *ModelOutput) GetLabelFilename() string
func (*ModelOutput) GetName ¶
func (x *ModelOutput) GetName() string
func (*ModelOutput) GetReshape ¶
func (x *ModelOutput) GetReshape() *ModelTensorReshape
func (*ModelOutput) ProtoMessage ¶
func (*ModelOutput) ProtoMessage()
func (*ModelOutput) ProtoReflect ¶
func (x *ModelOutput) ProtoReflect() protoreflect.Message
func (*ModelOutput) Reset ¶
func (x *ModelOutput) Reset()
func (*ModelOutput) String ¶
func (x *ModelOutput) String() string
type ModelParameter ¶
type ModelParameter struct { // @@ .. cpp:var:: string string_value // @@ // @@ The string value of the parameter. // @@ StringValue string `protobuf:"bytes,1,opt,name=string_value,json=stringValue,proto3" json:"string_value,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelParameter @@ @@ A model parameter. @@
func (*ModelParameter) Descriptor
deprecated
func (*ModelParameter) Descriptor() ([]byte, []int)
Deprecated: Use ModelParameter.ProtoReflect.Descriptor instead.
func (*ModelParameter) GetStringValue ¶
func (x *ModelParameter) GetStringValue() string
func (*ModelParameter) ProtoMessage ¶
func (*ModelParameter) ProtoMessage()
func (*ModelParameter) ProtoReflect ¶
func (x *ModelParameter) ProtoReflect() protoreflect.Message
func (*ModelParameter) Reset ¶
func (x *ModelParameter) Reset()
func (*ModelParameter) String ¶
func (x *ModelParameter) String() string
type ModelQueuePolicy ¶
type ModelQueuePolicy struct { // @@ // @@ .. cpp:var:: TimeoutAction timeout_action // @@ // @@ The action applied to timed-out request. // @@ The default action is REJECT. // @@ TimeoutAction ModelQueuePolicy_TimeoutAction `` /* 147-byte string literal not displayed */ // @@ // @@ .. cpp:var:: uint64 default_timeout_microseconds // @@ // @@ The default timeout for every request, in microseconds. // @@ The default value is 0 which indicates that no timeout is set. // @@ DefaultTimeoutMicroseconds uint64 `` /* 142-byte string literal not displayed */ // @@ // @@ .. cpp:var:: bool allow_timeout_override // @@ // @@ Whether individual request can override the default timeout value. // @@ When true, individual requests can set a timeout that is less than // @@ the default timeout value but may not increase the timeout. // @@ The default value is false. // @@ AllowTimeoutOverride bool `protobuf:"varint,3,opt,name=allow_timeout_override,json=allowTimeoutOverride,proto3" json:"allow_timeout_override,omitempty"` // @@ // @@ .. cpp:var:: uint32 max_queue_size // @@ // @@ The maximum queue size for holding requests. A request will be // @@ rejected immediately if it can't be enqueued because the queue is // @@ full. The default value is 0 which indicates that no maximum // @@ queue size is enforced. // @@ MaxQueueSize uint32 `protobuf:"varint,4,opt,name=max_queue_size,json=maxQueueSize,proto3" json:"max_queue_size,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelQueuePolicy @@ @@ Queue policy for inference requests. @@
func (*ModelQueuePolicy) Descriptor
deprecated
func (*ModelQueuePolicy) Descriptor() ([]byte, []int)
Deprecated: Use ModelQueuePolicy.ProtoReflect.Descriptor instead.
func (*ModelQueuePolicy) GetAllowTimeoutOverride ¶
func (x *ModelQueuePolicy) GetAllowTimeoutOverride() bool
func (*ModelQueuePolicy) GetDefaultTimeoutMicroseconds ¶
func (x *ModelQueuePolicy) GetDefaultTimeoutMicroseconds() uint64
func (*ModelQueuePolicy) GetMaxQueueSize ¶
func (x *ModelQueuePolicy) GetMaxQueueSize() uint32
func (*ModelQueuePolicy) GetTimeoutAction ¶
func (x *ModelQueuePolicy) GetTimeoutAction() ModelQueuePolicy_TimeoutAction
func (*ModelQueuePolicy) ProtoMessage ¶
func (*ModelQueuePolicy) ProtoMessage()
func (*ModelQueuePolicy) ProtoReflect ¶
func (x *ModelQueuePolicy) ProtoReflect() protoreflect.Message
func (*ModelQueuePolicy) Reset ¶
func (x *ModelQueuePolicy) Reset()
func (*ModelQueuePolicy) String ¶
func (x *ModelQueuePolicy) String() string
type ModelQueuePolicy_TimeoutAction ¶
type ModelQueuePolicy_TimeoutAction int32
@@ @@ .. cpp:enum:: TimeoutAction @@ @@ The action applied to timed-out requests. @@
const ( // @@ .. cpp:enumerator:: TimeoutAction::REJECT = 0 // @@ // @@ Reject the request and return error message accordingly. // @@ ModelQueuePolicy_REJECT ModelQueuePolicy_TimeoutAction = 0 // @@ .. cpp:enumerator:: TimeoutAction::DELAY = 1 // @@ // @@ Delay the request until all other requests at the same // @@ (or higher) priority levels that have not reached their timeouts // @@ are processed. A delayed request will eventually be processed, // @@ but may be delayed indefinitely due to newly arriving requests. // @@ ModelQueuePolicy_DELAY ModelQueuePolicy_TimeoutAction = 1 )
func (ModelQueuePolicy_TimeoutAction) Descriptor ¶
func (ModelQueuePolicy_TimeoutAction) Descriptor() protoreflect.EnumDescriptor
func (ModelQueuePolicy_TimeoutAction) Enum ¶
func (x ModelQueuePolicy_TimeoutAction) Enum() *ModelQueuePolicy_TimeoutAction
func (ModelQueuePolicy_TimeoutAction) EnumDescriptor
deprecated
func (ModelQueuePolicy_TimeoutAction) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelQueuePolicy_TimeoutAction.Descriptor instead.
func (ModelQueuePolicy_TimeoutAction) Number ¶
func (x ModelQueuePolicy_TimeoutAction) Number() protoreflect.EnumNumber
func (ModelQueuePolicy_TimeoutAction) String ¶
func (x ModelQueuePolicy_TimeoutAction) String() string
func (ModelQueuePolicy_TimeoutAction) Type ¶
func (ModelQueuePolicy_TimeoutAction) Type() protoreflect.EnumType
type ModelRateLimiter ¶
type ModelRateLimiter struct { // @@ .. cpp:var:: Resource resources (repeated) // @@ // @@ The resources required to execute the request on a model instance. // @@ Resources are just names with a corresponding count. The execution // @@ of the instance will be blocked until the specified resources are // @@ available. By default an instance uses no rate-limiter resources. // @@ Resources []*ModelRateLimiter_Resource `protobuf:"bytes,1,rep,name=resources,proto3" json:"resources,omitempty"` // @@ .. cpp:var:: uint32 priority // @@ // @@ The optional weighting value to be used for prioritizing across // @@ instances. An instance with priority 2 will be given 1/2 the // @@ number of scheduling chances as an instance_group with priority // @@ 1. The default priority is 1. The priority of value 0 will be // @@ treated as priority 1. // @@ Priority uint32 `protobuf:"varint,2,opt,name=priority,proto3" json:"priority,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message ModelRateLimiter @@ @@ The specifications required by the rate limiter to properly @@ schedule the inference requests across the different models @@ and their instances. @@
func (*ModelRateLimiter) Descriptor
deprecated
func (*ModelRateLimiter) Descriptor() ([]byte, []int)
Deprecated: Use ModelRateLimiter.ProtoReflect.Descriptor instead.
func (*ModelRateLimiter) GetPriority ¶
func (x *ModelRateLimiter) GetPriority() uint32
func (*ModelRateLimiter) GetResources ¶
func (x *ModelRateLimiter) GetResources() []*ModelRateLimiter_Resource
func (*ModelRateLimiter) ProtoMessage ¶
func (*ModelRateLimiter) ProtoMessage()
func (*ModelRateLimiter) ProtoReflect ¶
func (x *ModelRateLimiter) ProtoReflect() protoreflect.Message
func (*ModelRateLimiter) Reset ¶
func (x *ModelRateLimiter) Reset()
func (*ModelRateLimiter) String ¶
func (x *ModelRateLimiter) String() string
type ModelRateLimiter_Resource ¶
type ModelRateLimiter_Resource struct { // @@ .. cpp:var:: string name // @@ // @@ The name associated with the resource. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: bool global // @@ // @@ Whether or not the resource is global. If true then the resource // @@ is assumed to be shared among the devices otherwise specified // @@ count of the resource is assumed for each device associated // @@ with the instance. // @@ Global bool `protobuf:"varint,2,opt,name=global,proto3" json:"global,omitempty"` // @@ .. cpp:var:: uint32 count // @@ // @@ The number of resources required for the execution of the model // @@ instance. // @@ Count uint32 `protobuf:"varint,3,opt,name=count,proto3" json:"count,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message Resource @@ @@ The resource property. @@
func (*ModelRateLimiter_Resource) Descriptor
deprecated
func (*ModelRateLimiter_Resource) Descriptor() ([]byte, []int)
Deprecated: Use ModelRateLimiter_Resource.ProtoReflect.Descriptor instead.
func (*ModelRateLimiter_Resource) GetCount ¶
func (x *ModelRateLimiter_Resource) GetCount() uint32
func (*ModelRateLimiter_Resource) GetGlobal ¶
func (x *ModelRateLimiter_Resource) GetGlobal() bool
func (*ModelRateLimiter_Resource) GetName ¶
func (x *ModelRateLimiter_Resource) GetName() string
func (*ModelRateLimiter_Resource) ProtoMessage ¶
func (*ModelRateLimiter_Resource) ProtoMessage()
func (*ModelRateLimiter_Resource) ProtoReflect ¶
func (x *ModelRateLimiter_Resource) ProtoReflect() protoreflect.Message
func (*ModelRateLimiter_Resource) Reset ¶
func (x *ModelRateLimiter_Resource) Reset()
func (*ModelRateLimiter_Resource) String ¶
func (x *ModelRateLimiter_Resource) String() string
type ModelReadyRequest ¶
type ModelReadyRequest struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The name of the model to check for readiness. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: string version // @@ // @@ The version of the model to check for readiness. If not given the // @@ server will choose a version based on the model and internal policy. // @@ Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelReadyRequest @@ @@ Request message for ModelReady. @@
func (*ModelReadyRequest) Descriptor
deprecated
func (*ModelReadyRequest) Descriptor() ([]byte, []int)
Deprecated: Use ModelReadyRequest.ProtoReflect.Descriptor instead.
func (*ModelReadyRequest) GetName ¶
func (x *ModelReadyRequest) GetName() string
func (*ModelReadyRequest) GetVersion ¶
func (x *ModelReadyRequest) GetVersion() string
func (*ModelReadyRequest) ProtoMessage ¶
func (*ModelReadyRequest) ProtoMessage()
func (*ModelReadyRequest) ProtoReflect ¶
func (x *ModelReadyRequest) ProtoReflect() protoreflect.Message
func (*ModelReadyRequest) Reset ¶
func (x *ModelReadyRequest) Reset()
func (*ModelReadyRequest) String ¶
func (x *ModelReadyRequest) String() string
type ModelReadyResponse ¶
type ModelReadyResponse struct { // @@ // @@ .. cpp:var:: bool ready // @@ // @@ True if the model is ready, false if not ready. // @@ Ready bool `protobuf:"varint,1,opt,name=ready,proto3" json:"ready,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelReadyResponse @@ @@ Response message for ModelReady. @@
func (*ModelReadyResponse) Descriptor
deprecated
func (*ModelReadyResponse) Descriptor() ([]byte, []int)
Deprecated: Use ModelReadyResponse.ProtoReflect.Descriptor instead.
func (*ModelReadyResponse) GetReady ¶
func (x *ModelReadyResponse) GetReady() bool
func (*ModelReadyResponse) ProtoMessage ¶
func (*ModelReadyResponse) ProtoMessage()
func (*ModelReadyResponse) ProtoReflect ¶
func (x *ModelReadyResponse) ProtoReflect() protoreflect.Message
func (*ModelReadyResponse) Reset ¶
func (x *ModelReadyResponse) Reset()
func (*ModelReadyResponse) String ¶
func (x *ModelReadyResponse) String() string
type ModelRepositoryAgents ¶
type ModelRepositoryAgents struct { // @@ // @@ .. cpp:var:: Agent agents (repeated) // @@ // @@ The ordered list of agents for the model. These agents will be // @@ invoked in order to respond to repository actions occurring for the // @@ model. // @@ Agents []*ModelRepositoryAgents_Agent `protobuf:"bytes,1,rep,name=agents,proto3" json:"agents,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelRepositoryAgents @@ @@ The repository agents for the model. @@
func (*ModelRepositoryAgents) Descriptor
deprecated
func (*ModelRepositoryAgents) Descriptor() ([]byte, []int)
Deprecated: Use ModelRepositoryAgents.ProtoReflect.Descriptor instead.
func (*ModelRepositoryAgents) GetAgents ¶
func (x *ModelRepositoryAgents) GetAgents() []*ModelRepositoryAgents_Agent
func (*ModelRepositoryAgents) ProtoMessage ¶
func (*ModelRepositoryAgents) ProtoMessage()
func (*ModelRepositoryAgents) ProtoReflect ¶
func (x *ModelRepositoryAgents) ProtoReflect() protoreflect.Message
func (*ModelRepositoryAgents) Reset ¶
func (x *ModelRepositoryAgents) Reset()
func (*ModelRepositoryAgents) String ¶
func (x *ModelRepositoryAgents) String() string
type ModelRepositoryAgents_Agent ¶
type ModelRepositoryAgents_Agent struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the agent. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: map<string, string> parameters // @@ // @@ The parameters for the agent. // @@ Parameters map[string]string `` /* 161-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message Agent @@ @@ A repository agent that should be invoked for the specified @@ repository actions for this model. @@
func (*ModelRepositoryAgents_Agent) Descriptor
deprecated
func (*ModelRepositoryAgents_Agent) Descriptor() ([]byte, []int)
Deprecated: Use ModelRepositoryAgents_Agent.ProtoReflect.Descriptor instead.
func (*ModelRepositoryAgents_Agent) GetName ¶
func (x *ModelRepositoryAgents_Agent) GetName() string
func (*ModelRepositoryAgents_Agent) GetParameters ¶
func (x *ModelRepositoryAgents_Agent) GetParameters() map[string]string
func (*ModelRepositoryAgents_Agent) ProtoMessage ¶
func (*ModelRepositoryAgents_Agent) ProtoMessage()
func (*ModelRepositoryAgents_Agent) ProtoReflect ¶
func (x *ModelRepositoryAgents_Agent) ProtoReflect() protoreflect.Message
func (*ModelRepositoryAgents_Agent) Reset ¶
func (x *ModelRepositoryAgents_Agent) Reset()
func (*ModelRepositoryAgents_Agent) String ¶
func (x *ModelRepositoryAgents_Agent) String() string
type ModelRepositoryParameter ¶
type ModelRepositoryParameter struct { // @@ .. cpp:var:: oneof parameter_choice // @@ // @@ The parameter value can be a string, an int64 or // @@ a boolean // @@ // // Types that are assignable to ParameterChoice: // // *ModelRepositoryParameter_BoolParam // *ModelRepositoryParameter_Int64Param // *ModelRepositoryParameter_StringParam // *ModelRepositoryParameter_BytesParam ParameterChoice isModelRepositoryParameter_ParameterChoice `protobuf_oneof:"parameter_choice"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelRepositoryParameter @@ @@ A model repository parameter value. @@
func (*ModelRepositoryParameter) Descriptor
deprecated
func (*ModelRepositoryParameter) Descriptor() ([]byte, []int)
Deprecated: Use ModelRepositoryParameter.ProtoReflect.Descriptor instead.
func (*ModelRepositoryParameter) GetBoolParam ¶
func (x *ModelRepositoryParameter) GetBoolParam() bool
func (*ModelRepositoryParameter) GetBytesParam ¶
func (x *ModelRepositoryParameter) GetBytesParam() []byte
func (*ModelRepositoryParameter) GetInt64Param ¶
func (x *ModelRepositoryParameter) GetInt64Param() int64
func (*ModelRepositoryParameter) GetParameterChoice ¶
func (m *ModelRepositoryParameter) GetParameterChoice() isModelRepositoryParameter_ParameterChoice
func (*ModelRepositoryParameter) GetStringParam ¶
func (x *ModelRepositoryParameter) GetStringParam() string
func (*ModelRepositoryParameter) ProtoMessage ¶
func (*ModelRepositoryParameter) ProtoMessage()
func (*ModelRepositoryParameter) ProtoReflect ¶
func (x *ModelRepositoryParameter) ProtoReflect() protoreflect.Message
func (*ModelRepositoryParameter) Reset ¶
func (x *ModelRepositoryParameter) Reset()
func (*ModelRepositoryParameter) String ¶
func (x *ModelRepositoryParameter) String() string
type ModelRepositoryParameter_BoolParam ¶
type ModelRepositoryParameter_BoolParam struct { // @@ .. cpp:var:: bool bool_param // @@ // @@ A boolean parameter value. // @@ BoolParam bool `protobuf:"varint,1,opt,name=bool_param,json=boolParam,proto3,oneof"` }
type ModelRepositoryParameter_BytesParam ¶
type ModelRepositoryParameter_BytesParam struct { // @@ .. cpp:var:: bytes bytes_param // @@ // @@ A bytes parameter value. // @@ BytesParam []byte `protobuf:"bytes,4,opt,name=bytes_param,json=bytesParam,proto3,oneof"` }
type ModelRepositoryParameter_Int64Param ¶
type ModelRepositoryParameter_Int64Param struct { // @@ .. cpp:var:: int64 int64_param // @@ // @@ An int64 parameter value. // @@ Int64Param int64 `protobuf:"varint,2,opt,name=int64_param,json=int64Param,proto3,oneof"` }
type ModelRepositoryParameter_StringParam ¶
type ModelRepositoryParameter_StringParam struct { // @@ .. cpp:var:: string string_param // @@ // @@ A string parameter value. // @@ StringParam string `protobuf:"bytes,3,opt,name=string_param,json=stringParam,proto3,oneof"` }
type ModelResponseCache ¶
type ModelResponseCache struct { // @@ // @@ .. cpp:var:: bool enable // @@ // @@ Whether or not to use response cache for the model. If True, the // @@ responses from the model are cached and when identical request // @@ is encountered, instead of going through the model execution, // @@ the response from the cache is utilized. By default, response // @@ cache is disabled for the models. // @@ Enable bool `protobuf:"varint,1,opt,name=enable,proto3" json:"enable,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelResponseCache @@ @@ The response cache setting for the model. @@
func (*ModelResponseCache) Descriptor
deprecated
func (*ModelResponseCache) Descriptor() ([]byte, []int)
Deprecated: Use ModelResponseCache.ProtoReflect.Descriptor instead.
func (*ModelResponseCache) GetEnable ¶
func (x *ModelResponseCache) GetEnable() bool
func (*ModelResponseCache) ProtoMessage ¶
func (*ModelResponseCache) ProtoMessage()
func (*ModelResponseCache) ProtoReflect ¶
func (x *ModelResponseCache) ProtoReflect() protoreflect.Message
func (*ModelResponseCache) Reset ¶
func (x *ModelResponseCache) Reset()
func (*ModelResponseCache) String ¶
func (x *ModelResponseCache) String() string
type ModelSequenceBatching ¶
type ModelSequenceBatching struct { // @@ .. cpp:var:: oneof strategy_choice // @@ // @@ The strategy used by the sequence batcher. Default strategy // @@ is 'direct'. // @@ // // Types that are assignable to StrategyChoice: // // *ModelSequenceBatching_Direct // *ModelSequenceBatching_Oldest StrategyChoice isModelSequenceBatching_StrategyChoice `protobuf_oneof:"strategy_choice"` // @@ .. cpp:var:: uint64 max_sequence_idle_microseconds // @@ // @@ The maximum time, in microseconds, that a sequence is allowed to // @@ be idle before it is aborted. The inference server considers a // @@ sequence idle when it does not have any inference request queued // @@ for the sequence. If this limit is exceeded, the inference server // @@ will free the sequence slot allocated by the sequence and make it // @@ available for another sequence. If not specified (or specified as // @@ zero) a default value of 1000000 (1 second) is used. // @@ MaxSequenceIdleMicroseconds uint64 `` /* 147-byte string literal not displayed */ // @@ .. cpp:var:: ControlInput control_input (repeated) // @@ // @@ The model input(s) that the server should use to communicate // @@ sequence start, stop, ready and similar control values to the // @@ model. // @@ ControlInput []*ModelSequenceBatching_ControlInput `protobuf:"bytes,2,rep,name=control_input,json=controlInput,proto3" json:"control_input,omitempty"` // @@ .. cpp:var:: State state (repeated) // @@ // @@ The optional state that can be stored in Triton for performing // @@ inference requests on a sequence. Each sequence holds an implicit // @@ state local to itself. The output state tensor provided by the // @@ model in 'output_name' field of the current inference request will // @@ be transferred as an input tensor named 'input_name' in the next // @@ request of the same sequence. The input state of the first request // @@ in the sequence contains garbage data. 
// @@ State []*ModelSequenceBatching_State `protobuf:"bytes,5,rep,name=state,proto3" json:"state,omitempty"` // @@ .. cpp:var:: bool iterative_sequence // @@ // @@ Requests for iterative sequences are processed over a number // @@ of iterations. An iterative sequence is initiated by a single // @@ request and is "rescheduled" by the model until completion. // @@ Requests for inflight requests will be batched together // @@ and can complete independently. Note this feature // @@ requires backend support. Default value is false. IterativeSequence bool `protobuf:"varint,6,opt,name=iterative_sequence,json=iterativeSequence,proto3" json:"iterative_sequence,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelSequenceBatching @@ @@ Sequence batching configuration. These settings control how sequence @@ batching operates for the model. @@
func (*ModelSequenceBatching) Descriptor
deprecated
func (*ModelSequenceBatching) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching) GetControlInput ¶
func (x *ModelSequenceBatching) GetControlInput() []*ModelSequenceBatching_ControlInput
func (*ModelSequenceBatching) GetDirect ¶
func (x *ModelSequenceBatching) GetDirect() *ModelSequenceBatching_StrategyDirect
func (*ModelSequenceBatching) GetIterativeSequence ¶
func (x *ModelSequenceBatching) GetIterativeSequence() bool
func (*ModelSequenceBatching) GetMaxSequenceIdleMicroseconds ¶
func (x *ModelSequenceBatching) GetMaxSequenceIdleMicroseconds() uint64
func (*ModelSequenceBatching) GetOldest ¶
func (x *ModelSequenceBatching) GetOldest() *ModelSequenceBatching_StrategyOldest
func (*ModelSequenceBatching) GetState ¶
func (x *ModelSequenceBatching) GetState() []*ModelSequenceBatching_State
func (*ModelSequenceBatching) GetStrategyChoice ¶
func (m *ModelSequenceBatching) GetStrategyChoice() isModelSequenceBatching_StrategyChoice
func (*ModelSequenceBatching) ProtoMessage ¶
func (*ModelSequenceBatching) ProtoMessage()
func (*ModelSequenceBatching) ProtoReflect ¶
func (x *ModelSequenceBatching) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching) Reset ¶
func (x *ModelSequenceBatching) Reset()
func (*ModelSequenceBatching) String ¶
func (x *ModelSequenceBatching) String() string
type ModelSequenceBatching_Control ¶
type ModelSequenceBatching_Control struct { // @@ .. cpp:var:: Kind kind // @@ // @@ The kind of this control. // @@ Kind ModelSequenceBatching_Control_Kind `protobuf:"varint,1,opt,name=kind,proto3,enum=inference.ModelSequenceBatching_Control_Kind" json:"kind,omitempty"` // @@ .. cpp:var:: int32 int32_false_true (repeated) // @@ // @@ The control's true and false setting is indicated by setting // @@ a value in an int32 tensor. The tensor must be a // @@ 1-dimensional tensor with size equal to the batch size of // @@ the request. 'int32_false_true' must have two entries: the // @@ first the false value and the second the true value. // @@ Int32FalseTrue []int32 `protobuf:"varint,2,rep,packed,name=int32_false_true,json=int32FalseTrue,proto3" json:"int32_false_true,omitempty"` // @@ .. cpp:var:: float fp32_false_true (repeated) // @@ // @@ The control's true and false setting is indicated by setting // @@ a value in a fp32 tensor. The tensor must be a // @@ 1-dimensional tensor with size equal to the batch size of // @@ the request. 'fp32_false_true' must have two entries: the // @@ first the false value and the second the true value. // @@ Fp32FalseTrue []float32 `protobuf:"fixed32,3,rep,packed,name=fp32_false_true,json=fp32FalseTrue,proto3" json:"fp32_false_true,omitempty"` // @@ .. cpp:var:: bool bool_false_true (repeated) // @@ // @@ The control's true and false setting is indicated by setting // @@ a value in a bool tensor. The tensor must be a // @@ 1-dimensional tensor with size equal to the batch size of // @@ the request. 'bool_false_true' must have two entries: the // @@ first the false value and the second the true value. // @@ BoolFalseTrue []bool `protobuf:"varint,5,rep,packed,name=bool_false_true,json=boolFalseTrue,proto3" json:"bool_false_true,omitempty"` // @@ .. cpp:var:: DataType data_type // @@ // @@ The control's datatype. 
// @@ DataType DataType `protobuf:"varint,4,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message Control @@ @@ A control is a signal that the sequence batcher uses to @@ communicate with a backend. @@
func (*ModelSequenceBatching_Control) Descriptor
deprecated
func (*ModelSequenceBatching_Control) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_Control.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_Control) GetBoolFalseTrue ¶
func (x *ModelSequenceBatching_Control) GetBoolFalseTrue() []bool
func (*ModelSequenceBatching_Control) GetDataType ¶
func (x *ModelSequenceBatching_Control) GetDataType() DataType
func (*ModelSequenceBatching_Control) GetFp32FalseTrue ¶
func (x *ModelSequenceBatching_Control) GetFp32FalseTrue() []float32
func (*ModelSequenceBatching_Control) GetInt32FalseTrue ¶
func (x *ModelSequenceBatching_Control) GetInt32FalseTrue() []int32
func (*ModelSequenceBatching_Control) GetKind ¶
func (x *ModelSequenceBatching_Control) GetKind() ModelSequenceBatching_Control_Kind
func (*ModelSequenceBatching_Control) ProtoMessage ¶
func (*ModelSequenceBatching_Control) ProtoMessage()
func (*ModelSequenceBatching_Control) ProtoReflect ¶
func (x *ModelSequenceBatching_Control) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_Control) Reset ¶
func (x *ModelSequenceBatching_Control) Reset()
func (*ModelSequenceBatching_Control) String ¶
func (x *ModelSequenceBatching_Control) String() string
type ModelSequenceBatching_ControlInput ¶
type ModelSequenceBatching_ControlInput struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the model input. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: Control control (repeated) // @@ // @@ The control value(s) that should be communicated to the // @@ model using this model input. // @@ Control []*ModelSequenceBatching_Control `protobuf:"bytes,2,rep,name=control,proto3" json:"control,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message ControlInput @@ @@ The sequence control values to communicate by a model input. @@
func (*ModelSequenceBatching_ControlInput) Descriptor
deprecated
func (*ModelSequenceBatching_ControlInput) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_ControlInput.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_ControlInput) GetControl ¶
func (x *ModelSequenceBatching_ControlInput) GetControl() []*ModelSequenceBatching_Control
func (*ModelSequenceBatching_ControlInput) GetName ¶
func (x *ModelSequenceBatching_ControlInput) GetName() string
func (*ModelSequenceBatching_ControlInput) ProtoMessage ¶
func (*ModelSequenceBatching_ControlInput) ProtoMessage()
func (*ModelSequenceBatching_ControlInput) ProtoReflect ¶
func (x *ModelSequenceBatching_ControlInput) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_ControlInput) Reset ¶
func (x *ModelSequenceBatching_ControlInput) Reset()
func (*ModelSequenceBatching_ControlInput) String ¶
func (x *ModelSequenceBatching_ControlInput) String() string
type ModelSequenceBatching_Control_Kind ¶
type ModelSequenceBatching_Control_Kind int32
@@ @@ .. cpp:enum:: Kind @@ @@ The kind of the control. @@
const ( // @@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0 // @@ // @@ A new sequence is/is-not starting. If true a sequence is // @@ starting, if false a sequence is continuing. Must // @@ specify either int32_false_true, fp32_false_true or // @@ bool_false_true for this control. This control is optional. // @@ ModelSequenceBatching_Control_CONTROL_SEQUENCE_START ModelSequenceBatching_Control_Kind = 0 // @@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1 // @@ // @@ A sequence is/is-not ready for inference. If true the // @@ input tensor data is valid and should be used. If false // @@ the input tensor data is invalid and inferencing should // @@ be "skipped". Must specify either int32_false_true, // @@ fp32_false_true or bool_false_true for this control. This // @@ control is optional. // @@ ModelSequenceBatching_Control_CONTROL_SEQUENCE_READY ModelSequenceBatching_Control_Kind = 1 // @@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2 // @@ // @@ A sequence is/is-not ending. If true a sequence is // @@ ending, if false a sequence is continuing. Must specify // @@ either int32_false_true, fp32_false_true or bool_false_true // @@ for this control. This control is optional. // @@ ModelSequenceBatching_Control_CONTROL_SEQUENCE_END ModelSequenceBatching_Control_Kind = 2 // @@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3 // @@ // @@ The correlation ID of the sequence. The correlation ID // @@ is an uint64_t value that is communicated in whole or // @@ in part by the tensor. The tensor's datatype must be // @@ specified by data_type and must be TYPE_UINT64, TYPE_INT64, // @@ TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified // @@ the correlation ID will be truncated to the low-order 32 // @@ bits. This control is optional. // @@ ModelSequenceBatching_Control_CONTROL_SEQUENCE_CORRID ModelSequenceBatching_Control_Kind = 3 )
func (ModelSequenceBatching_Control_Kind) Descriptor ¶
func (ModelSequenceBatching_Control_Kind) Descriptor() protoreflect.EnumDescriptor
func (ModelSequenceBatching_Control_Kind) Enum ¶
func (x ModelSequenceBatching_Control_Kind) Enum() *ModelSequenceBatching_Control_Kind
func (ModelSequenceBatching_Control_Kind) EnumDescriptor
deprecated
func (ModelSequenceBatching_Control_Kind) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_Control_Kind.Descriptor instead.
func (ModelSequenceBatching_Control_Kind) Number ¶
func (x ModelSequenceBatching_Control_Kind) Number() protoreflect.EnumNumber
func (ModelSequenceBatching_Control_Kind) String ¶
func (x ModelSequenceBatching_Control_Kind) String() string
func (ModelSequenceBatching_Control_Kind) Type ¶
func (ModelSequenceBatching_Control_Kind) Type() protoreflect.EnumType
type ModelSequenceBatching_Direct ¶
type ModelSequenceBatching_Direct struct { // @@ .. cpp:var:: StrategyDirect direct // @@ // @@ StrategyDirect scheduling strategy. // @@ Direct *ModelSequenceBatching_StrategyDirect `protobuf:"bytes,3,opt,name=direct,proto3,oneof"` }
type ModelSequenceBatching_InitialState ¶
type ModelSequenceBatching_InitialState struct { // @@ .. cpp:var:: DataType data_type // @@ // @@ The data-type of the state. // @@ DataType DataType `protobuf:"varint,1,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"` // @@ .. cpp:var:: int64 dims (repeated) // @@ // @@ The shape of the state tensor, not including the batch // @@ dimension. // @@ Dims []int64 `protobuf:"varint,2,rep,packed,name=dims,proto3" json:"dims,omitempty"` // @@ .. cpp:var:: oneof state_data // @@ // @@ Specify how the initial state data is generated. // @@ // // Types that are assignable to StateData: // // *ModelSequenceBatching_InitialState_ZeroData // *ModelSequenceBatching_InitialState_DataFile StateData isModelSequenceBatching_InitialState_StateData `protobuf_oneof:"state_data"` // @@ .. cpp:var:: string name // @@ // @@ The name of the state initialization. // @@ Name string `protobuf:"bytes,5,opt,name=name,proto3" json:"name,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message InitialState @@ @@ Settings used to initialize data for implicit state. @@
func (*ModelSequenceBatching_InitialState) Descriptor
deprecated
func (*ModelSequenceBatching_InitialState) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_InitialState.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_InitialState) GetDataFile ¶
func (x *ModelSequenceBatching_InitialState) GetDataFile() string
func (*ModelSequenceBatching_InitialState) GetDataType ¶
func (x *ModelSequenceBatching_InitialState) GetDataType() DataType
func (*ModelSequenceBatching_InitialState) GetDims ¶
func (x *ModelSequenceBatching_InitialState) GetDims() []int64
func (*ModelSequenceBatching_InitialState) GetName ¶
func (x *ModelSequenceBatching_InitialState) GetName() string
func (*ModelSequenceBatching_InitialState) GetStateData ¶
func (m *ModelSequenceBatching_InitialState) GetStateData() isModelSequenceBatching_InitialState_StateData
func (*ModelSequenceBatching_InitialState) GetZeroData ¶
func (x *ModelSequenceBatching_InitialState) GetZeroData() bool
func (*ModelSequenceBatching_InitialState) ProtoMessage ¶
func (*ModelSequenceBatching_InitialState) ProtoMessage()
func (*ModelSequenceBatching_InitialState) ProtoReflect ¶
func (x *ModelSequenceBatching_InitialState) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_InitialState) Reset ¶
func (x *ModelSequenceBatching_InitialState) Reset()
func (*ModelSequenceBatching_InitialState) String ¶
func (x *ModelSequenceBatching_InitialState) String() string
type ModelSequenceBatching_InitialState_DataFile ¶
type ModelSequenceBatching_InitialState_DataFile struct { // @@ .. cpp:var:: string data_file // @@ // @@ The file whose content will be used as the initial data for // @@ the state in row-major order. The file must be provided in // @@ sub-directory 'initial_state' under the model directory. // @@ DataFile string `protobuf:"bytes,4,opt,name=data_file,json=dataFile,proto3,oneof"` }
type ModelSequenceBatching_InitialState_ZeroData ¶
type ModelSequenceBatching_InitialState_ZeroData struct { // @@ // @@ .. cpp:var:: bool zero_data // @@ // @@ The identifier for using zeros as initial state data. // @@ Note that the value of 'zero_data' will not be checked, // @@ instead, zero data will be used as long as the field is set. // @@ ZeroData bool `protobuf:"varint,3,opt,name=zero_data,json=zeroData,proto3,oneof"` }
type ModelSequenceBatching_Oldest ¶
type ModelSequenceBatching_Oldest struct { // @@ .. cpp:var:: StrategyOldest oldest // @@ // @@ StrategyOldest scheduling strategy. // @@ Oldest *ModelSequenceBatching_StrategyOldest `protobuf:"bytes,4,opt,name=oldest,proto3,oneof"` }
type ModelSequenceBatching_State ¶
type ModelSequenceBatching_State struct { // @@ .. cpp:var:: string input_name // @@ // @@ The name of the model state input. // @@ InputName string `protobuf:"bytes,1,opt,name=input_name,json=inputName,proto3" json:"input_name,omitempty"` // @@ .. cpp:var:: string output_name // @@ // @@ The name of the model state output. // @@ OutputName string `protobuf:"bytes,2,opt,name=output_name,json=outputName,proto3" json:"output_name,omitempty"` // @@ .. cpp:var:: DataType data_type // @@ // @@ The data-type of the state. // @@ DataType DataType `protobuf:"varint,3,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"` // @@ .. cpp:var:: int64 dim (repeated) // @@ // @@ The dimension. // @@ Dims []int64 `protobuf:"varint,4,rep,packed,name=dims,proto3" json:"dims,omitempty"` // @@ .. cpp:var:: InitialState initial_state (repeated) // @@ // @@ The optional field to specify the initial state for the model. // @@ InitialState []*ModelSequenceBatching_InitialState `protobuf:"bytes,5,rep,name=initial_state,json=initialState,proto3" json:"initial_state,omitempty"` // @@ .. cpp:var:: bool use_same_buffer_for_input_output // @@ // @@ The optional field to use a single buffer for both input and output // @@ state. Without this option, Triton allocates separate buffers // @@ for input and output state // @@ which can be problematic if the state size is // @@ large. This option reduces the memory usage by allocating a single // @@ buffer. Enabling this option is recommended whenever // @@ the input state is processed before the output state is written. // @@ When enabled the state // @@ will always be updated independent of whether // @@ TRITONBACKEND_StateUpdate is called // @@ (however TRITONBACKEND_StateUpdate should still be called for // @@ completeness). // @@ // @@ The default value is false. // @@ UseSameBufferForInputOutput bool `` /* 151-byte string literal not displayed */ // @@ .. 
cpp:var:: bool use_growable_memory // @@ // @@ The optional field to enable an implicit state buffer to grow // @@ without reallocating or copying existing memory. // @@ Additional memory will be appended to the end of the buffer and // @@ existing data will be preserved. // @@ This option is only available for CUDA memory and requires enabling // @@ use_same_buffer_for_input_output. When using this option, // @@ StateBuffer call will always return CUDA memory even if CPU memory // @@ is requested. // @@ // @@ The default value is false. // @@ UseGrowableMemory bool `protobuf:"varint,7,opt,name=use_growable_memory,json=useGrowableMemory,proto3" json:"use_growable_memory,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message State @@ @@ An input / output pair of tensors that carry state for the sequence. @@
func (*ModelSequenceBatching_State) Descriptor
deprecated
func (*ModelSequenceBatching_State) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_State.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_State) GetDataType ¶
func (x *ModelSequenceBatching_State) GetDataType() DataType
func (*ModelSequenceBatching_State) GetDims ¶
func (x *ModelSequenceBatching_State) GetDims() []int64
func (*ModelSequenceBatching_State) GetInitialState ¶
func (x *ModelSequenceBatching_State) GetInitialState() []*ModelSequenceBatching_InitialState
func (*ModelSequenceBatching_State) GetInputName ¶
func (x *ModelSequenceBatching_State) GetInputName() string
func (*ModelSequenceBatching_State) GetOutputName ¶
func (x *ModelSequenceBatching_State) GetOutputName() string
func (*ModelSequenceBatching_State) GetUseGrowableMemory ¶
func (x *ModelSequenceBatching_State) GetUseGrowableMemory() bool
func (*ModelSequenceBatching_State) GetUseSameBufferForInputOutput ¶
func (x *ModelSequenceBatching_State) GetUseSameBufferForInputOutput() bool
func (*ModelSequenceBatching_State) ProtoMessage ¶
func (*ModelSequenceBatching_State) ProtoMessage()
func (*ModelSequenceBatching_State) ProtoReflect ¶
func (x *ModelSequenceBatching_State) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_State) Reset ¶
func (x *ModelSequenceBatching_State) Reset()
func (*ModelSequenceBatching_State) String ¶
func (x *ModelSequenceBatching_State) String() string
type ModelSequenceBatching_StrategyDirect ¶
type ModelSequenceBatching_StrategyDirect struct { // @@ .. cpp:var:: uint64 max_queue_delay_microseconds // @@ // @@ The maximum time, in microseconds, a candidate request // @@ will be delayed in the sequence batch scheduling queue to // @@ wait for additional requests for batching. Default is 0. // @@ MaxQueueDelayMicroseconds uint64 `` /* 141-byte string literal not displayed */ // @@ .. cpp:var:: float minimum_slot_utilization // @@ // @@ The minimum slot utilization that must be satisfied to // @@ execute the batch before 'max_queue_delay_microseconds' expires. // @@ For example, a value of 0.5 indicates that the batch should be // @@ executed as soon as 50% or more of the slots are ready even if // @@ the 'max_queue_delay_microseconds' timeout has not expired. // @@ The default is 0.0, indicating that a batch will be executed // @@ before 'max_queue_delay_microseconds' timeout expires if at least // @@ one batch slot is ready. 'max_queue_delay_microseconds' will be // @@ ignored unless minimum_slot_utilization is set to a non-zero // @@ value. // @@ MinimumSlotUtilization float32 `` /* 131-byte string literal not displayed */ // contains filtered or unexported fields }
@@ .. cpp:var:: message StrategyDirect @@ @@ The sequence batcher uses a specific, unique batch @@ slot for each sequence. All inference requests in a @@ sequence are directed to the same batch slot in the same @@ model instance over the lifetime of the sequence. This @@ is the default strategy. @@
func (*ModelSequenceBatching_StrategyDirect) Descriptor
deprecated
func (*ModelSequenceBatching_StrategyDirect) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_StrategyDirect.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_StrategyDirect) GetMaxQueueDelayMicroseconds ¶
func (x *ModelSequenceBatching_StrategyDirect) GetMaxQueueDelayMicroseconds() uint64
func (*ModelSequenceBatching_StrategyDirect) GetMinimumSlotUtilization ¶
func (x *ModelSequenceBatching_StrategyDirect) GetMinimumSlotUtilization() float32
func (*ModelSequenceBatching_StrategyDirect) ProtoMessage ¶
func (*ModelSequenceBatching_StrategyDirect) ProtoMessage()
func (*ModelSequenceBatching_StrategyDirect) ProtoReflect ¶
func (x *ModelSequenceBatching_StrategyDirect) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_StrategyDirect) Reset ¶
func (x *ModelSequenceBatching_StrategyDirect) Reset()
func (*ModelSequenceBatching_StrategyDirect) String ¶
func (x *ModelSequenceBatching_StrategyDirect) String() string
type ModelSequenceBatching_StrategyOldest ¶
type ModelSequenceBatching_StrategyOldest struct { // @@ .. cpp:var:: int32 max_candidate_sequences // @@ // @@ Maximum number of candidate sequences that the batcher // @@ maintains. Excess sequences are kept in an ordered backlog // @@ and become candidates when existing candidate sequences // @@ complete. // @@ MaxCandidateSequences int32 `` /* 127-byte string literal not displayed */ // @@ .. cpp:var:: int32 preferred_batch_size (repeated) // @@ // @@ Preferred batch sizes for dynamic batching of candidate // @@ sequences. If a batch of one of these sizes can be formed // @@ it will be executed immediately. If not specified a // @@ preferred batch size will be chosen automatically // @@ based on model and GPU characteristics. // @@ PreferredBatchSize []int32 `protobuf:"varint,2,rep,packed,name=preferred_batch_size,json=preferredBatchSize,proto3" json:"preferred_batch_size,omitempty"` // @@ .. cpp:var:: uint64 max_queue_delay_microseconds // @@ // @@ The maximum time, in microseconds, a candidate request // @@ will be delayed in the dynamic batch scheduling queue to // @@ wait for additional requests for batching. Default is 0. // @@ MaxQueueDelayMicroseconds uint64 `` /* 141-byte string literal not displayed */ // @@ .. cpp:var:: bool preserve_ordering // @@ // @@ Should the dynamic batcher preserve the ordering of responses to // @@ match the order of requests received by the scheduler. Default is // @@ false. If true, the responses will be returned in the same order // @@ as the order of requests sent to the scheduler. If false, the // @@ responses may be returned in arbitrary order. This option is // @@ specifically needed when a sequence of related inference requests // @@ (i.e. inference requests with the same correlation ID) are sent // @@ to the dynamic batcher to ensure that the sequence responses are // @@ in the correct order. 
// @@ // @@ When using decoupled models, setting this to true may block the // @@ responses from independent sequences from being returned to the // @@ client until the previous request completes, hurting overall // @@ performance. If using GRPC streaming protocol, the stream // @@ ordering guarantee may be sufficient alone to ensure the // @@ responses for each sequence are returned in sequence-order // @@ without blocking based on independent requests, depending on the // @@ use case. // @@ PreserveOrdering bool `protobuf:"varint,4,opt,name=preserve_ordering,json=preserveOrdering,proto3" json:"preserve_ordering,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message StrategyOldest @@ @@ The sequence batcher maintains up to 'max_candidate_sequences' @@ candidate sequences. 'max_candidate_sequences' can be greater @@ than the model's 'max_batch_size'. For inferencing the batcher @@ chooses from the candidate sequences up to 'max_batch_size' @@ inference requests. Requests are chosen in an oldest-first @@ manner across all candidate sequences. A given sequence is @@ not guaranteed to be assigned to the same batch slot for @@ all inference requests of that sequence. @@
func (*ModelSequenceBatching_StrategyOldest) Descriptor
deprecated
func (*ModelSequenceBatching_StrategyOldest) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_StrategyOldest.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_StrategyOldest) GetMaxCandidateSequences ¶
func (x *ModelSequenceBatching_StrategyOldest) GetMaxCandidateSequences() int32
func (*ModelSequenceBatching_StrategyOldest) GetMaxQueueDelayMicroseconds ¶
func (x *ModelSequenceBatching_StrategyOldest) GetMaxQueueDelayMicroseconds() uint64
func (*ModelSequenceBatching_StrategyOldest) GetPreferredBatchSize ¶
func (x *ModelSequenceBatching_StrategyOldest) GetPreferredBatchSize() []int32
func (*ModelSequenceBatching_StrategyOldest) GetPreserveOrdering ¶
func (x *ModelSequenceBatching_StrategyOldest) GetPreserveOrdering() bool
func (*ModelSequenceBatching_StrategyOldest) ProtoMessage ¶
func (*ModelSequenceBatching_StrategyOldest) ProtoMessage()
func (*ModelSequenceBatching_StrategyOldest) ProtoReflect ¶
func (x *ModelSequenceBatching_StrategyOldest) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_StrategyOldest) Reset ¶
func (x *ModelSequenceBatching_StrategyOldest) Reset()
func (*ModelSequenceBatching_StrategyOldest) String ¶
func (x *ModelSequenceBatching_StrategyOldest) String() string
type ModelStatistics ¶
type ModelStatistics struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the model. If not given returns statistics for all // @@ models. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: string version // @@ // @@ The version of the model. // @@ Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` // @@ .. cpp:var:: uint64 last_inference // @@ // @@ The timestamp of the last inference request made for this model, // @@ as milliseconds since the epoch. // @@ LastInference uint64 `protobuf:"varint,3,opt,name=last_inference,json=lastInference,proto3" json:"last_inference,omitempty"` // @@ .. cpp:var:: uint64 inference_count // @@ // @@ The cumulative count of successful inference requests made for this // @@ model. Each inference in a batched request is counted as an // @@ individual inference. For example, if a client sends a single // @@ inference request with batch size 64, "inference_count" will be // @@ incremented by 64. Similarly, if a client sends 64 individual // @@ requests each with batch size 1, "inference_count" will be // @@ incremented by 64. The "inference_count" value DOES NOT include // @@ cache hits. // @@ InferenceCount uint64 `protobuf:"varint,4,opt,name=inference_count,json=inferenceCount,proto3" json:"inference_count,omitempty"` // @@ .. cpp:var:: uint64 execution_count // @@ // @@ The cumulative count of the number of successful inference executions // @@ performed for the model. When dynamic batching is enabled, a single // @@ model execution can perform inferencing for more than one inference // @@ request. For example, if a client sends 64 individual requests each // @@ with batch size 1 and the dynamic batcher batches them into a single // @@ large batch for model execution then "execution_count" will be // @@ incremented by 1. 
If, on the other hand, the dynamic batcher is not // @@ enabled, so that each of the 64 individual requests is executed // @@ independently, then "execution_count" will be incremented by 64. // @@ The "execution_count" value DOES NOT include cache hits. // @@ ExecutionCount uint64 `protobuf:"varint,5,opt,name=execution_count,json=executionCount,proto3" json:"execution_count,omitempty"` // @@ .. cpp:var:: InferStatistics inference_stats // @@ // @@ The aggregate statistics for the model/version. // @@ InferenceStats *InferStatistics `protobuf:"bytes,6,opt,name=inference_stats,json=inferenceStats,proto3" json:"inference_stats,omitempty"` // @@ .. cpp:var:: InferBatchStatistics batch_stats (repeated) // @@ // @@ The aggregate statistics for each different batch size that is // @@ executed in the model. The batch statistics indicate how many actual // @@ model executions were performed and show differences due to different // @@ batch size (for example, larger batches typically take longer to // @@ compute). // @@ BatchStats []*InferBatchStatistics `protobuf:"bytes,7,rep,name=batch_stats,json=batchStats,proto3" json:"batch_stats,omitempty"` // @@ .. cpp:var:: MemoryUsage memory_usage (repeated) // @@ // @@ The memory usage detected during model loading, which may be used to // @@ estimate the memory to be released once the model is unloaded. Note // @@ that the estimation is inferred by the profiling tools and // @@ framework's memory schema, therefore it is advised to perform // @@ experiments to understand the scenario that the reported memory usage // @@ can be relied on. As a starting point, the GPU memory usage for // @@ models in ONNX Runtime backend and TensorRT backend is usually // @@ aligned. // @@ MemoryUsage []*MemoryUsage `protobuf:"bytes,8,rep,name=memory_usage,json=memoryUsage,proto3" json:"memory_usage,omitempty"` // @@ .. cpp:var:: map<string, InferResponseStatistics> response_stats // @@ // @@ The key and value pairs for all response statistics. 
The key is a // @@ string identifying a set of response statistics aggregated together // @@ (i.e. index of the response sent). The value is the aggregated // @@ response statistics. // @@ ResponseStats map[string]*InferResponseStatistics `` /* 188-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelStatistics @@ @@ Statistics for a specific model and version. @@
func (*ModelStatistics) Descriptor
deprecated
func (*ModelStatistics) Descriptor() ([]byte, []int)
Deprecated: Use ModelStatistics.ProtoReflect.Descriptor instead.
func (*ModelStatistics) GetBatchStats ¶
func (x *ModelStatistics) GetBatchStats() []*InferBatchStatistics
func (*ModelStatistics) GetExecutionCount ¶
func (x *ModelStatistics) GetExecutionCount() uint64
func (*ModelStatistics) GetInferenceCount ¶
func (x *ModelStatistics) GetInferenceCount() uint64
func (*ModelStatistics) GetInferenceStats ¶
func (x *ModelStatistics) GetInferenceStats() *InferStatistics
func (*ModelStatistics) GetLastInference ¶
func (x *ModelStatistics) GetLastInference() uint64
func (*ModelStatistics) GetMemoryUsage ¶
func (x *ModelStatistics) GetMemoryUsage() []*MemoryUsage
func (*ModelStatistics) GetName ¶
func (x *ModelStatistics) GetName() string
func (*ModelStatistics) GetResponseStats ¶
func (x *ModelStatistics) GetResponseStats() map[string]*InferResponseStatistics
func (*ModelStatistics) GetVersion ¶
func (x *ModelStatistics) GetVersion() string
func (*ModelStatistics) ProtoMessage ¶
func (*ModelStatistics) ProtoMessage()
func (*ModelStatistics) ProtoReflect ¶
func (x *ModelStatistics) ProtoReflect() protoreflect.Message
func (*ModelStatistics) Reset ¶
func (x *ModelStatistics) Reset()
func (*ModelStatistics) String ¶
func (x *ModelStatistics) String() string
type ModelStatisticsRequest ¶
type ModelStatisticsRequest struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the model. If not given returns statistics for // @@ all models. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: string version // @@ // @@ The version of the model. If not given returns statistics for // @@ all model versions. // @@ Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelStatisticsRequest @@ @@ Request message for ModelStatistics. @@
func (*ModelStatisticsRequest) Descriptor
deprecated
func (*ModelStatisticsRequest) Descriptor() ([]byte, []int)
Deprecated: Use ModelStatisticsRequest.ProtoReflect.Descriptor instead.
func (*ModelStatisticsRequest) GetName ¶
func (x *ModelStatisticsRequest) GetName() string
func (*ModelStatisticsRequest) GetVersion ¶
func (x *ModelStatisticsRequest) GetVersion() string
func (*ModelStatisticsRequest) ProtoMessage ¶
func (*ModelStatisticsRequest) ProtoMessage()
func (*ModelStatisticsRequest) ProtoReflect ¶
func (x *ModelStatisticsRequest) ProtoReflect() protoreflect.Message
func (*ModelStatisticsRequest) Reset ¶
func (x *ModelStatisticsRequest) Reset()
func (*ModelStatisticsRequest) String ¶
func (x *ModelStatisticsRequest) String() string
type ModelStatisticsResponse ¶
type ModelStatisticsResponse struct { // @@ .. cpp:var:: ModelStatistics model_stats (repeated) // @@ // @@ Statistics for each requested model. // @@ ModelStats []*ModelStatistics `protobuf:"bytes,1,rep,name=model_stats,json=modelStats,proto3" json:"model_stats,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelStatisticsResponse @@ @@ Response message for ModelStatistics. @@
func (*ModelStatisticsResponse) Descriptor
deprecated
func (*ModelStatisticsResponse) Descriptor() ([]byte, []int)
Deprecated: Use ModelStatisticsResponse.ProtoReflect.Descriptor instead.
func (*ModelStatisticsResponse) GetModelStats ¶
func (x *ModelStatisticsResponse) GetModelStats() []*ModelStatistics
func (*ModelStatisticsResponse) ProtoMessage ¶
func (*ModelStatisticsResponse) ProtoMessage()
func (*ModelStatisticsResponse) ProtoReflect ¶
func (x *ModelStatisticsResponse) ProtoReflect() protoreflect.Message
func (*ModelStatisticsResponse) Reset ¶
func (x *ModelStatisticsResponse) Reset()
func (*ModelStatisticsResponse) String ¶
func (x *ModelStatisticsResponse) String() string
type ModelStreamInferResponse ¶
type ModelStreamInferResponse struct { // @@ // @@ .. cpp:var:: string error_message // @@ // @@ The message describing the error. The empty message // @@ indicates the inference was successful without errors. // @@ ErrorMessage string `protobuf:"bytes,1,opt,name=error_message,json=errorMessage,proto3" json:"error_message,omitempty"` // @@ // @@ .. cpp:var:: ModelInferResponse infer_response // @@ // @@ Holds the results of the request. // @@ InferResponse *ModelInferResponse `protobuf:"bytes,2,opt,name=infer_response,json=inferResponse,proto3" json:"infer_response,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelStreamInferResponse @@ @@ Response message for ModelStreamInfer. @@
func (*ModelStreamInferResponse) Descriptor
deprecated
func (*ModelStreamInferResponse) Descriptor() ([]byte, []int)
Deprecated: Use ModelStreamInferResponse.ProtoReflect.Descriptor instead.
func (*ModelStreamInferResponse) GetErrorMessage ¶
func (x *ModelStreamInferResponse) GetErrorMessage() string
func (*ModelStreamInferResponse) GetInferResponse ¶
func (x *ModelStreamInferResponse) GetInferResponse() *ModelInferResponse
func (*ModelStreamInferResponse) ProtoMessage ¶
func (*ModelStreamInferResponse) ProtoMessage()
func (*ModelStreamInferResponse) ProtoReflect ¶
func (x *ModelStreamInferResponse) ProtoReflect() protoreflect.Message
func (*ModelStreamInferResponse) Reset ¶
func (x *ModelStreamInferResponse) Reset()
func (*ModelStreamInferResponse) String ¶
func (x *ModelStreamInferResponse) String() string
type ModelTensorReshape ¶
type ModelTensorReshape struct { // @@ .. cpp:var:: int64 shape (repeated) // @@ // @@ The shape to use for reshaping. // @@ Shape []int64 `protobuf:"varint,1,rep,packed,name=shape,proto3" json:"shape,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelTensorReshape @@ @@ Reshape specification for input and output tensors. @@
func (*ModelTensorReshape) Descriptor
deprecated
func (*ModelTensorReshape) Descriptor() ([]byte, []int)
Deprecated: Use ModelTensorReshape.ProtoReflect.Descriptor instead.
func (*ModelTensorReshape) GetShape ¶
func (x *ModelTensorReshape) GetShape() []int64
func (*ModelTensorReshape) ProtoMessage ¶
func (*ModelTensorReshape) ProtoMessage()
func (*ModelTensorReshape) ProtoReflect ¶
func (x *ModelTensorReshape) ProtoReflect() protoreflect.Message
func (*ModelTensorReshape) Reset ¶
func (x *ModelTensorReshape) Reset()
func (*ModelTensorReshape) String ¶
func (x *ModelTensorReshape) String() string
type ModelTransactionPolicy ¶
type ModelTransactionPolicy struct { // @@ .. cpp:var:: bool decoupled // @@ // @@ Indicates whether responses generated by the model are decoupled with // @@ the requests issued to it, which means the number of responses // @@ generated by model may differ from number of requests issued, and // @@ that the responses may be out of order relative to the order of // @@ requests. The default is false, which means the model will generate // @@ exactly one response for each request. // @@ Decoupled bool `protobuf:"varint,1,opt,name=decoupled,proto3" json:"decoupled,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message ModelTransactionPolicy @@ @@ The specification that describes the nature of transactions @@ to be expected from the model. @@
func (*ModelTransactionPolicy) Descriptor
deprecated
func (*ModelTransactionPolicy) Descriptor() ([]byte, []int)
Deprecated: Use ModelTransactionPolicy.ProtoReflect.Descriptor instead.
func (*ModelTransactionPolicy) GetDecoupled ¶
func (x *ModelTransactionPolicy) GetDecoupled() bool
func (*ModelTransactionPolicy) ProtoMessage ¶
func (*ModelTransactionPolicy) ProtoMessage()
func (*ModelTransactionPolicy) ProtoReflect ¶
func (x *ModelTransactionPolicy) ProtoReflect() protoreflect.Message
func (*ModelTransactionPolicy) Reset ¶
func (x *ModelTransactionPolicy) Reset()
func (*ModelTransactionPolicy) String ¶
func (x *ModelTransactionPolicy) String() string
type ModelVersionPolicy ¶
type ModelVersionPolicy struct { // @@ .. cpp:var:: oneof policy_choice // @@ // @@ Each model must implement only a single version policy. The // @@ default policy is 'Latest'. // @@ // // Types that are assignable to PolicyChoice: // // *ModelVersionPolicy_Latest_ // *ModelVersionPolicy_All_ // *ModelVersionPolicy_Specific_ PolicyChoice isModelVersionPolicy_PolicyChoice `protobuf_oneof:"policy_choice"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelVersionPolicy @@ @@ Policy indicating which versions of a model should be made @@ available by the inference server. @@
func (*ModelVersionPolicy) Descriptor
deprecated
func (*ModelVersionPolicy) Descriptor() ([]byte, []int)
Deprecated: Use ModelVersionPolicy.ProtoReflect.Descriptor instead.
func (*ModelVersionPolicy) GetAll ¶
func (x *ModelVersionPolicy) GetAll() *ModelVersionPolicy_All
func (*ModelVersionPolicy) GetLatest ¶
func (x *ModelVersionPolicy) GetLatest() *ModelVersionPolicy_Latest
func (*ModelVersionPolicy) GetPolicyChoice ¶
func (m *ModelVersionPolicy) GetPolicyChoice() isModelVersionPolicy_PolicyChoice
func (*ModelVersionPolicy) GetSpecific ¶
func (x *ModelVersionPolicy) GetSpecific() *ModelVersionPolicy_Specific
func (*ModelVersionPolicy) ProtoMessage ¶
func (*ModelVersionPolicy) ProtoMessage()
func (*ModelVersionPolicy) ProtoReflect ¶
func (x *ModelVersionPolicy) ProtoReflect() protoreflect.Message
func (*ModelVersionPolicy) Reset ¶
func (x *ModelVersionPolicy) Reset()
func (*ModelVersionPolicy) String ¶
func (x *ModelVersionPolicy) String() string
type ModelVersionPolicy_All ¶
type ModelVersionPolicy_All struct {
// contains filtered or unexported fields
}
@@ .. cpp:var:: message All @@ @@ Serve all versions of the model. @@
func (*ModelVersionPolicy_All) Descriptor
deprecated
func (*ModelVersionPolicy_All) Descriptor() ([]byte, []int)
Deprecated: Use ModelVersionPolicy_All.ProtoReflect.Descriptor instead.
func (*ModelVersionPolicy_All) ProtoMessage ¶
func (*ModelVersionPolicy_All) ProtoMessage()
func (*ModelVersionPolicy_All) ProtoReflect ¶
func (x *ModelVersionPolicy_All) ProtoReflect() protoreflect.Message
func (*ModelVersionPolicy_All) Reset ¶
func (x *ModelVersionPolicy_All) Reset()
func (*ModelVersionPolicy_All) String ¶
func (x *ModelVersionPolicy_All) String() string
type ModelVersionPolicy_All_ ¶
type ModelVersionPolicy_All_ struct { // @@ .. cpp:var:: All all // @@ // @@ Serve all versions of the model. // @@ All *ModelVersionPolicy_All `protobuf:"bytes,2,opt,name=all,proto3,oneof"` }
type ModelVersionPolicy_Latest ¶
type ModelVersionPolicy_Latest struct { // @@ .. cpp:var:: uint32 num_versions // @@ // @@ Serve only the 'num_versions' highest-numbered versions. // @@ The default value of 'num_versions' is 1, indicating that by // @@ default only the single highest-number version of a // @@ model will be served. // @@ NumVersions uint32 `protobuf:"varint,1,opt,name=num_versions,json=numVersions,proto3" json:"num_versions,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message Latest @@ @@ Serve only the latest version(s) of a model. This is @@ the default policy. @@
func (*ModelVersionPolicy_Latest) Descriptor
deprecated
func (*ModelVersionPolicy_Latest) Descriptor() ([]byte, []int)
Deprecated: Use ModelVersionPolicy_Latest.ProtoReflect.Descriptor instead.
func (*ModelVersionPolicy_Latest) GetNumVersions ¶
func (x *ModelVersionPolicy_Latest) GetNumVersions() uint32
func (*ModelVersionPolicy_Latest) ProtoMessage ¶
func (*ModelVersionPolicy_Latest) ProtoMessage()
func (*ModelVersionPolicy_Latest) ProtoReflect ¶
func (x *ModelVersionPolicy_Latest) ProtoReflect() protoreflect.Message
func (*ModelVersionPolicy_Latest) Reset ¶
func (x *ModelVersionPolicy_Latest) Reset()
func (*ModelVersionPolicy_Latest) String ¶
func (x *ModelVersionPolicy_Latest) String() string
type ModelVersionPolicy_Latest_ ¶
type ModelVersionPolicy_Latest_ struct { // @@ .. cpp:var:: Latest latest // @@ // @@ Serve only latest version(s) of the model. // @@ Latest *ModelVersionPolicy_Latest `protobuf:"bytes,1,opt,name=latest,proto3,oneof"` }
type ModelVersionPolicy_Specific ¶
type ModelVersionPolicy_Specific struct { // @@ .. cpp:var:: int64 versions (repeated) // @@ // @@ The specific versions of the model that will be served. // @@ Versions []int64 `protobuf:"varint,1,rep,packed,name=versions,proto3" json:"versions,omitempty"` // contains filtered or unexported fields }
@@ .. cpp:var:: message Specific @@ @@ Serve only specific versions of the model. @@
func (*ModelVersionPolicy_Specific) Descriptor
deprecated
func (*ModelVersionPolicy_Specific) Descriptor() ([]byte, []int)
Deprecated: Use ModelVersionPolicy_Specific.ProtoReflect.Descriptor instead.
func (*ModelVersionPolicy_Specific) GetVersions ¶
func (x *ModelVersionPolicy_Specific) GetVersions() []int64
func (*ModelVersionPolicy_Specific) ProtoMessage ¶
func (*ModelVersionPolicy_Specific) ProtoMessage()
func (*ModelVersionPolicy_Specific) ProtoReflect ¶
func (x *ModelVersionPolicy_Specific) ProtoReflect() protoreflect.Message
func (*ModelVersionPolicy_Specific) Reset ¶
func (x *ModelVersionPolicy_Specific) Reset()
func (*ModelVersionPolicy_Specific) String ¶
func (x *ModelVersionPolicy_Specific) String() string
type ModelVersionPolicy_Specific_ ¶
type ModelVersionPolicy_Specific_ struct { // @@ .. cpp:var:: Specific specific // @@ // @@ Serve only specific version(s) of the model. // @@ Specific *ModelVersionPolicy_Specific `protobuf:"bytes,3,opt,name=specific,proto3,oneof"` }
type ModelWarmup ¶
type ModelWarmup struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the request sample. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: uint32 batch_size // @@ // @@ The batch size of the inference request. This must be >= 1. For // @@ models that don't support batching, batch_size must be 1. If // @@ batch_size > 1, the 'inputs' specified below will be duplicated to // @@ match the batch size requested. // @@ BatchSize uint32 `protobuf:"varint,2,opt,name=batch_size,json=batchSize,proto3" json:"batch_size,omitempty"` // @@ .. cpp:var:: map<string, Input> inputs // @@ // @@ The warmup meta data associated with every model input, including // @@ control tensors. // @@ Inputs map[string]*ModelWarmup_Input `` /* 153-byte string literal not displayed */ // @@ .. cpp:var:: uint32 count // @@ // @@ The number of iterations that this warmup sample will be executed. // @@ For example, if this field is set to 2, 2 model executions using this // @@ sample will be scheduled for warmup. Default value is 0 which // @@ indicates that this sample will be used only once. // @@ Note that for sequence model, 'count' may not work well // @@ because the model often expect a valid sequence of requests which // @@ should be represented by a series of warmup samples. 'count > 1' // @@ essentially "resends" one of the sample, which may invalidate the // @@ sequence and result in unexpected warmup failure. // @@ Count uint32 `protobuf:"varint,4,opt,name=count,proto3" json:"count,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ModelWarmup @@ @@ Settings used to construct the request sample for model warmup. @@
func (*ModelWarmup) Descriptor
deprecated
func (*ModelWarmup) Descriptor() ([]byte, []int)
Deprecated: Use ModelWarmup.ProtoReflect.Descriptor instead.
func (*ModelWarmup) GetBatchSize ¶
func (x *ModelWarmup) GetBatchSize() uint32
func (*ModelWarmup) GetCount ¶
func (x *ModelWarmup) GetCount() uint32
func (*ModelWarmup) GetInputs ¶
func (x *ModelWarmup) GetInputs() map[string]*ModelWarmup_Input
func (*ModelWarmup) GetName ¶
func (x *ModelWarmup) GetName() string
func (*ModelWarmup) ProtoMessage ¶
func (*ModelWarmup) ProtoMessage()
func (*ModelWarmup) ProtoReflect ¶
func (x *ModelWarmup) ProtoReflect() protoreflect.Message
func (*ModelWarmup) Reset ¶
func (x *ModelWarmup) Reset()
func (*ModelWarmup) String ¶
func (x *ModelWarmup) String() string
type ModelWarmup_Input ¶
type ModelWarmup_Input struct { // @@ .. cpp:var:: DataType data_type // @@ // @@ The data-type of the input. // @@ DataType DataType `protobuf:"varint,1,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"` // @@ .. cpp:var:: int64 dims (repeated) // @@ // @@ The shape of the input tensor, not including the batch dimension. // @@ Dims []int64 `protobuf:"varint,2,rep,packed,name=dims,proto3" json:"dims,omitempty"` // @@ .. cpp:var:: oneof input_data_type // @@ // @@ Specify how the input data is generated. If the input has STRING // @@ data type and 'random_data' is set, the data generation will fall // @@ back to 'zero_data'. // @@ // // Types that are assignable to InputDataType: // // *ModelWarmup_Input_ZeroData // *ModelWarmup_Input_RandomData // *ModelWarmup_Input_InputDataFile InputDataType isModelWarmup_Input_InputDataType `protobuf_oneof:"input_data_type"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message Input @@ @@ Meta data associated with an input. @@
func (*ModelWarmup_Input) Descriptor
deprecated
func (*ModelWarmup_Input) Descriptor() ([]byte, []int)
Deprecated: Use ModelWarmup_Input.ProtoReflect.Descriptor instead.
func (*ModelWarmup_Input) GetDataType ¶
func (x *ModelWarmup_Input) GetDataType() DataType
func (*ModelWarmup_Input) GetDims ¶
func (x *ModelWarmup_Input) GetDims() []int64
func (*ModelWarmup_Input) GetInputDataFile ¶
func (x *ModelWarmup_Input) GetInputDataFile() string
func (*ModelWarmup_Input) GetInputDataType ¶
func (m *ModelWarmup_Input) GetInputDataType() isModelWarmup_Input_InputDataType
func (*ModelWarmup_Input) GetRandomData ¶
func (x *ModelWarmup_Input) GetRandomData() bool
func (*ModelWarmup_Input) GetZeroData ¶
func (x *ModelWarmup_Input) GetZeroData() bool
func (*ModelWarmup_Input) ProtoMessage ¶
func (*ModelWarmup_Input) ProtoMessage()
func (*ModelWarmup_Input) ProtoReflect ¶
func (x *ModelWarmup_Input) ProtoReflect() protoreflect.Message
func (*ModelWarmup_Input) Reset ¶
func (x *ModelWarmup_Input) Reset()
func (*ModelWarmup_Input) String ¶
func (x *ModelWarmup_Input) String() string
type ModelWarmup_Input_InputDataFile ¶
type ModelWarmup_Input_InputDataFile struct { // @@ .. cpp:var:: string input_data_file // @@ // @@ The file whose content will be used as raw input data in // @@ row-major order. The file must be provided in a sub-directory // @@ 'warmup' under the model directory. The file contents should be // @@ in binary format. For TYPE_STRING data-type, an element is // @@ represented by a 4-byte unsigned integer giving the length // @@ followed by the actual bytes. // @@ InputDataFile string `protobuf:"bytes,5,opt,name=input_data_file,json=inputDataFile,proto3,oneof"` }
type ModelWarmup_Input_RandomData ¶
type ModelWarmup_Input_RandomData struct { // @@ // @@ .. cpp:var:: bool random_data // @@ // @@ The identifier for using random data as input data. Note that // @@ the value of 'random_data' will not be checked, instead, // @@ random data will be used as long as the field is set. // @@ RandomData bool `protobuf:"varint,4,opt,name=random_data,json=randomData,proto3,oneof"` }
type ModelWarmup_Input_ZeroData ¶
type ModelWarmup_Input_ZeroData struct { // @@ // @@ .. cpp:var:: bool zero_data // @@ // @@ The identifier for using zeros as input data. Note that the // @@ value of 'zero_data' will not be checked, instead, zero data // @@ will be used as long as the field is set. // @@ ZeroData bool `protobuf:"varint,3,opt,name=zero_data,json=zeroData,proto3,oneof"` }
type RepositoryIndexRequest ¶
type RepositoryIndexRequest struct { // @@ .. cpp:var:: string repository_name // @@ // @@ The name of the repository. If empty the index is returned // @@ for all repositories. // @@ RepositoryName string `protobuf:"bytes,1,opt,name=repository_name,json=repositoryName,proto3" json:"repository_name,omitempty"` // @@ .. cpp:var:: bool ready // @@ // @@ If true return only models currently ready for inferencing. // @@ Ready bool `protobuf:"varint,2,opt,name=ready,proto3" json:"ready,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message RepositoryIndexRequest @@ @@ Request message for RepositoryIndex. @@
func (*RepositoryIndexRequest) Descriptor
deprecated
func (*RepositoryIndexRequest) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryIndexRequest.ProtoReflect.Descriptor instead.
func (*RepositoryIndexRequest) GetReady ¶
func (x *RepositoryIndexRequest) GetReady() bool
func (*RepositoryIndexRequest) GetRepositoryName ¶
func (x *RepositoryIndexRequest) GetRepositoryName() string
func (*RepositoryIndexRequest) ProtoMessage ¶
func (*RepositoryIndexRequest) ProtoMessage()
func (*RepositoryIndexRequest) ProtoReflect ¶
func (x *RepositoryIndexRequest) ProtoReflect() protoreflect.Message
func (*RepositoryIndexRequest) Reset ¶
func (x *RepositoryIndexRequest) Reset()
func (*RepositoryIndexRequest) String ¶
func (x *RepositoryIndexRequest) String() string
type RepositoryIndexResponse ¶
type RepositoryIndexResponse struct { // @@ // @@ .. cpp:var:: ModelIndex models (repeated) // @@ // @@ An index entry for each model. // @@ Models []*RepositoryIndexResponse_ModelIndex `protobuf:"bytes,1,rep,name=models,proto3" json:"models,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message RepositoryIndexResponse @@ @@ Response message for RepositoryIndex. @@
func (*RepositoryIndexResponse) Descriptor
deprecated
func (*RepositoryIndexResponse) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryIndexResponse.ProtoReflect.Descriptor instead.
func (*RepositoryIndexResponse) GetModels ¶
func (x *RepositoryIndexResponse) GetModels() []*RepositoryIndexResponse_ModelIndex
func (*RepositoryIndexResponse) ProtoMessage ¶
func (*RepositoryIndexResponse) ProtoMessage()
func (*RepositoryIndexResponse) ProtoReflect ¶
func (x *RepositoryIndexResponse) ProtoReflect() protoreflect.Message
func (*RepositoryIndexResponse) Reset ¶
func (x *RepositoryIndexResponse) Reset()
func (*RepositoryIndexResponse) String ¶
func (x *RepositoryIndexResponse) String() string
type RepositoryIndexResponse_ModelIndex ¶
type RepositoryIndexResponse_ModelIndex struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The name of the model. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ .. cpp:var:: string version // @@ // @@ The version of the model. // @@ Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` // @@ // @@ .. cpp:var:: string state // @@ // @@ The state of the model. // @@ State string `protobuf:"bytes,3,opt,name=state,proto3" json:"state,omitempty"` // @@ // @@ .. cpp:var:: string reason // @@ // @@ The reason, if any, that the model is in the given state. // @@ Reason string `protobuf:"bytes,4,opt,name=reason,proto3" json:"reason,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message ModelIndex @@ @@ Index entry for a model. @@
func (*RepositoryIndexResponse_ModelIndex) Descriptor
deprecated
func (*RepositoryIndexResponse_ModelIndex) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryIndexResponse_ModelIndex.ProtoReflect.Descriptor instead.
func (*RepositoryIndexResponse_ModelIndex) GetName ¶
func (x *RepositoryIndexResponse_ModelIndex) GetName() string
func (*RepositoryIndexResponse_ModelIndex) GetReason ¶
func (x *RepositoryIndexResponse_ModelIndex) GetReason() string
func (*RepositoryIndexResponse_ModelIndex) GetState ¶
func (x *RepositoryIndexResponse_ModelIndex) GetState() string
func (*RepositoryIndexResponse_ModelIndex) GetVersion ¶
func (x *RepositoryIndexResponse_ModelIndex) GetVersion() string
func (*RepositoryIndexResponse_ModelIndex) ProtoMessage ¶
func (*RepositoryIndexResponse_ModelIndex) ProtoMessage()
func (*RepositoryIndexResponse_ModelIndex) ProtoReflect ¶
func (x *RepositoryIndexResponse_ModelIndex) ProtoReflect() protoreflect.Message
func (*RepositoryIndexResponse_ModelIndex) Reset ¶
func (x *RepositoryIndexResponse_ModelIndex) Reset()
func (*RepositoryIndexResponse_ModelIndex) String ¶
func (x *RepositoryIndexResponse_ModelIndex) String() string
type RepositoryModelLoadRequest ¶
type RepositoryModelLoadRequest struct { // @@ .. cpp:var:: string repository_name // @@ // @@ The name of the repository to load from. If empty the model // @@ is loaded from any repository. // @@ RepositoryName string `protobuf:"bytes,1,opt,name=repository_name,json=repositoryName,proto3" json:"repository_name,omitempty"` // @@ .. cpp:var:: string model_name // @@ // @@ The name of the model to load, or reload. // @@ ModelName string `protobuf:"bytes,2,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"` // @@ .. cpp:var:: map<string,ModelRepositoryParameter> parameters // @@ // @@ Optional model repository request parameters. // @@ Parameters map[string]*ModelRepositoryParameter `` /* 161-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message RepositoryModelLoadRequest @@ @@ Request message for RepositoryModelLoad. @@
func (*RepositoryModelLoadRequest) Descriptor
deprecated
func (*RepositoryModelLoadRequest) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryModelLoadRequest.ProtoReflect.Descriptor instead.
func (*RepositoryModelLoadRequest) GetModelName ¶
func (x *RepositoryModelLoadRequest) GetModelName() string
func (*RepositoryModelLoadRequest) GetParameters ¶
func (x *RepositoryModelLoadRequest) GetParameters() map[string]*ModelRepositoryParameter
func (*RepositoryModelLoadRequest) GetRepositoryName ¶
func (x *RepositoryModelLoadRequest) GetRepositoryName() string
func (*RepositoryModelLoadRequest) ProtoMessage ¶
func (*RepositoryModelLoadRequest) ProtoMessage()
func (*RepositoryModelLoadRequest) ProtoReflect ¶
func (x *RepositoryModelLoadRequest) ProtoReflect() protoreflect.Message
func (*RepositoryModelLoadRequest) Reset ¶
func (x *RepositoryModelLoadRequest) Reset()
func (*RepositoryModelLoadRequest) String ¶
func (x *RepositoryModelLoadRequest) String() string
type RepositoryModelLoadResponse ¶
type RepositoryModelLoadResponse struct {
// contains filtered or unexported fields
}
@@ @@.. cpp:var:: message RepositoryModelLoadResponse @@ @@ Response message for RepositoryModelLoad. @@
func (*RepositoryModelLoadResponse) Descriptor
deprecated
func (*RepositoryModelLoadResponse) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryModelLoadResponse.ProtoReflect.Descriptor instead.
func (*RepositoryModelLoadResponse) ProtoMessage ¶
func (*RepositoryModelLoadResponse) ProtoMessage()
func (*RepositoryModelLoadResponse) ProtoReflect ¶
func (x *RepositoryModelLoadResponse) ProtoReflect() protoreflect.Message
func (*RepositoryModelLoadResponse) Reset ¶
func (x *RepositoryModelLoadResponse) Reset()
func (*RepositoryModelLoadResponse) String ¶
func (x *RepositoryModelLoadResponse) String() string
type RepositoryModelUnloadRequest ¶
type RepositoryModelUnloadRequest struct { // @@ .. cpp:var:: string repository_name // @@ // @@ The name of the repository from which the model was originally // @@ loaded. If empty the repository is not considered. // @@ RepositoryName string `protobuf:"bytes,1,opt,name=repository_name,json=repositoryName,proto3" json:"repository_name,omitempty"` // @@ .. cpp:var:: string model_name // @@ // @@ The name of the model to unload. // @@ ModelName string `protobuf:"bytes,2,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"` // @@ .. cpp:var:: map<string,ModelRepositoryParameter> parameters // @@ // @@ Optional model repository request parameters. // @@ Parameters map[string]*ModelRepositoryParameter `` /* 161-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message RepositoryModelUnloadRequest @@ @@ Request message for RepositoryModelUnload. @@
func (*RepositoryModelUnloadRequest) Descriptor
deprecated
func (*RepositoryModelUnloadRequest) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryModelUnloadRequest.ProtoReflect.Descriptor instead.
func (*RepositoryModelUnloadRequest) GetModelName ¶
func (x *RepositoryModelUnloadRequest) GetModelName() string
func (*RepositoryModelUnloadRequest) GetParameters ¶
func (x *RepositoryModelUnloadRequest) GetParameters() map[string]*ModelRepositoryParameter
func (*RepositoryModelUnloadRequest) GetRepositoryName ¶
func (x *RepositoryModelUnloadRequest) GetRepositoryName() string
func (*RepositoryModelUnloadRequest) ProtoMessage ¶
func (*RepositoryModelUnloadRequest) ProtoMessage()
func (*RepositoryModelUnloadRequest) ProtoReflect ¶
func (x *RepositoryModelUnloadRequest) ProtoReflect() protoreflect.Message
func (*RepositoryModelUnloadRequest) Reset ¶
func (x *RepositoryModelUnloadRequest) Reset()
func (*RepositoryModelUnloadRequest) String ¶
func (x *RepositoryModelUnloadRequest) String() string
type RepositoryModelUnloadResponse ¶
type RepositoryModelUnloadResponse struct {
// contains filtered or unexported fields
}
@@ @@.. cpp:var:: message RepositoryModelUnloadResponse @@ @@ Response message for RepositoryModelUnload. @@
func (*RepositoryModelUnloadResponse) Descriptor
deprecated
func (*RepositoryModelUnloadResponse) Descriptor() ([]byte, []int)
Deprecated: Use RepositoryModelUnloadResponse.ProtoReflect.Descriptor instead.
func (*RepositoryModelUnloadResponse) ProtoMessage ¶
func (*RepositoryModelUnloadResponse) ProtoMessage()
func (*RepositoryModelUnloadResponse) ProtoReflect ¶
func (x *RepositoryModelUnloadResponse) ProtoReflect() protoreflect.Message
func (*RepositoryModelUnloadResponse) Reset ¶
func (x *RepositoryModelUnloadResponse) Reset()
func (*RepositoryModelUnloadResponse) String ¶
func (x *RepositoryModelUnloadResponse) String() string
type ServerLiveRequest ¶
type ServerLiveRequest struct {
// contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ServerLiveRequest @@ @@ Request message for ServerLive. @@
func (*ServerLiveRequest) Descriptor
deprecated
func (*ServerLiveRequest) Descriptor() ([]byte, []int)
Deprecated: Use ServerLiveRequest.ProtoReflect.Descriptor instead.
func (*ServerLiveRequest) ProtoMessage ¶
func (*ServerLiveRequest) ProtoMessage()
func (*ServerLiveRequest) ProtoReflect ¶
func (x *ServerLiveRequest) ProtoReflect() protoreflect.Message
func (*ServerLiveRequest) Reset ¶
func (x *ServerLiveRequest) Reset()
func (*ServerLiveRequest) String ¶
func (x *ServerLiveRequest) String() string
type ServerLiveResponse ¶
type ServerLiveResponse struct { // @@ // @@ .. cpp:var:: bool live // @@ // @@ True if the inference server is live, false if not live. // @@ Live bool `protobuf:"varint,1,opt,name=live,proto3" json:"live,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ServerLiveResponse @@ @@ Response message for ServerLive. @@
func (*ServerLiveResponse) Descriptor
deprecated
func (*ServerLiveResponse) Descriptor() ([]byte, []int)
Deprecated: Use ServerLiveResponse.ProtoReflect.Descriptor instead.
func (*ServerLiveResponse) GetLive ¶
func (x *ServerLiveResponse) GetLive() bool
func (*ServerLiveResponse) ProtoMessage ¶
func (*ServerLiveResponse) ProtoMessage()
func (*ServerLiveResponse) ProtoReflect ¶
func (x *ServerLiveResponse) ProtoReflect() protoreflect.Message
func (*ServerLiveResponse) Reset ¶
func (x *ServerLiveResponse) Reset()
func (*ServerLiveResponse) String ¶
func (x *ServerLiveResponse) String() string
type ServerMetadataRequest ¶
type ServerMetadataRequest struct {
// contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ServerMetadataRequest @@ @@ Request message for ServerMetadata. @@
func (*ServerMetadataRequest) Descriptor
deprecated
func (*ServerMetadataRequest) Descriptor() ([]byte, []int)
Deprecated: Use ServerMetadataRequest.ProtoReflect.Descriptor instead.
func (*ServerMetadataRequest) ProtoMessage ¶
func (*ServerMetadataRequest) ProtoMessage()
func (*ServerMetadataRequest) ProtoReflect ¶
func (x *ServerMetadataRequest) ProtoReflect() protoreflect.Message
func (*ServerMetadataRequest) Reset ¶
func (x *ServerMetadataRequest) Reset()
func (*ServerMetadataRequest) String ¶
func (x *ServerMetadataRequest) String() string
type ServerMetadataResponse ¶
type ServerMetadataResponse struct { // @@ // @@ .. cpp:var:: string name // @@ // @@ The server name. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ // @@ .. cpp:var:: string version // @@ // @@ The server version. // @@ Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` // @@ // @@ .. cpp:var:: string extensions (repeated) // @@ // @@ The extensions supported by the server. // @@ Extensions []string `protobuf:"bytes,3,rep,name=extensions,proto3" json:"extensions,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ServerMetadataResponse @@ @@ Response message for ServerMetadata. @@
func (*ServerMetadataResponse) Descriptor
deprecated
func (*ServerMetadataResponse) Descriptor() ([]byte, []int)
Deprecated: Use ServerMetadataResponse.ProtoReflect.Descriptor instead.
func (*ServerMetadataResponse) GetExtensions ¶
func (x *ServerMetadataResponse) GetExtensions() []string
func (*ServerMetadataResponse) GetName ¶
func (x *ServerMetadataResponse) GetName() string
func (*ServerMetadataResponse) GetVersion ¶
func (x *ServerMetadataResponse) GetVersion() string
func (*ServerMetadataResponse) ProtoMessage ¶
func (*ServerMetadataResponse) ProtoMessage()
func (*ServerMetadataResponse) ProtoReflect ¶
func (x *ServerMetadataResponse) ProtoReflect() protoreflect.Message
func (*ServerMetadataResponse) Reset ¶
func (x *ServerMetadataResponse) Reset()
func (*ServerMetadataResponse) String ¶
func (x *ServerMetadataResponse) String() string
type ServerReadyRequest ¶
type ServerReadyRequest struct {
// contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ServerReadyRequest @@ @@ Request message for ServerReady. @@
func (*ServerReadyRequest) Descriptor
deprecated
func (*ServerReadyRequest) Descriptor() ([]byte, []int)
Deprecated: Use ServerReadyRequest.ProtoReflect.Descriptor instead.
func (*ServerReadyRequest) ProtoMessage ¶
func (*ServerReadyRequest) ProtoMessage()
func (*ServerReadyRequest) ProtoReflect ¶
func (x *ServerReadyRequest) ProtoReflect() protoreflect.Message
func (*ServerReadyRequest) Reset ¶
func (x *ServerReadyRequest) Reset()
func (*ServerReadyRequest) String ¶
func (x *ServerReadyRequest) String() string
type ServerReadyResponse ¶
type ServerReadyResponse struct { // @@ // @@ .. cpp:var:: bool ready // @@ // @@ True if the inference server is ready, false if not ready. // @@ Ready bool `protobuf:"varint,1,opt,name=ready,proto3" json:"ready,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message ServerReadyResponse @@ @@ Response message for ServerReady. @@
func (*ServerReadyResponse) Descriptor
deprecated
func (*ServerReadyResponse) Descriptor() ([]byte, []int)
Deprecated: Use ServerReadyResponse.ProtoReflect.Descriptor instead.
func (*ServerReadyResponse) GetReady ¶
func (x *ServerReadyResponse) GetReady() bool
func (*ServerReadyResponse) ProtoMessage ¶
func (*ServerReadyResponse) ProtoMessage()
func (*ServerReadyResponse) ProtoReflect ¶
func (x *ServerReadyResponse) ProtoReflect() protoreflect.Message
func (*ServerReadyResponse) Reset ¶
func (x *ServerReadyResponse) Reset()
func (*ServerReadyResponse) String ¶
func (x *ServerReadyResponse) String() string
type StatisticDuration ¶
type StatisticDuration struct { // @@ .. cpp:var:: uint64 count // @@ // @@ Cumulative number of times this metric occurred. // @@ Count uint64 `protobuf:"varint,1,opt,name=count,proto3" json:"count,omitempty"` // @@ .. cpp:var:: uint64 ns // @@ // @@ Total collected duration of this metric in nanoseconds. // @@ Ns uint64 `protobuf:"varint,2,opt,name=ns,proto3" json:"ns,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message StatisticDuration @@ @@ Statistic recording a cumulative duration metric. @@
func (*StatisticDuration) Descriptor
deprecated
func (*StatisticDuration) Descriptor() ([]byte, []int)
Deprecated: Use StatisticDuration.ProtoReflect.Descriptor instead.
func (*StatisticDuration) GetCount ¶
func (x *StatisticDuration) GetCount() uint64
func (*StatisticDuration) GetNs ¶
func (x *StatisticDuration) GetNs() uint64
func (*StatisticDuration) ProtoMessage ¶
func (*StatisticDuration) ProtoMessage()
func (*StatisticDuration) ProtoReflect ¶
func (x *StatisticDuration) ProtoReflect() protoreflect.Message
func (*StatisticDuration) Reset ¶
func (x *StatisticDuration) Reset()
func (*StatisticDuration) String ¶
func (x *StatisticDuration) String() string
type SystemSharedMemoryRegisterRequest ¶
type SystemSharedMemoryRegisterRequest struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the region to register. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ // @@ The key of the underlying memory object that contains the // @@ shared memory region. // @@ Key string `protobuf:"bytes,2,opt,name=key,proto3" json:"key,omitempty"` // @@ // @@ Offset, in bytes, within the underlying memory object to // @@ the start of the shared memory region. // @@ Offset uint64 `protobuf:"varint,3,opt,name=offset,proto3" json:"offset,omitempty"` // @@ // @@ Size of the shared memory region, in bytes. // @@ ByteSize uint64 `protobuf:"varint,4,opt,name=byte_size,json=byteSize,proto3" json:"byte_size,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message SystemSharedMemoryRegisterRequest @@ @@ Request message for SystemSharedMemoryRegister. @@
func (*SystemSharedMemoryRegisterRequest) Descriptor
deprecated
func (*SystemSharedMemoryRegisterRequest) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryRegisterRequest.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryRegisterRequest) GetByteSize ¶
func (x *SystemSharedMemoryRegisterRequest) GetByteSize() uint64
func (*SystemSharedMemoryRegisterRequest) GetKey ¶
func (x *SystemSharedMemoryRegisterRequest) GetKey() string
func (*SystemSharedMemoryRegisterRequest) GetName ¶
func (x *SystemSharedMemoryRegisterRequest) GetName() string
func (*SystemSharedMemoryRegisterRequest) GetOffset ¶
func (x *SystemSharedMemoryRegisterRequest) GetOffset() uint64
func (*SystemSharedMemoryRegisterRequest) ProtoMessage ¶
func (*SystemSharedMemoryRegisterRequest) ProtoMessage()
func (*SystemSharedMemoryRegisterRequest) ProtoReflect ¶
func (x *SystemSharedMemoryRegisterRequest) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryRegisterRequest) Reset ¶
func (x *SystemSharedMemoryRegisterRequest) Reset()
func (*SystemSharedMemoryRegisterRequest) String ¶
func (x *SystemSharedMemoryRegisterRequest) String() string
type SystemSharedMemoryRegisterResponse ¶
type SystemSharedMemoryRegisterResponse struct {
// contains filtered or unexported fields
}
@@ @@.. cpp:var:: message SystemSharedMemoryRegisterResponse @@ @@ Response message for SystemSharedMemoryRegister. @@
func (*SystemSharedMemoryRegisterResponse) Descriptor
deprecated
func (*SystemSharedMemoryRegisterResponse) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryRegisterResponse.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryRegisterResponse) ProtoMessage ¶
func (*SystemSharedMemoryRegisterResponse) ProtoMessage()
func (*SystemSharedMemoryRegisterResponse) ProtoReflect ¶
func (x *SystemSharedMemoryRegisterResponse) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryRegisterResponse) Reset ¶
func (x *SystemSharedMemoryRegisterResponse) Reset()
func (*SystemSharedMemoryRegisterResponse) String ¶
func (x *SystemSharedMemoryRegisterResponse) String() string
type SystemSharedMemoryStatusRequest ¶
type SystemSharedMemoryStatusRequest struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the region to get status for. If empty the // @@ status is returned for all registered regions. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message SystemSharedMemoryStatusRequest @@ @@ Request message for SystemSharedMemoryStatus. @@
func (*SystemSharedMemoryStatusRequest) Descriptor
deprecated
func (*SystemSharedMemoryStatusRequest) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryStatusRequest.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryStatusRequest) GetName ¶
func (x *SystemSharedMemoryStatusRequest) GetName() string
func (*SystemSharedMemoryStatusRequest) ProtoMessage ¶
func (*SystemSharedMemoryStatusRequest) ProtoMessage()
func (*SystemSharedMemoryStatusRequest) ProtoReflect ¶
func (x *SystemSharedMemoryStatusRequest) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryStatusRequest) Reset ¶
func (x *SystemSharedMemoryStatusRequest) Reset()
func (*SystemSharedMemoryStatusRequest) String ¶
func (x *SystemSharedMemoryStatusRequest) String() string
type SystemSharedMemoryStatusResponse ¶
type SystemSharedMemoryStatusResponse struct { // @@ .. cpp:var:: map<string,RegionStatus> regions // @@ // @@ Status for each of the registered regions, indexed by // @@ region name. // @@ Regions map[string]*SystemSharedMemoryStatusResponse_RegionStatus `` /* 155-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message SystemSharedMemoryStatusResponse @@ @@ Response message for SystemSharedMemoryStatus. @@
func (*SystemSharedMemoryStatusResponse) Descriptor
deprecated
func (*SystemSharedMemoryStatusResponse) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryStatusResponse.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryStatusResponse) GetRegions ¶
func (x *SystemSharedMemoryStatusResponse) GetRegions() map[string]*SystemSharedMemoryStatusResponse_RegionStatus
func (*SystemSharedMemoryStatusResponse) ProtoMessage ¶
func (*SystemSharedMemoryStatusResponse) ProtoMessage()
func (*SystemSharedMemoryStatusResponse) ProtoReflect ¶
func (x *SystemSharedMemoryStatusResponse) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryStatusResponse) Reset ¶
func (x *SystemSharedMemoryStatusResponse) Reset()
func (*SystemSharedMemoryStatusResponse) String ¶
func (x *SystemSharedMemoryStatusResponse) String() string
type SystemSharedMemoryStatusResponse_RegionStatus ¶
type SystemSharedMemoryStatusResponse_RegionStatus struct { // @@ .. cpp:var:: string name // @@ // @@ The name for the shared memory region. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // @@ // @@ The key of the underlying memory object that contains the // @@ shared memory region. // @@ Key string `protobuf:"bytes,2,opt,name=key,proto3" json:"key,omitempty"` // @@ // @@ Offset, in bytes, within the underlying memory object to // @@ the start of the shared memory region. // @@ Offset uint64 `protobuf:"varint,3,opt,name=offset,proto3" json:"offset,omitempty"` // @@ // @@ Size of the shared memory region, in bytes. // @@ ByteSize uint64 `protobuf:"varint,4,opt,name=byte_size,json=byteSize,proto3" json:"byte_size,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message RegionStatus @@ @@ Status for a shared memory region. @@
func (*SystemSharedMemoryStatusResponse_RegionStatus) Descriptor
deprecated
func (*SystemSharedMemoryStatusResponse_RegionStatus) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryStatusResponse_RegionStatus.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryStatusResponse_RegionStatus) GetByteSize ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetByteSize() uint64
func (*SystemSharedMemoryStatusResponse_RegionStatus) GetKey ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetKey() string
func (*SystemSharedMemoryStatusResponse_RegionStatus) GetName ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetName() string
func (*SystemSharedMemoryStatusResponse_RegionStatus) GetOffset ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) GetOffset() uint64
func (*SystemSharedMemoryStatusResponse_RegionStatus) ProtoMessage ¶
func (*SystemSharedMemoryStatusResponse_RegionStatus) ProtoMessage()
func (*SystemSharedMemoryStatusResponse_RegionStatus) ProtoReflect ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryStatusResponse_RegionStatus) Reset ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) Reset()
func (*SystemSharedMemoryStatusResponse_RegionStatus) String ¶
func (x *SystemSharedMemoryStatusResponse_RegionStatus) String() string
type SystemSharedMemoryUnregisterRequest ¶
type SystemSharedMemoryUnregisterRequest struct { // @@ .. cpp:var:: string name // @@ // @@ The name of the system region to unregister. If empty // @@ all system shared-memory regions are unregistered. // @@ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message SystemSharedMemoryUnregisterRequest @@ @@ Request message for SystemSharedMemoryUnregister. @@
func (*SystemSharedMemoryUnregisterRequest) Descriptor
deprecated
func (*SystemSharedMemoryUnregisterRequest) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryUnregisterRequest.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryUnregisterRequest) GetName ¶
func (x *SystemSharedMemoryUnregisterRequest) GetName() string
func (*SystemSharedMemoryUnregisterRequest) ProtoMessage ¶
func (*SystemSharedMemoryUnregisterRequest) ProtoMessage()
func (*SystemSharedMemoryUnregisterRequest) ProtoReflect ¶
func (x *SystemSharedMemoryUnregisterRequest) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryUnregisterRequest) Reset ¶
func (x *SystemSharedMemoryUnregisterRequest) Reset()
func (*SystemSharedMemoryUnregisterRequest) String ¶
func (x *SystemSharedMemoryUnregisterRequest) String() string
type SystemSharedMemoryUnregisterResponse ¶
type SystemSharedMemoryUnregisterResponse struct {
// contains filtered or unexported fields
}
@@ @@.. cpp:var:: message SystemSharedMemoryUnregisterResponse @@ @@ Response message for SystemSharedMemoryUnregister. @@
func (*SystemSharedMemoryUnregisterResponse) Descriptor
deprecated
func (*SystemSharedMemoryUnregisterResponse) Descriptor() ([]byte, []int)
Deprecated: Use SystemSharedMemoryUnregisterResponse.ProtoReflect.Descriptor instead.
func (*SystemSharedMemoryUnregisterResponse) ProtoMessage ¶
func (*SystemSharedMemoryUnregisterResponse) ProtoMessage()
func (*SystemSharedMemoryUnregisterResponse) ProtoReflect ¶
func (x *SystemSharedMemoryUnregisterResponse) ProtoReflect() protoreflect.Message
func (*SystemSharedMemoryUnregisterResponse) Reset ¶
func (x *SystemSharedMemoryUnregisterResponse) Reset()
func (*SystemSharedMemoryUnregisterResponse) String ¶
func (x *SystemSharedMemoryUnregisterResponse) String() string
type TraceSettingRequest ¶
type TraceSettingRequest struct { // @@ .. cpp:var:: map<string,SettingValue> settings // @@ // @@ The new setting values to be updated, // @@ settings that are not specified will remain unchanged. // @@ Settings map[string]*TraceSettingRequest_SettingValue `` /* 157-byte string literal not displayed */ // @@ // @@ .. cpp:var:: string model_name // @@ // @@ The name of the model to apply the new trace settings. // @@ If not given, the new settings will be applied globally. // @@ ModelName string `protobuf:"bytes,2,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"` // contains filtered or unexported fields }
@@ @@.. cpp:var:: message TraceSettingRequest @@ @@ Request message for TraceSetting. @@
func (*TraceSettingRequest) Descriptor
deprecated
func (*TraceSettingRequest) Descriptor() ([]byte, []int)
Deprecated: Use TraceSettingRequest.ProtoReflect.Descriptor instead.
func (*TraceSettingRequest) GetModelName ¶
func (x *TraceSettingRequest) GetModelName() string
func (*TraceSettingRequest) GetSettings ¶
func (x *TraceSettingRequest) GetSettings() map[string]*TraceSettingRequest_SettingValue
func (*TraceSettingRequest) ProtoMessage ¶
func (*TraceSettingRequest) ProtoMessage()
func (*TraceSettingRequest) ProtoReflect ¶
func (x *TraceSettingRequest) ProtoReflect() protoreflect.Message
func (*TraceSettingRequest) Reset ¶
func (x *TraceSettingRequest) Reset()
func (*TraceSettingRequest) String ¶
func (x *TraceSettingRequest) String() string
type TraceSettingRequest_SettingValue ¶
type TraceSettingRequest_SettingValue struct { // @@ // @@ .. cpp:var:: string value (repeated) // @@ // @@ The value. // @@ Value []string `protobuf:"bytes,1,rep,name=value,proto3" json:"value,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message SettingValue @@ @@ The values to be associated with a trace setting. @@ If no value is provided, the setting will be cleared and @@ the global setting value will be used. @@
func (*TraceSettingRequest_SettingValue) Descriptor
deprecated
func (*TraceSettingRequest_SettingValue) Descriptor() ([]byte, []int)
Deprecated: Use TraceSettingRequest_SettingValue.ProtoReflect.Descriptor instead.
func (*TraceSettingRequest_SettingValue) GetValue ¶
func (x *TraceSettingRequest_SettingValue) GetValue() []string
func (*TraceSettingRequest_SettingValue) ProtoMessage ¶
func (*TraceSettingRequest_SettingValue) ProtoMessage()
func (*TraceSettingRequest_SettingValue) ProtoReflect ¶
func (x *TraceSettingRequest_SettingValue) ProtoReflect() protoreflect.Message
func (*TraceSettingRequest_SettingValue) Reset ¶
func (x *TraceSettingRequest_SettingValue) Reset()
func (*TraceSettingRequest_SettingValue) String ¶
func (x *TraceSettingRequest_SettingValue) String() string
type TraceSettingResponse ¶
type TraceSettingResponse struct { // @@ .. cpp:var:: map<string,SettingValue> settings // @@ // @@ The current trace settings, including any changes specified // @@ by TraceSettingRequest. // @@ Settings map[string]*TraceSettingResponse_SettingValue `` /* 157-byte string literal not displayed */ // contains filtered or unexported fields }
@@ @@.. cpp:var:: message TraceSettingResponse @@ @@ Response message for TraceSetting. @@
func (*TraceSettingResponse) Descriptor
deprecated
func (*TraceSettingResponse) Descriptor() ([]byte, []int)
Deprecated: Use TraceSettingResponse.ProtoReflect.Descriptor instead.
func (*TraceSettingResponse) GetSettings ¶
func (x *TraceSettingResponse) GetSettings() map[string]*TraceSettingResponse_SettingValue
func (*TraceSettingResponse) ProtoMessage ¶
func (*TraceSettingResponse) ProtoMessage()
func (*TraceSettingResponse) ProtoReflect ¶
func (x *TraceSettingResponse) ProtoReflect() protoreflect.Message
func (*TraceSettingResponse) Reset ¶
func (x *TraceSettingResponse) Reset()
func (*TraceSettingResponse) String ¶
func (x *TraceSettingResponse) String() string
type TraceSettingResponse_SettingValue ¶
type TraceSettingResponse_SettingValue struct { // @@ // @@ .. cpp:var:: string value (repeated) // @@ // @@ The value. // @@ Value []string `protobuf:"bytes,1,rep,name=value,proto3" json:"value,omitempty"` // contains filtered or unexported fields }
@@ @@ .. cpp:var:: message SettingValue @@ @@ The values to be associated with a trace setting. @@
func (*TraceSettingResponse_SettingValue) Descriptor
deprecated
func (*TraceSettingResponse_SettingValue) Descriptor() ([]byte, []int)
Deprecated: Use TraceSettingResponse_SettingValue.ProtoReflect.Descriptor instead.
func (*TraceSettingResponse_SettingValue) GetValue ¶
func (x *TraceSettingResponse_SettingValue) GetValue() []string
func (*TraceSettingResponse_SettingValue) ProtoMessage ¶
func (*TraceSettingResponse_SettingValue) ProtoMessage()
func (*TraceSettingResponse_SettingValue) ProtoReflect ¶
func (x *TraceSettingResponse_SettingValue) ProtoReflect() protoreflect.Message
func (*TraceSettingResponse_SettingValue) Reset ¶
func (x *TraceSettingResponse_SettingValue) Reset()
func (*TraceSettingResponse_SettingValue) String ¶
func (x *TraceSettingResponse_SettingValue) String() string
type UnimplementedGRPCInferenceServiceServer ¶
type UnimplementedGRPCInferenceServiceServer struct{}
UnimplementedGRPCInferenceServiceServer must be embedded to have forward compatible implementations.
NOTE: this should be embedded by value instead of pointer to avoid a nil pointer dereference when methods are called.
func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryRegister ¶
func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryRegister(context.Context, *CudaSharedMemoryRegisterRequest) (*CudaSharedMemoryRegisterResponse, error)
func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryStatus ¶
func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryStatus(context.Context, *CudaSharedMemoryStatusRequest) (*CudaSharedMemoryStatusResponse, error)
func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryUnregister ¶
func (UnimplementedGRPCInferenceServiceServer) CudaSharedMemoryUnregister(context.Context, *CudaSharedMemoryUnregisterRequest) (*CudaSharedMemoryUnregisterResponse, error)
func (UnimplementedGRPCInferenceServiceServer) LogSettings ¶
func (UnimplementedGRPCInferenceServiceServer) LogSettings(context.Context, *LogSettingsRequest) (*LogSettingsResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ModelConfig ¶
func (UnimplementedGRPCInferenceServiceServer) ModelConfig(context.Context, *ModelConfigRequest) (*ModelConfigResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ModelInfer ¶
func (UnimplementedGRPCInferenceServiceServer) ModelInfer(context.Context, *ModelInferRequest) (*ModelInferResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ModelMetadata ¶
func (UnimplementedGRPCInferenceServiceServer) ModelMetadata(context.Context, *ModelMetadataRequest) (*ModelMetadataResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ModelReady ¶
func (UnimplementedGRPCInferenceServiceServer) ModelReady(context.Context, *ModelReadyRequest) (*ModelReadyResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ModelStatistics ¶
func (UnimplementedGRPCInferenceServiceServer) ModelStatistics(context.Context, *ModelStatisticsRequest) (*ModelStatisticsResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ModelStreamInfer ¶
func (UnimplementedGRPCInferenceServiceServer) ModelStreamInfer(grpc.BidiStreamingServer[ModelInferRequest, ModelStreamInferResponse]) error
func (UnimplementedGRPCInferenceServiceServer) RepositoryIndex ¶
func (UnimplementedGRPCInferenceServiceServer) RepositoryIndex(context.Context, *RepositoryIndexRequest) (*RepositoryIndexResponse, error)
func (UnimplementedGRPCInferenceServiceServer) RepositoryModelLoad ¶
func (UnimplementedGRPCInferenceServiceServer) RepositoryModelLoad(context.Context, *RepositoryModelLoadRequest) (*RepositoryModelLoadResponse, error)
func (UnimplementedGRPCInferenceServiceServer) RepositoryModelUnload ¶
func (UnimplementedGRPCInferenceServiceServer) RepositoryModelUnload(context.Context, *RepositoryModelUnloadRequest) (*RepositoryModelUnloadResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ServerLive ¶
func (UnimplementedGRPCInferenceServiceServer) ServerLive(context.Context, *ServerLiveRequest) (*ServerLiveResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ServerMetadata ¶
func (UnimplementedGRPCInferenceServiceServer) ServerMetadata(context.Context, *ServerMetadataRequest) (*ServerMetadataResponse, error)
func (UnimplementedGRPCInferenceServiceServer) ServerReady ¶
func (UnimplementedGRPCInferenceServiceServer) ServerReady(context.Context, *ServerReadyRequest) (*ServerReadyResponse, error)
func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryRegister ¶
func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryRegister(context.Context, *SystemSharedMemoryRegisterRequest) (*SystemSharedMemoryRegisterResponse, error)
func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryStatus ¶
func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryStatus(context.Context, *SystemSharedMemoryStatusRequest) (*SystemSharedMemoryStatusResponse, error)
func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryUnregister ¶
func (UnimplementedGRPCInferenceServiceServer) SystemSharedMemoryUnregister(context.Context, *SystemSharedMemoryUnregisterRequest) (*SystemSharedMemoryUnregisterResponse, error)
func (UnimplementedGRPCInferenceServiceServer) TraceSetting ¶
func (UnimplementedGRPCInferenceServiceServer) TraceSetting(context.Context, *TraceSettingRequest) (*TraceSettingResponse, error)
type UnsafeGRPCInferenceServiceServer ¶
type UnsafeGRPCInferenceServiceServer interface {
// contains filtered or unexported methods
}
UnsafeGRPCInferenceServiceServer may be embedded to opt out of forward compatibility for this service. Use of this interface is not recommended, as added methods to GRPCInferenceServiceServer will result in compilation errors.