Documentation ¶
Index ¶
- Variables
- type BatchInput
- func (*BatchInput) Descriptor() ([]byte, []int) (deprecated)
- func (x *BatchInput) GetDataType() DataType
- func (x *BatchInput) GetKind() BatchInput_Kind
- func (x *BatchInput) GetSourceInput() []string
- func (x *BatchInput) GetTargetName() []string
- func (*BatchInput) ProtoMessage()
- func (x *BatchInput) ProtoReflect() protoreflect.Message
- func (x *BatchInput) Reset()
- func (x *BatchInput) String() string
- type BatchInput_Kind
- func (BatchInput_Kind) Descriptor() protoreflect.EnumDescriptor
- func (x BatchInput_Kind) Enum() *BatchInput_Kind
- func (BatchInput_Kind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x BatchInput_Kind) Number() protoreflect.EnumNumber
- func (x BatchInput_Kind) String() string
- func (BatchInput_Kind) Type() protoreflect.EnumType
- type BatchOutput
- func (*BatchOutput) Descriptor() ([]byte, []int) (deprecated)
- func (x *BatchOutput) GetKind() BatchOutput_Kind
- func (x *BatchOutput) GetSourceInput() []string
- func (x *BatchOutput) GetTargetName() []string
- func (*BatchOutput) ProtoMessage()
- func (x *BatchOutput) ProtoReflect() protoreflect.Message
- func (x *BatchOutput) Reset()
- func (x *BatchOutput) String() string
- type BatchOutput_Kind
- func (BatchOutput_Kind) Descriptor() protoreflect.EnumDescriptor
- func (x BatchOutput_Kind) Enum() *BatchOutput_Kind
- func (BatchOutput_Kind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x BatchOutput_Kind) Number() protoreflect.EnumNumber
- func (x BatchOutput_Kind) String() string
- func (BatchOutput_Kind) Type() protoreflect.EnumType
- type DataType
- func (DataType) Descriptor() protoreflect.EnumDescriptor
- func (x DataType) Enum() *DataType
- func (DataType) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x DataType) Number() protoreflect.EnumNumber
- func (x DataType) String() string
- func (DataType) Type() protoreflect.EnumType
- type ModelConfig
- func (*ModelConfig) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelConfig) GetBackend() string
- func (x *ModelConfig) GetBatchInput() []*BatchInput
- func (x *ModelConfig) GetBatchOutput() []*BatchOutput
- func (x *ModelConfig) GetCcModelFilenames() map[string]string
- func (x *ModelConfig) GetDefaultModelFilename() string
- func (x *ModelConfig) GetDynamicBatching() *ModelDynamicBatching
- func (x *ModelConfig) GetEnsembleScheduling() *ModelEnsembling
- func (x *ModelConfig) GetInput() []*ModelInput
- func (x *ModelConfig) GetInstanceGroup() []*ModelInstanceGroup
- func (x *ModelConfig) GetMaxBatchSize() int32
- func (x *ModelConfig) GetMetricTags() map[string]string
- func (x *ModelConfig) GetModelOperations() *ModelOperations
- func (x *ModelConfig) GetModelRepositoryAgents() *ModelRepositoryAgents
- func (x *ModelConfig) GetModelTransactionPolicy() *ModelTransactionPolicy
- func (x *ModelConfig) GetModelWarmup() []*ModelWarmup
- func (x *ModelConfig) GetName() string
- func (x *ModelConfig) GetOptimization() *ModelOptimizationPolicy
- func (x *ModelConfig) GetOutput() []*ModelOutput
- func (x *ModelConfig) GetParameters() map[string]*ModelParameter
- func (x *ModelConfig) GetPlatform() string
- func (x *ModelConfig) GetResponseCache() *ModelResponseCache
- func (m *ModelConfig) GetSchedulingChoice() isModelConfig_SchedulingChoice
- func (x *ModelConfig) GetSequenceBatching() *ModelSequenceBatching
- func (x *ModelConfig) GetVersionPolicy() *ModelVersionPolicy
- func (*ModelConfig) ProtoMessage()
- func (x *ModelConfig) ProtoReflect() protoreflect.Message
- func (x *ModelConfig) Reset()
- func (x *ModelConfig) String() string
- type ModelConfig_DynamicBatching
- type ModelConfig_EnsembleScheduling
- type ModelConfig_SequenceBatching
- type ModelDynamicBatching
- func (*ModelDynamicBatching) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelDynamicBatching) GetDefaultPriorityLevel() uint32
- func (x *ModelDynamicBatching) GetDefaultQueuePolicy() *ModelQueuePolicy
- func (x *ModelDynamicBatching) GetMaxQueueDelayMicroseconds() uint64
- func (x *ModelDynamicBatching) GetPreferredBatchSize() []int32
- func (x *ModelDynamicBatching) GetPreserveOrdering() bool
- func (x *ModelDynamicBatching) GetPriorityLevels() uint32
- func (x *ModelDynamicBatching) GetPriorityQueuePolicy() map[uint32]*ModelQueuePolicy
- func (*ModelDynamicBatching) ProtoMessage()
- func (x *ModelDynamicBatching) ProtoReflect() protoreflect.Message
- func (x *ModelDynamicBatching) Reset()
- func (x *ModelDynamicBatching) String() string
- type ModelEnsembling
- func (*ModelEnsembling) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelEnsembling) GetStep() []*ModelEnsembling_Step
- func (*ModelEnsembling) ProtoMessage()
- func (x *ModelEnsembling) ProtoReflect() protoreflect.Message
- func (x *ModelEnsembling) Reset()
- func (x *ModelEnsembling) String() string
- type ModelEnsembling_Step
- func (*ModelEnsembling_Step) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelEnsembling_Step) GetInputMap() map[string]string
- func (x *ModelEnsembling_Step) GetModelName() string
- func (x *ModelEnsembling_Step) GetModelVersion() int64
- func (x *ModelEnsembling_Step) GetOutputMap() map[string]string
- func (*ModelEnsembling_Step) ProtoMessage()
- func (x *ModelEnsembling_Step) ProtoReflect() protoreflect.Message
- func (x *ModelEnsembling_Step) Reset()
- func (x *ModelEnsembling_Step) String() string
- type ModelInput
- func (*ModelInput) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInput) GetAllowRaggedBatch() bool
- func (x *ModelInput) GetDataType() DataType
- func (x *ModelInput) GetDims() []int64
- func (x *ModelInput) GetFormat() ModelInput_Format
- func (x *ModelInput) GetIsShapeTensor() bool
- func (x *ModelInput) GetName() string
- func (x *ModelInput) GetOptional() bool
- func (x *ModelInput) GetReshape() *ModelTensorReshape
- func (*ModelInput) ProtoMessage()
- func (x *ModelInput) ProtoReflect() protoreflect.Message
- func (x *ModelInput) Reset()
- func (x *ModelInput) String() string
- type ModelInput_Format
- func (ModelInput_Format) Descriptor() protoreflect.EnumDescriptor
- func (x ModelInput_Format) Enum() *ModelInput_Format
- func (ModelInput_Format) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelInput_Format) Number() protoreflect.EnumNumber
- func (x ModelInput_Format) String() string
- func (ModelInput_Format) Type() protoreflect.EnumType
- type ModelInstanceGroup
- func (*ModelInstanceGroup) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInstanceGroup) GetCount() int32
- func (x *ModelInstanceGroup) GetGpus() []int32
- func (x *ModelInstanceGroup) GetHostPolicy() string
- func (x *ModelInstanceGroup) GetKind() ModelInstanceGroup_Kind
- func (x *ModelInstanceGroup) GetName() string
- func (x *ModelInstanceGroup) GetPassive() bool
- func (x *ModelInstanceGroup) GetProfile() []string
- func (x *ModelInstanceGroup) GetRateLimiter() *ModelRateLimiter
- func (x *ModelInstanceGroup) GetSecondaryDevices() []*ModelInstanceGroup_SecondaryDevice
- func (*ModelInstanceGroup) ProtoMessage()
- func (x *ModelInstanceGroup) ProtoReflect() protoreflect.Message
- func (x *ModelInstanceGroup) Reset()
- func (x *ModelInstanceGroup) String() string
- type ModelInstanceGroup_Kind
- func (ModelInstanceGroup_Kind) Descriptor() protoreflect.EnumDescriptor
- func (x ModelInstanceGroup_Kind) Enum() *ModelInstanceGroup_Kind
- func (ModelInstanceGroup_Kind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelInstanceGroup_Kind) Number() protoreflect.EnumNumber
- func (x ModelInstanceGroup_Kind) String() string
- func (ModelInstanceGroup_Kind) Type() protoreflect.EnumType
- type ModelInstanceGroup_SecondaryDevice
- func (*ModelInstanceGroup_SecondaryDevice) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelInstanceGroup_SecondaryDevice) GetDeviceId() int64
- func (x *ModelInstanceGroup_SecondaryDevice) GetKind() ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind
- func (*ModelInstanceGroup_SecondaryDevice) ProtoMessage()
- func (x *ModelInstanceGroup_SecondaryDevice) ProtoReflect() protoreflect.Message
- func (x *ModelInstanceGroup_SecondaryDevice) Reset()
- func (x *ModelInstanceGroup_SecondaryDevice) String() string
- type ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind
- func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Descriptor() protoreflect.EnumDescriptor
- func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Enum() *ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind
- func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Number() protoreflect.EnumNumber
- func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) String() string
- func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Type() protoreflect.EnumType
- type ModelOperations
- type ModelOptimizationPolicy
- func (*ModelOptimizationPolicy) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy) GetCuda() *ModelOptimizationPolicy_Cuda
- func (x *ModelOptimizationPolicy) GetEagerBatching() bool
- func (x *ModelOptimizationPolicy) GetExecutionAccelerators() *ModelOptimizationPolicy_ExecutionAccelerators
- func (x *ModelOptimizationPolicy) GetGatherKernelBufferThreshold() uint32
- func (x *ModelOptimizationPolicy) GetGraph() *ModelOptimizationPolicy_Graph
- func (x *ModelOptimizationPolicy) GetInputPinnedMemory() *ModelOptimizationPolicy_PinnedMemoryBuffer
- func (x *ModelOptimizationPolicy) GetOutputPinnedMemory() *ModelOptimizationPolicy_PinnedMemoryBuffer
- func (x *ModelOptimizationPolicy) GetPriority() ModelOptimizationPolicy_ModelPriority
- func (*ModelOptimizationPolicy) ProtoMessage()
- func (x *ModelOptimizationPolicy) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy) Reset()
- func (x *ModelOptimizationPolicy) String() string
- type ModelOptimizationPolicy_Cuda
- func (*ModelOptimizationPolicy_Cuda) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Cuda) GetBusyWaitEvents() bool
- func (x *ModelOptimizationPolicy_Cuda) GetGraphSpec() []*ModelOptimizationPolicy_Cuda_GraphSpec
- func (x *ModelOptimizationPolicy_Cuda) GetGraphs() bool
- func (x *ModelOptimizationPolicy_Cuda) GetOutputCopyStream() bool
- func (*ModelOptimizationPolicy_Cuda) ProtoMessage()
- func (x *ModelOptimizationPolicy_Cuda) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Cuda) Reset()
- func (x *ModelOptimizationPolicy_Cuda) String() string
- type ModelOptimizationPolicy_Cuda_GraphSpec
- func (*ModelOptimizationPolicy_Cuda_GraphSpec) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetBatchSize() int32
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetGraphLowerBound() *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetInput() map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape
- func (*ModelOptimizationPolicy_Cuda_GraphSpec) ProtoMessage()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) Reset()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec) String() string
- type ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound
- func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetBatchSize() int32
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetInput() map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape
- func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoMessage()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Reset()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) String() string
- type ModelOptimizationPolicy_Cuda_GraphSpec_Shape
- func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) GetDim() []int64
- func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoMessage()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Reset()
- func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) String() string
- type ModelOptimizationPolicy_ExecutionAccelerators
- func (*ModelOptimizationPolicy_ExecutionAccelerators) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) GetCpuExecutionAccelerator() []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) GetGpuExecutionAccelerator() []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
- func (*ModelOptimizationPolicy_ExecutionAccelerators) ProtoMessage()
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) Reset()
- func (x *ModelOptimizationPolicy_ExecutionAccelerators) String() string
- type ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
- func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetName() string
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetParameters() map[string]string
- func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoMessage()
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Reset()
- func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) String() string
- type ModelOptimizationPolicy_Graph
- func (*ModelOptimizationPolicy_Graph) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_Graph) GetLevel() int32
- func (*ModelOptimizationPolicy_Graph) ProtoMessage()
- func (x *ModelOptimizationPolicy_Graph) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_Graph) Reset()
- func (x *ModelOptimizationPolicy_Graph) String() string
- type ModelOptimizationPolicy_ModelPriority
- func (ModelOptimizationPolicy_ModelPriority) Descriptor() protoreflect.EnumDescriptor
- func (x ModelOptimizationPolicy_ModelPriority) Enum() *ModelOptimizationPolicy_ModelPriority
- func (ModelOptimizationPolicy_ModelPriority) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelOptimizationPolicy_ModelPriority) Number() protoreflect.EnumNumber
- func (x ModelOptimizationPolicy_ModelPriority) String() string
- func (ModelOptimizationPolicy_ModelPriority) Type() protoreflect.EnumType
- type ModelOptimizationPolicy_PinnedMemoryBuffer
- func (*ModelOptimizationPolicy_PinnedMemoryBuffer) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) GetEnable() bool
- func (*ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoMessage()
- func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoReflect() protoreflect.Message
- func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) Reset()
- func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) String() string
- type ModelOutput
- func (*ModelOutput) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelOutput) GetDataType() DataType
- func (x *ModelOutput) GetDims() []int64
- func (x *ModelOutput) GetIsShapeTensor() bool
- func (x *ModelOutput) GetLabelFilename() string
- func (x *ModelOutput) GetName() string
- func (x *ModelOutput) GetReshape() *ModelTensorReshape
- func (*ModelOutput) ProtoMessage()
- func (x *ModelOutput) ProtoReflect() protoreflect.Message
- func (x *ModelOutput) Reset()
- func (x *ModelOutput) String() string
- type ModelParameter
- type ModelQueuePolicy
- func (*ModelQueuePolicy) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelQueuePolicy) GetAllowTimeoutOverride() bool
- func (x *ModelQueuePolicy) GetDefaultTimeoutMicroseconds() uint64
- func (x *ModelQueuePolicy) GetMaxQueueSize() uint32
- func (x *ModelQueuePolicy) GetTimeoutAction() ModelQueuePolicy_TimeoutAction
- func (*ModelQueuePolicy) ProtoMessage()
- func (x *ModelQueuePolicy) ProtoReflect() protoreflect.Message
- func (x *ModelQueuePolicy) Reset()
- func (x *ModelQueuePolicy) String() string
- type ModelQueuePolicy_TimeoutAction
- func (ModelQueuePolicy_TimeoutAction) Descriptor() protoreflect.EnumDescriptor
- func (x ModelQueuePolicy_TimeoutAction) Enum() *ModelQueuePolicy_TimeoutAction
- func (ModelQueuePolicy_TimeoutAction) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelQueuePolicy_TimeoutAction) Number() protoreflect.EnumNumber
- func (x ModelQueuePolicy_TimeoutAction) String() string
- func (ModelQueuePolicy_TimeoutAction) Type() protoreflect.EnumType
- type ModelRateLimiter
- func (*ModelRateLimiter) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelRateLimiter) GetPriority() uint32
- func (x *ModelRateLimiter) GetResources() []*ModelRateLimiter_Resource
- func (*ModelRateLimiter) ProtoMessage()
- func (x *ModelRateLimiter) ProtoReflect() protoreflect.Message
- func (x *ModelRateLimiter) Reset()
- func (x *ModelRateLimiter) String() string
- type ModelRateLimiter_Resource
- func (*ModelRateLimiter_Resource) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelRateLimiter_Resource) GetCount() uint32
- func (x *ModelRateLimiter_Resource) GetGlobal() bool
- func (x *ModelRateLimiter_Resource) GetName() string
- func (*ModelRateLimiter_Resource) ProtoMessage()
- func (x *ModelRateLimiter_Resource) ProtoReflect() protoreflect.Message
- func (x *ModelRateLimiter_Resource) Reset()
- func (x *ModelRateLimiter_Resource) String() string
- type ModelRepositoryAgents
- func (*ModelRepositoryAgents) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelRepositoryAgents) GetAgents() []*ModelRepositoryAgents_Agent
- func (*ModelRepositoryAgents) ProtoMessage()
- func (x *ModelRepositoryAgents) ProtoReflect() protoreflect.Message
- func (x *ModelRepositoryAgents) Reset()
- func (x *ModelRepositoryAgents) String() string
- type ModelRepositoryAgents_Agent
- func (*ModelRepositoryAgents_Agent) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelRepositoryAgents_Agent) GetName() string
- func (x *ModelRepositoryAgents_Agent) GetParameters() map[string]string
- func (*ModelRepositoryAgents_Agent) ProtoMessage()
- func (x *ModelRepositoryAgents_Agent) ProtoReflect() protoreflect.Message
- func (x *ModelRepositoryAgents_Agent) Reset()
- func (x *ModelRepositoryAgents_Agent) String() string
- type ModelResponseCache
- func (*ModelResponseCache) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelResponseCache) GetEnable() bool
- func (*ModelResponseCache) ProtoMessage()
- func (x *ModelResponseCache) ProtoReflect() protoreflect.Message
- func (x *ModelResponseCache) Reset()
- func (x *ModelResponseCache) String() string
- type ModelSequenceBatching
- func (*ModelSequenceBatching) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching) GetControlInput() []*ModelSequenceBatching_ControlInput
- func (x *ModelSequenceBatching) GetDirect() *ModelSequenceBatching_StrategyDirect
- func (x *ModelSequenceBatching) GetMaxSequenceIdleMicroseconds() uint64
- func (x *ModelSequenceBatching) GetOldest() *ModelSequenceBatching_StrategyOldest
- func (x *ModelSequenceBatching) GetState() []*ModelSequenceBatching_State
- func (m *ModelSequenceBatching) GetStrategyChoice() isModelSequenceBatching_StrategyChoice
- func (*ModelSequenceBatching) ProtoMessage()
- func (x *ModelSequenceBatching) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching) Reset()
- func (x *ModelSequenceBatching) String() string
- type ModelSequenceBatching_Control
- func (*ModelSequenceBatching_Control) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching_Control) GetBoolFalseTrue() []bool
- func (x *ModelSequenceBatching_Control) GetDataType() DataType
- func (x *ModelSequenceBatching_Control) GetFp32FalseTrue() []float32
- func (x *ModelSequenceBatching_Control) GetInt32FalseTrue() []int32
- func (x *ModelSequenceBatching_Control) GetKind() ModelSequenceBatching_Control_Kind
- func (*ModelSequenceBatching_Control) ProtoMessage()
- func (x *ModelSequenceBatching_Control) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_Control) Reset()
- func (x *ModelSequenceBatching_Control) String() string
- type ModelSequenceBatching_ControlInput
- func (*ModelSequenceBatching_ControlInput) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching_ControlInput) GetControl() []*ModelSequenceBatching_Control
- func (x *ModelSequenceBatching_ControlInput) GetName() string
- func (*ModelSequenceBatching_ControlInput) ProtoMessage()
- func (x *ModelSequenceBatching_ControlInput) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_ControlInput) Reset()
- func (x *ModelSequenceBatching_ControlInput) String() string
- type ModelSequenceBatching_Control_Kind
- func (ModelSequenceBatching_Control_Kind) Descriptor() protoreflect.EnumDescriptor
- func (x ModelSequenceBatching_Control_Kind) Enum() *ModelSequenceBatching_Control_Kind
- func (ModelSequenceBatching_Control_Kind) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x ModelSequenceBatching_Control_Kind) Number() protoreflect.EnumNumber
- func (x ModelSequenceBatching_Control_Kind) String() string
- func (ModelSequenceBatching_Control_Kind) Type() protoreflect.EnumType
- type ModelSequenceBatching_Direct
- type ModelSequenceBatching_InitialState
- func (*ModelSequenceBatching_InitialState) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching_InitialState) GetDataFile() string
- func (x *ModelSequenceBatching_InitialState) GetDataType() DataType
- func (x *ModelSequenceBatching_InitialState) GetDims() []int64
- func (x *ModelSequenceBatching_InitialState) GetName() string
- func (m *ModelSequenceBatching_InitialState) GetStateData() isModelSequenceBatching_InitialState_StateData
- func (x *ModelSequenceBatching_InitialState) GetZeroData() bool
- func (*ModelSequenceBatching_InitialState) ProtoMessage()
- func (x *ModelSequenceBatching_InitialState) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_InitialState) Reset()
- func (x *ModelSequenceBatching_InitialState) String() string
- type ModelSequenceBatching_InitialState_DataFile
- type ModelSequenceBatching_InitialState_ZeroData
- type ModelSequenceBatching_Oldest
- type ModelSequenceBatching_State
- func (*ModelSequenceBatching_State) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching_State) GetDataType() DataType
- func (x *ModelSequenceBatching_State) GetDims() []int64
- func (x *ModelSequenceBatching_State) GetInitialState() []*ModelSequenceBatching_InitialState
- func (x *ModelSequenceBatching_State) GetInputName() string
- func (x *ModelSequenceBatching_State) GetOutputName() string
- func (*ModelSequenceBatching_State) ProtoMessage()
- func (x *ModelSequenceBatching_State) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_State) Reset()
- func (x *ModelSequenceBatching_State) String() string
- type ModelSequenceBatching_StrategyDirect
- func (*ModelSequenceBatching_StrategyDirect) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching_StrategyDirect) GetMaxQueueDelayMicroseconds() uint64
- func (x *ModelSequenceBatching_StrategyDirect) GetMinimumSlotUtilization() float32
- func (*ModelSequenceBatching_StrategyDirect) ProtoMessage()
- func (x *ModelSequenceBatching_StrategyDirect) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_StrategyDirect) Reset()
- func (x *ModelSequenceBatching_StrategyDirect) String() string
- type ModelSequenceBatching_StrategyOldest
- func (*ModelSequenceBatching_StrategyOldest) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelSequenceBatching_StrategyOldest) GetMaxCandidateSequences() int32
- func (x *ModelSequenceBatching_StrategyOldest) GetMaxQueueDelayMicroseconds() uint64
- func (x *ModelSequenceBatching_StrategyOldest) GetPreferredBatchSize() []int32
- func (*ModelSequenceBatching_StrategyOldest) ProtoMessage()
- func (x *ModelSequenceBatching_StrategyOldest) ProtoReflect() protoreflect.Message
- func (x *ModelSequenceBatching_StrategyOldest) Reset()
- func (x *ModelSequenceBatching_StrategyOldest) String() string
- type ModelTensorReshape
- func (*ModelTensorReshape) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelTensorReshape) GetShape() []int64
- func (*ModelTensorReshape) ProtoMessage()
- func (x *ModelTensorReshape) ProtoReflect() protoreflect.Message
- func (x *ModelTensorReshape) Reset()
- func (x *ModelTensorReshape) String() string
- type ModelTransactionPolicy
- func (*ModelTransactionPolicy) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelTransactionPolicy) GetDecoupled() bool
- func (*ModelTransactionPolicy) ProtoMessage()
- func (x *ModelTransactionPolicy) ProtoReflect() protoreflect.Message
- func (x *ModelTransactionPolicy) Reset()
- func (x *ModelTransactionPolicy) String() string
- type ModelVersionPolicy
- func (*ModelVersionPolicy) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelVersionPolicy) GetAll() *ModelVersionPolicy_All
- func (x *ModelVersionPolicy) GetLatest() *ModelVersionPolicy_Latest
- func (m *ModelVersionPolicy) GetPolicyChoice() isModelVersionPolicy_PolicyChoice
- func (x *ModelVersionPolicy) GetSpecific() *ModelVersionPolicy_Specific
- func (*ModelVersionPolicy) ProtoMessage()
- func (x *ModelVersionPolicy) ProtoReflect() protoreflect.Message
- func (x *ModelVersionPolicy) Reset()
- func (x *ModelVersionPolicy) String() string
- type ModelVersionPolicy_All
- type ModelVersionPolicy_All_
- type ModelVersionPolicy_Latest
- func (*ModelVersionPolicy_Latest) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelVersionPolicy_Latest) GetNumVersions() uint32
- func (*ModelVersionPolicy_Latest) ProtoMessage()
- func (x *ModelVersionPolicy_Latest) ProtoReflect() protoreflect.Message
- func (x *ModelVersionPolicy_Latest) Reset()
- func (x *ModelVersionPolicy_Latest) String() string
- type ModelVersionPolicy_Latest_
- type ModelVersionPolicy_Specific
- func (*ModelVersionPolicy_Specific) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelVersionPolicy_Specific) GetVersions() []int64
- func (*ModelVersionPolicy_Specific) ProtoMessage()
- func (x *ModelVersionPolicy_Specific) ProtoReflect() protoreflect.Message
- func (x *ModelVersionPolicy_Specific) Reset()
- func (x *ModelVersionPolicy_Specific) String() string
- type ModelVersionPolicy_Specific_
- type ModelWarmup
- func (*ModelWarmup) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelWarmup) GetBatchSize() uint32
- func (x *ModelWarmup) GetInputs() map[string]*ModelWarmup_Input
- func (x *ModelWarmup) GetName() string
- func (*ModelWarmup) ProtoMessage()
- func (x *ModelWarmup) ProtoReflect() protoreflect.Message
- func (x *ModelWarmup) Reset()
- func (x *ModelWarmup) String() string
- type ModelWarmup_Input
- func (*ModelWarmup_Input) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelWarmup_Input) GetDataType() DataType
- func (x *ModelWarmup_Input) GetDims() []int64
- func (x *ModelWarmup_Input) GetInputDataFile() string
- func (m *ModelWarmup_Input) GetInputDataType() isModelWarmup_Input_InputDataType
- func (x *ModelWarmup_Input) GetRandomData() bool
- func (x *ModelWarmup_Input) GetZeroData() bool
- func (*ModelWarmup_Input) ProtoMessage()
- func (x *ModelWarmup_Input) ProtoReflect() protoreflect.Message
- func (x *ModelWarmup_Input) Reset()
- func (x *ModelWarmup_Input) String() string
- type ModelWarmup_Input_InputDataFile
- type ModelWarmup_Input_RandomData
- type ModelWarmup_Input_ZeroData
Constants ¶
This section is empty.
Variables ¶
var (
    DataType_name = map[int32]string{
        0:  "TYPE_INVALID",
        1:  "TYPE_BOOL",
        2:  "TYPE_UINT8",
        3:  "TYPE_UINT16",
        4:  "TYPE_UINT32",
        5:  "TYPE_UINT64",
        6:  "TYPE_INT8",
        7:  "TYPE_INT16",
        8:  "TYPE_INT32",
        9:  "TYPE_INT64",
        10: "TYPE_FP16",
        11: "TYPE_FP32",
        12: "TYPE_FP64",
        13: "TYPE_STRING",
    }
    DataType_value = map[string]int32{
        "TYPE_INVALID": 0,
        "TYPE_BOOL":    1,
        "TYPE_UINT8":   2,
        "TYPE_UINT16":  3,
        "TYPE_UINT32":  4,
        "TYPE_UINT64":  5,
        "TYPE_INT8":    6,
        "TYPE_INT16":   7,
        "TYPE_INT32":   8,
        "TYPE_INT64":   9,
        "TYPE_FP16":    10,
        "TYPE_FP32":    11,
        "TYPE_FP64":    12,
        "TYPE_STRING":  13,
    }
)
Enum value maps for DataType.
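These maps translate between the symbolic enum names and their wire values, which is handy when reading or emitting hand-written configurations. A minimal sketch (identifiers are from this package):

    var (
        fp32Name = DataType_name[int32(DataType_TYPE_FP32)] // "TYPE_FP32"
        int64Val = DataType_value["TYPE_INT64"]             // 9
    )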
var (
    ModelInstanceGroup_Kind_name = map[int32]string{
        0: "KIND_AUTO",
        1: "KIND_GPU",
        2: "KIND_CPU",
        3: "KIND_MODEL",
    }
    ModelInstanceGroup_Kind_value = map[string]int32{
        "KIND_AUTO":  0,
        "KIND_GPU":   1,
        "KIND_CPU":   2,
        "KIND_MODEL": 3,
    }
)
Enum value maps for ModelInstanceGroup_Kind.
var (
    ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind_name = map[int32]string{
        0: "KIND_NVDLA",
    }
    ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind_value = map[string]int32{
        "KIND_NVDLA": 0,
    }
)
Enum value maps for ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind.
var (
    ModelInput_Format_name = map[int32]string{
        0: "FORMAT_NONE",
        1: "FORMAT_NHWC",
        2: "FORMAT_NCHW",
    }
    ModelInput_Format_value = map[string]int32{
        "FORMAT_NONE": 0,
        "FORMAT_NHWC": 1,
        "FORMAT_NCHW": 2,
    }
)
Enum value maps for ModelInput_Format.
var (
    BatchInput_Kind_name = map[int32]string{
        0: "BATCH_ELEMENT_COUNT",
        1: "BATCH_ACCUMULATED_ELEMENT_COUNT",
        2: "BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO",
        3: "BATCH_MAX_ELEMENT_COUNT_AS_SHAPE",
    }
    BatchInput_Kind_value = map[string]int32{
        "BATCH_ELEMENT_COUNT":                       0,
        "BATCH_ACCUMULATED_ELEMENT_COUNT":           1,
        "BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO": 2,
        "BATCH_MAX_ELEMENT_COUNT_AS_SHAPE":          3,
    }
)
Enum value maps for BatchInput_Kind.
var (
    BatchOutput_Kind_name = map[int32]string{
        0: "BATCH_SCATTER_WITH_INPUT_SHAPE",
    }
    BatchOutput_Kind_value = map[string]int32{
        "BATCH_SCATTER_WITH_INPUT_SHAPE": 0,
    }
)
Enum value maps for BatchOutput_Kind.
var (
    ModelOptimizationPolicy_ModelPriority_name = map[int32]string{
        0: "PRIORITY_DEFAULT",
        1: "PRIORITY_MAX",
        2: "PRIORITY_MIN",
    }
    ModelOptimizationPolicy_ModelPriority_value = map[string]int32{
        "PRIORITY_DEFAULT": 0,
        "PRIORITY_MAX":     1,
        "PRIORITY_MIN":     2,
    }
)
Enum value maps for ModelOptimizationPolicy_ModelPriority.
var (
    ModelQueuePolicy_TimeoutAction_name = map[int32]string{
        0: "REJECT",
        1: "DELAY",
    }
    ModelQueuePolicy_TimeoutAction_value = map[string]int32{
        "REJECT": 0,
        "DELAY":  1,
    }
)
Enum value maps for ModelQueuePolicy_TimeoutAction.
var (
    ModelSequenceBatching_Control_Kind_name = map[int32]string{
        0: "CONTROL_SEQUENCE_START",
        1: "CONTROL_SEQUENCE_READY",
        2: "CONTROL_SEQUENCE_END",
        3: "CONTROL_SEQUENCE_CORRID",
    }
    ModelSequenceBatching_Control_Kind_value = map[string]int32{
        "CONTROL_SEQUENCE_START":  0,
        "CONTROL_SEQUENCE_READY":  1,
        "CONTROL_SEQUENCE_END":    2,
        "CONTROL_SEQUENCE_CORRID": 3,
    }
)
Enum value maps for ModelSequenceBatching_Control_Kind.
var File_pkg_agent_repository_triton_config_model_config_proto protoreflect.FileDescriptor
Functions ¶
This section is empty.
Types ¶
type BatchInput ¶
type BatchInput struct {
    //@@ .. cpp:var:: Kind kind
    //@@
    //@@ The kind of this batch input.
    //@@
    Kind BatchInput_Kind `protobuf:"varint,1,opt,name=kind,proto3,enum=inference.BatchInput_Kind" json:"kind,omitempty"`
    //@@ .. cpp:var:: string target_name (repeated)
    //@@
    //@@ The name of the model inputs that the backend will create
    //@@ for this batch input.
    //@@
    TargetName []string `protobuf:"bytes,2,rep,name=target_name,json=targetName,proto3" json:"target_name,omitempty"`
    //@@ .. cpp:var:: DataType data_type
    //@@
    //@@ The input's datatype. The data type can be TYPE_INT32 or
    //@@ TYPE_FP32.
    //@@
    DataType DataType `protobuf:"varint,3,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"`
    //@@ .. cpp:var:: string source_input (repeated)
    //@@
    //@@ The backend derives the value for each batch input from one or
    //@@ more other inputs. 'source_input' gives the names of those
    //@@ inputs.
    //@@
    SourceInput []string `protobuf:"bytes,4,rep,name=source_input,json=sourceInput,proto3" json:"source_input,omitempty"`
    // contains filtered or unexported fields
}
@@ .. cpp:var:: message BatchInput @@ @@ A batch input is an additional input that must be added by @@ the backend based on all the requests in a batch. @@
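As a sketch only, a BatchInput that asks the backend to add the per-request element count of a source tensor as a new model input might be populated like this; the tensor names are hypothetical:

    var bi = &BatchInput{
        Kind:        BatchInput_BATCH_ELEMENT_COUNT,
        TargetName:  []string{"ELEMENT_COUNT"}, // hypothetical input the backend will create
        DataType:    DataType_TYPE_INT32,       // per the field docs, TYPE_INT32 or TYPE_FP32
        SourceInput: []string{"INPUT0"},        // hypothetical source tensor
    }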
func (*BatchInput) Descriptor (deprecated) ¶
func (*BatchInput) Descriptor() ([]byte, []int)
Deprecated: Use BatchInput.ProtoReflect.Descriptor instead.
func (*BatchInput) GetDataType ¶
func (x *BatchInput) GetDataType() DataType
func (*BatchInput) GetKind ¶
func (x *BatchInput) GetKind() BatchInput_Kind
func (*BatchInput) GetSourceInput ¶
func (x *BatchInput) GetSourceInput() []string
func (*BatchInput) GetTargetName ¶
func (x *BatchInput) GetTargetName() []string
func (*BatchInput) ProtoMessage ¶
func (*BatchInput) ProtoMessage()
func (*BatchInput) ProtoReflect ¶
func (x *BatchInput) ProtoReflect() protoreflect.Message
func (*BatchInput) Reset ¶
func (x *BatchInput) Reset()
func (*BatchInput) String ¶
func (x *BatchInput) String() string
type BatchInput_Kind ¶
type BatchInput_Kind int32
@@ @@ .. cpp:enum:: Kind @@ @@ The kind of the batch input. @@
const (
    //@@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0
    //@@
    //@@ The element count of the 'source_input' will be added as
    //@@ input with shape [1].
    //@@
    BatchInput_BATCH_ELEMENT_COUNT BatchInput_Kind = 0
    //@@ .. cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1
    //@@
    //@@ The accumulated element count of the 'source_input' will be
    //@@ added as input with shape [1]. For example, if there is a
    //@@ batch of two requests, each with 2 elements, an input of value
    //@@ 2 will be added to the first request, and an input of value
    //@@ 4 will be added to the second request.
    //@@
    BatchInput_BATCH_ACCUMULATED_ELEMENT_COUNT BatchInput_Kind = 1
    //@@ .. cpp:enumerator::
    //@@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2
    //@@
    //@@ The accumulated element count of the 'source_input' will be
    //@@ added as input with shape [1], except for the first request
    //@@ in the batch. For the first request in the batch, the input
    //@@ will have shape [2] where the first element is value 0.
    //@@
    BatchInput_BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO BatchInput_Kind = 2
    //@@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3
    //@@
    //@@ Among the requests in the batch, the max element count of the
    //@@ 'source_input' will be added as input with shape
    //@@ [max_element_count] for the first request in the batch.
    //@@ For other requests, the input will have shape [0].
    //@@ The data of the tensor will be uninitialized.
    //@@
    BatchInput_BATCH_MAX_ELEMENT_COUNT_AS_SHAPE BatchInput_Kind = 3
)
func (BatchInput_Kind) Descriptor ¶
func (BatchInput_Kind) Descriptor() protoreflect.EnumDescriptor
func (BatchInput_Kind) Enum ¶
func (x BatchInput_Kind) Enum() *BatchInput_Kind
func (BatchInput_Kind) EnumDescriptor (deprecated) ¶
func (BatchInput_Kind) EnumDescriptor() ([]byte, []int)
Deprecated: Use BatchInput_Kind.Descriptor instead.
func (BatchInput_Kind) Number ¶
func (x BatchInput_Kind) Number() protoreflect.EnumNumber
func (BatchInput_Kind) String ¶
func (x BatchInput_Kind) String() string
func (BatchInput_Kind) Type ¶
func (BatchInput_Kind) Type() protoreflect.EnumType
type BatchOutput ¶
type BatchOutput struct {
    //@@ .. cpp:var:: string target_name (repeated)
    //@@
    //@@ The names of the outputs to be produced by this batch output
    //@@ specification.
    //@@
    TargetName []string `protobuf:"bytes,1,rep,name=target_name,json=targetName,proto3" json:"target_name,omitempty"`
    //@@ .. cpp:var:: Kind kind
    //@@
    //@@ The kind of this batch output.
    //@@
    Kind BatchOutput_Kind `protobuf:"varint,2,opt,name=kind,proto3,enum=inference.BatchOutput_Kind" json:"kind,omitempty"`
    //@@ .. cpp:var:: string source_input (repeated)
    //@@
    //@@ The backend derives each batch output from one or more inputs.
    //@@ 'source_input' gives the names of those inputs.
    //@@
    SourceInput []string `protobuf:"bytes,3,rep,name=source_input,json=sourceInput,proto3" json:"source_input,omitempty"`
    // contains filtered or unexported fields
}
@@.. cpp:var:: message BatchOutput @@ @@ A batch output is an output produced by the model that must be handled @@ differently by the backend based on all the requests in a batch. @@
func (*BatchOutput) Descriptor (deprecated) ¶
func (*BatchOutput) Descriptor() ([]byte, []int)
Deprecated: Use BatchOutput.ProtoReflect.Descriptor instead.
func (*BatchOutput) GetKind ¶
func (x *BatchOutput) GetKind() BatchOutput_Kind
func (*BatchOutput) GetSourceInput ¶
func (x *BatchOutput) GetSourceInput() []string
func (*BatchOutput) GetTargetName ¶
func (x *BatchOutput) GetTargetName() []string
func (*BatchOutput) ProtoMessage ¶
func (*BatchOutput) ProtoMessage()
func (*BatchOutput) ProtoReflect ¶
func (x *BatchOutput) ProtoReflect() protoreflect.Message
func (*BatchOutput) Reset ¶
func (x *BatchOutput) Reset()
func (*BatchOutput) String ¶
func (x *BatchOutput) String() string
type BatchOutput_Kind ¶
type BatchOutput_Kind int32
@@ @@ .. cpp:enum:: Kind @@ @@ The kind of the batch output. @@
const (
    //@@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0
    //@@
    //@@ The output should be scattered according to the shape of
    //@@ 'source_input'. The dynamic dimension of the output will
    //@@ be set to the value of the same dimension in the input.
    //@@
    BatchOutput_BATCH_SCATTER_WITH_INPUT_SHAPE BatchOutput_Kind = 0
)
func (BatchOutput_Kind) Descriptor ¶
func (BatchOutput_Kind) Descriptor() protoreflect.EnumDescriptor
func (BatchOutput_Kind) Enum ¶
func (x BatchOutput_Kind) Enum() *BatchOutput_Kind
func (BatchOutput_Kind) EnumDescriptor (deprecated) ¶
func (BatchOutput_Kind) EnumDescriptor() ([]byte, []int)
Deprecated: Use BatchOutput_Kind.Descriptor instead.
func (BatchOutput_Kind) Number ¶
func (x BatchOutput_Kind) Number() protoreflect.EnumNumber
func (BatchOutput_Kind) String ¶
func (x BatchOutput_Kind) String() string
func (BatchOutput_Kind) Type ¶
func (BatchOutput_Kind) Type() protoreflect.EnumType
type DataType ¶
type DataType int32
@@ @@.. cpp:enum:: DataType @@ @@ Data types supported for input and output tensors. @@
const (
    //@@ .. cpp:enumerator:: DataType::INVALID = 0
    DataType_TYPE_INVALID DataType = 0
    //@@ .. cpp:enumerator:: DataType::BOOL = 1
    DataType_TYPE_BOOL DataType = 1
    //@@ .. cpp:enumerator:: DataType::UINT8 = 2
    DataType_TYPE_UINT8 DataType = 2
    //@@ .. cpp:enumerator:: DataType::UINT16 = 3
    DataType_TYPE_UINT16 DataType = 3
    //@@ .. cpp:enumerator:: DataType::UINT32 = 4
    DataType_TYPE_UINT32 DataType = 4
    //@@ .. cpp:enumerator:: DataType::UINT64 = 5
    DataType_TYPE_UINT64 DataType = 5
    //@@ .. cpp:enumerator:: DataType::INT8 = 6
    DataType_TYPE_INT8 DataType = 6
    //@@ .. cpp:enumerator:: DataType::INT16 = 7
    DataType_TYPE_INT16 DataType = 7
    //@@ .. cpp:enumerator:: DataType::INT32 = 8
    DataType_TYPE_INT32 DataType = 8
    //@@ .. cpp:enumerator:: DataType::INT64 = 9
    DataType_TYPE_INT64 DataType = 9
    //@@ .. cpp:enumerator:: DataType::FP16 = 10
    DataType_TYPE_FP16 DataType = 10
    //@@ .. cpp:enumerator:: DataType::FP32 = 11
    DataType_TYPE_FP32 DataType = 11
    //@@ .. cpp:enumerator:: DataType::FP64 = 12
    DataType_TYPE_FP64 DataType = 12
    //@@ .. cpp:enumerator:: DataType::STRING = 13
    DataType_TYPE_STRING DataType = 13
)
func (DataType) Descriptor ¶
func (DataType) Descriptor() protoreflect.EnumDescriptor
func (DataType) Enum ¶
func (x DataType) Enum() *DataType
func (DataType) EnumDescriptor (deprecated) ¶
func (DataType) EnumDescriptor() ([]byte, []int)
Deprecated: Use DataType.Descriptor instead.
func (DataType) Number ¶
func (x DataType) Number() protoreflect.EnumNumber
func (DataType) String ¶
func (x DataType) String() string
func (DataType) Type ¶
func (DataType) Type() protoreflect.EnumType
type ModelConfig ¶
type ModelConfig struct {
    //@@ .. cpp:var:: string name
    //@@
    //@@ The name of the model.
    //@@
    Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
    //@@ .. cpp:var:: string platform
    //@@
    //@@ The framework for the model. Possible values are
    //@@ "tensorrt_plan", "tensorflow_graphdef",
    //@@ "tensorflow_savedmodel", "onnxruntime_onnx",
    //@@ "pytorch_libtorch".
    //@@
    Platform string `protobuf:"bytes,2,opt,name=platform,proto3" json:"platform,omitempty"`
    //@@ .. cpp:var:: string backend
    //@@
    //@@ The backend used by the model.
    //@@
    Backend string `protobuf:"bytes,17,opt,name=backend,proto3" json:"backend,omitempty"`
    //@@ .. cpp:var:: ModelVersionPolicy version_policy
    //@@
    //@@ Policy indicating which version(s) of the model will be served.
    //@@
    VersionPolicy *ModelVersionPolicy `protobuf:"bytes,3,opt,name=version_policy,json=versionPolicy,proto3" json:"version_policy,omitempty"`
    //@@ .. cpp:var:: int32 max_batch_size
    //@@
    //@@ Maximum batch size allowed for inference. This can only decrease
    //@@ what is allowed by the model itself. A max_batch_size value of 0
    //@@ indicates that batching is not allowed for the model and the
    //@@ dimension/shape of the input and output tensors must exactly
    //@@ match what is specified in the input and output configuration. A
    //@@ max_batch_size value > 0 indicates that batching is allowed and
    //@@ so the model expects the input tensors to have an additional
    //@@ initial dimension for the batching that is not specified in the
    //@@ input (for example, if the model supports batched inputs of
    //@@ 2-dimensional tensors then the model configuration will specify
    //@@ the input shape as [ X, Y ] but the model will expect the actual
    //@@ input tensors to have shape [ N, X, Y ]). For max_batch_size > 0
    //@@ returned outputs will also have an additional initial dimension
    //@@ for the batch.
    //@@
    MaxBatchSize int32 `protobuf:"varint,4,opt,name=max_batch_size,json=maxBatchSize,proto3" json:"max_batch_size,omitempty"`
    //@@ .. cpp:var:: ModelInput input (repeated)
    //@@
    //@@ The inputs requested by the model.
    //@@
    Input []*ModelInput `protobuf:"bytes,5,rep,name=input,proto3" json:"input,omitempty"`
    //@@ .. cpp:var:: ModelOutput output (repeated)
    //@@
    //@@ The outputs produced by the model.
    //@@
    Output []*ModelOutput `protobuf:"bytes,6,rep,name=output,proto3" json:"output,omitempty"`
    //@@ .. cpp:var:: BatchInput batch_input (repeated)
    //@@
    //@@ The model input(s) that the server should use to communicate
    //@@ batch related values to the model.
    //@@
    BatchInput []*BatchInput `protobuf:"bytes,20,rep,name=batch_input,json=batchInput,proto3" json:"batch_input,omitempty"`
    //@@ .. cpp:var:: BatchOutput batch_output (repeated)
    //@@
    //@@ The outputs produced by the model that require special handling
    //@@ by the model backend.
    //@@
    BatchOutput []*BatchOutput `protobuf:"bytes,21,rep,name=batch_output,json=batchOutput,proto3" json:"batch_output,omitempty"`
    //@@ .. cpp:var:: ModelOptimizationPolicy optimization
    //@@
    //@@ Optimization configuration for the model. If not specified
    //@@ then the default optimization policy is used.
    //@@
    Optimization *ModelOptimizationPolicy `protobuf:"bytes,12,opt,name=optimization,proto3" json:"optimization,omitempty"`
    //@@ .. cpp:var:: oneof scheduling_choice
    //@@
    //@@ The scheduling policy for the model. If not specified the
    //@@ default scheduling policy is used for the model. The default
    //@@ policy is to execute each inference request independently.
    //@@
    //
    // Types that are assignable to SchedulingChoice:
    //	*ModelConfig_DynamicBatching
    //	*ModelConfig_SequenceBatching
    //	*ModelConfig_EnsembleScheduling
    SchedulingChoice isModelConfig_SchedulingChoice `protobuf_oneof:"scheduling_choice"`
    //@@ .. cpp:var:: ModelInstanceGroup instance_group (repeated)
    //@@
    //@@ Instances of this model. If not specified, one instance
    //@@ of the model will be instantiated on each available GPU.
    //@@
    InstanceGroup []*ModelInstanceGroup `protobuf:"bytes,7,rep,name=instance_group,json=instanceGroup,proto3" json:"instance_group,omitempty"`
    //@@ .. cpp:var:: string default_model_filename
    //@@
    //@@ Optional filename of the model file to use if a
    //@@ compute-capability specific model is not specified in
    //@@ :cpp:var:`cc_model_filenames`. If not specified the default name
    //@@ is 'model.graphdef', 'model.savedmodel', 'model.plan' or
    //@@ 'model.pt' depending on the model type.
    //@@
    DefaultModelFilename string `protobuf:"bytes,8,opt,name=default_model_filename,json=defaultModelFilename,proto3" json:"default_model_filename,omitempty"`
    //@@ .. cpp:var:: map<string,string> cc_model_filenames
    //@@
    //@@ Optional map from CUDA compute capability to the filename of
    //@@ the model that supports that compute capability. The filename
    //@@ refers to a file within the model version directory.
    //@@
    CcModelFilenames map[string]string `` /* 199-byte string literal not displayed */
    //@@ .. cpp:var:: map<string,string> metric_tags
    //@@
    //@@ Optional metric tags. User-specific key-value pairs for metrics
    //@@ reported for this model. These tags are applied to the metrics
    //@@ reported on the HTTP metrics port.
    //@@
    MetricTags map[string]string `` /* 180-byte string literal not displayed */
    //@@ .. cpp:var:: map<string,ModelParameter> parameters
    //@@
    //@@ Optional model parameters. User-specified parameter values.
    //@@
    Parameters map[string]*ModelParameter `` /* 162-byte string literal not displayed */
    //@@ .. cpp:var:: ModelWarmup model_warmup (repeated)
    //@@
    //@@ Warmup setting of this model. If specified, all instances
    //@@ will be run with the request samples in sequence before
    //@@ serving the model.
    //@@ This field can only be specified if the model is not an ensemble
    //@@ model.
    //@@
    ModelWarmup []*ModelWarmup `protobuf:"bytes,16,rep,name=model_warmup,json=modelWarmup,proto3" json:"model_warmup,omitempty"`
    //@@ .. cpp:var:: ModelOperations model_operations
    //@@
    //@@ Optional metadata of the libraries providing custom operations for
    //@@ this model.
    //@@
    ModelOperations *ModelOperations `protobuf:"bytes,18,opt,name=model_operations,json=modelOperations,proto3" json:"model_operations,omitempty"`
    //@@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy
    //@@
    //@@ Optional specification that describes the nature of transactions
    //@@ to be expected from the model.
    //@@
    ModelTransactionPolicy *ModelTransactionPolicy `` /* 130-byte string literal not displayed */
    //@@ .. cpp:var:: ModelRepositoryAgents model_repository_agents
    //@@
    //@@ Optional specification of the agent(s) that should be invoked
    //@@ when repository actions are performed for this model.
    //@@
    ModelRepositoryAgents *ModelRepositoryAgents `` /* 127-byte string literal not displayed */
    //@@ .. cpp:var:: ModelResponseCache response_cache
    //@@
    //@@ Optional setting for utilizing the response cache for this
    //@@ model.
    //@@
    ResponseCache *ModelResponseCache `protobuf:"bytes,24,opt,name=response_cache,json=responseCache,proto3" json:"response_cache,omitempty"`
    // contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ModelConfig @@ @@ A model configuration. @@
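For illustration only, a minimal configuration might be built or loaded as in the sketch below. The model name, tensor names, shapes, and file path are hypothetical; loading assumes the configuration is stored in protobuf text format (as Triton's config.pbtxt files are) and uses google.golang.org/protobuf/encoding/prototext:

    import (
        "os"

        "google.golang.org/protobuf/encoding/prototext"
    )

    // loadModelConfig parses a text-format model configuration file.
    func loadModelConfig(path string) (*ModelConfig, error) {
        data, err := os.ReadFile(path) // e.g. a config.pbtxt file
        if err != nil {
            return nil, err
        }
        cfg := &ModelConfig{}
        if err := prototext.Unmarshal(data, cfg); err != nil {
            return nil, err
        }
        return cfg, nil
    }

    // Constructing a configuration directly; all values are illustrative.
    var cfg = &ModelConfig{
        Name:         "resnet50",
        Platform:     "tensorrt_plan",
        MaxBatchSize: 8,
        Input: []*ModelInput{{
            Name:     "INPUT0",
            DataType: DataType_TYPE_FP32,
            Dims:     []int64{3, 224, 224},
        }},
        Output: []*ModelOutput{{
            Name:     "OUTPUT0",
            DataType: DataType_TYPE_FP32,
            Dims:     []int64{1000},
        }},
    }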
func (*ModelConfig) Descriptor (deprecated) ¶
func (*ModelConfig) Descriptor() ([]byte, []int)
Deprecated: Use ModelConfig.ProtoReflect.Descriptor instead.
func (*ModelConfig) GetBackend ¶
func (x *ModelConfig) GetBackend() string
func (*ModelConfig) GetBatchInput ¶
func (x *ModelConfig) GetBatchInput() []*BatchInput
func (*ModelConfig) GetBatchOutput ¶
func (x *ModelConfig) GetBatchOutput() []*BatchOutput
func (*ModelConfig) GetCcModelFilenames ¶
func (x *ModelConfig) GetCcModelFilenames() map[string]string
func (*ModelConfig) GetDefaultModelFilename ¶
func (x *ModelConfig) GetDefaultModelFilename() string
func (*ModelConfig) GetDynamicBatching ¶
func (x *ModelConfig) GetDynamicBatching() *ModelDynamicBatching
func (*ModelConfig) GetEnsembleScheduling ¶
func (x *ModelConfig) GetEnsembleScheduling() *ModelEnsembling
func (*ModelConfig) GetInput ¶
func (x *ModelConfig) GetInput() []*ModelInput
func (*ModelConfig) GetInstanceGroup ¶
func (x *ModelConfig) GetInstanceGroup() []*ModelInstanceGroup
func (*ModelConfig) GetMaxBatchSize ¶
func (x *ModelConfig) GetMaxBatchSize() int32
func (*ModelConfig) GetMetricTags ¶
func (x *ModelConfig) GetMetricTags() map[string]string
func (*ModelConfig) GetModelOperations ¶
func (x *ModelConfig) GetModelOperations() *ModelOperations
func (*ModelConfig) GetModelRepositoryAgents ¶
func (x *ModelConfig) GetModelRepositoryAgents() *ModelRepositoryAgents
func (*ModelConfig) GetModelTransactionPolicy ¶
func (x *ModelConfig) GetModelTransactionPolicy() *ModelTransactionPolicy
func (*ModelConfig) GetModelWarmup ¶
func (x *ModelConfig) GetModelWarmup() []*ModelWarmup
func (*ModelConfig) GetName ¶
func (x *ModelConfig) GetName() string
func (*ModelConfig) GetOptimization ¶
func (x *ModelConfig) GetOptimization() *ModelOptimizationPolicy
func (*ModelConfig) GetOutput ¶
func (x *ModelConfig) GetOutput() []*ModelOutput
func (*ModelConfig) GetParameters ¶
func (x *ModelConfig) GetParameters() map[string]*ModelParameter
func (*ModelConfig) GetPlatform ¶
func (x *ModelConfig) GetPlatform() string
func (*ModelConfig) GetResponseCache ¶
func (x *ModelConfig) GetResponseCache() *ModelResponseCache
func (*ModelConfig) GetSchedulingChoice ¶
func (m *ModelConfig) GetSchedulingChoice() isModelConfig_SchedulingChoice
func (*ModelConfig) GetSequenceBatching ¶
func (x *ModelConfig) GetSequenceBatching() *ModelSequenceBatching
func (*ModelConfig) GetVersionPolicy ¶
func (x *ModelConfig) GetVersionPolicy() *ModelVersionPolicy
func (*ModelConfig) ProtoMessage ¶
func (*ModelConfig) ProtoMessage()
func (*ModelConfig) ProtoReflect ¶
func (x *ModelConfig) ProtoReflect() protoreflect.Message
func (*ModelConfig) Reset ¶
func (x *ModelConfig) Reset()
func (*ModelConfig) String ¶
func (x *ModelConfig) String() string
type ModelConfig_DynamicBatching ¶
type ModelConfig_DynamicBatching struct {
    //@@ .. cpp:var:: ModelDynamicBatching dynamic_batching
    //@@
    //@@ If specified, enables the dynamic-batching scheduling
    //@@ policy. With dynamic-batching the scheduler may group
    //@@ together independent requests into a single batch to
    //@@ improve inference throughput.
    //@@
    DynamicBatching *ModelDynamicBatching `protobuf:"bytes,11,opt,name=dynamic_batching,json=dynamicBatching,proto3,oneof"`
}
type ModelConfig_EnsembleScheduling ¶
type ModelConfig_EnsembleScheduling struct {
    //@@ .. cpp:var:: ModelEnsembling ensemble_scheduling
    //@@
    //@@ If specified, enables the model-ensembling scheduling
    //@@ policy. With model-ensembling, inference requests
    //@@ will be processed according to the specification, such as an
    //@@ execution sequence of models. The input specified in this model
    //@@ config will be the input for the ensemble, and the output
    //@@ specified will be the output of the ensemble.
    //@@
    EnsembleScheduling *ModelEnsembling `protobuf:"bytes,15,opt,name=ensemble_scheduling,json=ensembleScheduling,proto3,oneof"`
}
type ModelConfig_SequenceBatching ¶
type ModelConfig_SequenceBatching struct {
    //@@ .. cpp:var:: ModelSequenceBatching sequence_batching
    //@@
    //@@ If specified, enables the sequence-batching scheduling
    //@@ policy. With sequence-batching, inference requests
    //@@ with the same correlation ID are routed to the same
    //@@ model instance. Multiple sequences of inference requests
    //@@ may be batched together into a single batch to
    //@@ improve inference throughput.
    //@@
    SequenceBatching *ModelSequenceBatching `protobuf:"bytes,13,opt,name=sequence_batching,json=sequenceBatching,proto3,oneof"`
}
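These three wrapper types implement the scheduling_choice oneof. A sketch of the usual protobuf-go pattern for setting and inspecting the choice (the delay value is illustrative):

    var cfg = &ModelConfig{
        SchedulingChoice: &ModelConfig_DynamicBatching{
            DynamicBatching: &ModelDynamicBatching{MaxQueueDelayMicroseconds: 100},
        },
    }

    // A type switch over GetSchedulingChoice distinguishes the cases; the
    // typed getters (GetDynamicBatching, etc.) return nil when another
    // choice is set.
    func schedulingKind(cfg *ModelConfig) string {
        switch c := cfg.GetSchedulingChoice().(type) {
        case *ModelConfig_DynamicBatching:
            _ = c.DynamicBatching
            return "dynamic"
        case *ModelConfig_SequenceBatching:
            _ = c.SequenceBatching
            return "sequence"
        case *ModelConfig_EnsembleScheduling:
            _ = c.EnsembleScheduling
            return "ensemble"
        default:
            return "default" // no choice set; each request executes independently
        }
    }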
type ModelDynamicBatching ¶
type ModelDynamicBatching struct {
    //@@ .. cpp:var:: int32 preferred_batch_size (repeated)
    //@@
    //@@ Preferred batch sizes for dynamic batching. If a batch of one of
    //@@ these sizes can be formed it will be executed immediately. If
    //@@ not specified a preferred batch size will be chosen automatically
    //@@ based on model and GPU characteristics.
    //@@
    PreferredBatchSize []int32 `protobuf:"varint,1,rep,packed,name=preferred_batch_size,json=preferredBatchSize,proto3" json:"preferred_batch_size,omitempty"`
    //@@ .. cpp:var:: uint64 max_queue_delay_microseconds
    //@@
    //@@ The maximum time, in microseconds, a request will be delayed in
    //@@ the scheduling queue to wait for additional requests for
    //@@ batching. Default is 0.
    //@@
    MaxQueueDelayMicroseconds uint64 `` /* 141-byte string literal not displayed */
    //@@ .. cpp:var:: bool preserve_ordering
    //@@
    //@@ Should the dynamic batcher preserve the ordering of responses to
    //@@ match the order of requests received by the scheduler. Default is
    //@@ false. If true, the responses will be returned in the same order as
    //@@ the order of requests sent to the scheduler. If false, the responses
    //@@ may be returned in arbitrary order. This option is specifically
    //@@ needed when a sequence of related inference requests (i.e. inference
    //@@ requests with the same correlation ID) are sent to the dynamic
    //@@ batcher to ensure that the sequence responses are in the correct
    //@@ order.
    //@@
    PreserveOrdering bool `protobuf:"varint,3,opt,name=preserve_ordering,json=preserveOrdering,proto3" json:"preserve_ordering,omitempty"`
    //@@ .. cpp:var:: uint32 priority_levels
    //@@
    //@@ The number of priority levels to be enabled for the model,
    //@@ the priority level starts from 1 and 1 is the highest priority.
    //@@ Requests are handled in priority order with all priority 1 requests
    //@@ processed before priority 2, all priority 2 requests processed before
    //@@ priority 3, etc. Requests with the same priority level will be
    //@@ handled in the order that they are received.
    //@@
    PriorityLevels uint32 `protobuf:"varint,4,opt,name=priority_levels,json=priorityLevels,proto3" json:"priority_levels,omitempty"`
    //@@ .. cpp:var:: uint32 default_priority_level
    //@@
    //@@ The priority level used for requests that don't specify their
    //@@ priority. The value must be in the range [ 1, 'priority_levels' ].
    //@@
    DefaultPriorityLevel uint32 `protobuf:"varint,5,opt,name=default_priority_level,json=defaultPriorityLevel,proto3" json:"default_priority_level,omitempty"`
    //@@ .. cpp:var:: ModelQueuePolicy default_queue_policy
    //@@
    //@@ The default queue policy used for requests that don't require
    //@@ priority handling and requests that specify priority levels where
    //@@ there is no specific policy given. If not specified, a policy with
    //@@ default field values will be used.
    //@@
    DefaultQueuePolicy *ModelQueuePolicy `protobuf:"bytes,6,opt,name=default_queue_policy,json=defaultQueuePolicy,proto3" json:"default_queue_policy,omitempty"`
    //@@ .. cpp:var:: map<uint32, ModelQueuePolicy> priority_queue_policy
    //@@
    //@@ Specify the queue policy for the priority level. The default queue
    //@@ policy will be used if a priority level doesn't specify a queue
    //@@ policy.
    //@@
    PriorityQueuePolicy map[uint32]*ModelQueuePolicy `` /* 209-byte string literal not displayed */
    // contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ModelDynamicBatching @@ @@ Dynamic batching configuration. These settings control how dynamic @@ batching operates for the model. @@
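A sketch of a dynamic-batching configuration with two priority levels and a per-priority queue policy; every number below is illustrative, not a recommended value:

    var db = &ModelDynamicBatching{
        PreferredBatchSize:        []int32{4, 8},
        MaxQueueDelayMicroseconds: 100,
        PriorityLevels:            2,
        DefaultPriorityLevel:      2, // unprioritized requests go to the lower level
        DefaultQueuePolicy: &ModelQueuePolicy{
            TimeoutAction:              ModelQueuePolicy_REJECT,
            DefaultTimeoutMicroseconds: 500,
        },
        PriorityQueuePolicy: map[uint32]*ModelQueuePolicy{
            1: {MaxQueueSize: 16}, // tighter queue bound for the highest priority
        },
    }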
func (*ModelDynamicBatching) Descriptor (deprecated) ¶
func (*ModelDynamicBatching) Descriptor() ([]byte, []int)
Deprecated: Use ModelDynamicBatching.ProtoReflect.Descriptor instead.
func (*ModelDynamicBatching) GetDefaultPriorityLevel ¶
func (x *ModelDynamicBatching) GetDefaultPriorityLevel() uint32
func (*ModelDynamicBatching) GetDefaultQueuePolicy ¶
func (x *ModelDynamicBatching) GetDefaultQueuePolicy() *ModelQueuePolicy
func (*ModelDynamicBatching) GetMaxQueueDelayMicroseconds ¶
func (x *ModelDynamicBatching) GetMaxQueueDelayMicroseconds() uint64
func (*ModelDynamicBatching) GetPreferredBatchSize ¶
func (x *ModelDynamicBatching) GetPreferredBatchSize() []int32
func (*ModelDynamicBatching) GetPreserveOrdering ¶
func (x *ModelDynamicBatching) GetPreserveOrdering() bool
func (*ModelDynamicBatching) GetPriorityLevels ¶
func (x *ModelDynamicBatching) GetPriorityLevels() uint32
func (*ModelDynamicBatching) GetPriorityQueuePolicy ¶
func (x *ModelDynamicBatching) GetPriorityQueuePolicy() map[uint32]*ModelQueuePolicy
func (*ModelDynamicBatching) ProtoMessage ¶
func (*ModelDynamicBatching) ProtoMessage()
func (*ModelDynamicBatching) ProtoReflect ¶
func (x *ModelDynamicBatching) ProtoReflect() protoreflect.Message
func (*ModelDynamicBatching) Reset ¶
func (x *ModelDynamicBatching) Reset()
func (*ModelDynamicBatching) String ¶
func (x *ModelDynamicBatching) String() string
type ModelEnsembling ¶
type ModelEnsembling struct {
    //@@ .. cpp:var:: Step step (repeated)
    //@@
    //@@ The models and the input / output mappings used within the ensemble.
    //@@
    Step []*ModelEnsembling_Step `protobuf:"bytes,1,rep,name=step,proto3" json:"step,omitempty"`
    // contains filtered or unexported fields
}
@@ @@.. cpp:var:: message ModelEnsembling @@ @@ Model ensembling configuration. These settings specify the models that @@ compose the ensemble and how data flows between the models. @@
func (*ModelEnsembling) Descriptor (deprecated) ¶
func (*ModelEnsembling) Descriptor() ([]byte, []int)
Deprecated: Use ModelEnsembling.ProtoReflect.Descriptor instead.
func (*ModelEnsembling) GetStep ¶
func (x *ModelEnsembling) GetStep() []*ModelEnsembling_Step
func (*ModelEnsembling) ProtoMessage ¶
func (*ModelEnsembling) ProtoMessage()
func (*ModelEnsembling) ProtoReflect ¶
func (x *ModelEnsembling) ProtoReflect() protoreflect.Message
func (*ModelEnsembling) Reset ¶
func (x *ModelEnsembling) Reset()
func (*ModelEnsembling) String ¶
func (x *ModelEnsembling) String() string
type ModelEnsembling_Step ¶
type ModelEnsembling_Step struct {
    //@@ .. cpp:var:: string model_name
    //@@
    //@@ The name of the model to execute for this step of the ensemble.
    //@@
    ModelName string `protobuf:"bytes,1,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"`
    //@@ .. cpp:var:: int64 model_version
    //@@
    //@@ The version of the model to use for inference. If -1
    //@@ the latest/most-recent version of the model is used.
    //@@
    ModelVersion int64 `protobuf:"varint,2,opt,name=model_version,json=modelVersion,proto3" json:"model_version,omitempty"`
    //@@ .. cpp:var:: map<string,string> input_map
    //@@
    //@@ Map from name of an input tensor on this step's model to ensemble
    //@@ tensor name. The ensemble tensor must have the same data type and
    //@@ shape as the model input. Each model input must be assigned to
    //@@ one ensemble tensor, but the same ensemble tensor can be assigned
    //@@ to multiple model inputs.
    //@@
    InputMap map[string]string `` /* 173-byte string literal not displayed */
    //@@ .. cpp:var:: map<string,string> output_map
    //@@
    //@@ Map from name of an output tensor on this step's model to ensemble
    //@@ tensor name. The data type and shape of the ensemble tensor will
    //@@ be inferred from the model output. It is optional to assign all
    //@@ model outputs to ensemble tensors. One ensemble tensor name
    //@@ can appear in an output map only once.
    //@@
    OutputMap map[string]string `` /* 176-byte string literal not displayed */
    // contains filtered or unexported fields
}
@@ .. cpp:var:: message Step @@ @@ Each step specifies a model included in the ensemble, @@ maps ensemble tensor names to the model input tensors, @@ and maps model output tensors to ensemble tensor names @@
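As a sketch of how these pieces compose, here is a hypothetical two-step ensemble where "preprocess" feeds "classifier" through an intermediate ensemble tensor. The model and tensor names are illustrative, and the import path is a placeholder.

package main

import (
	"fmt"

	inference "example.com/your/module/inference" // hypothetical import path
)

func main() {
	ensemble := &inference.ModelEnsembling{
		Step: []*inference.ModelEnsembling_Step{
			{
				ModelName:    "preprocess",
				ModelVersion: -1, // -1 selects the latest version
				InputMap:     map[string]string{"RAW": "IMAGE"},
				OutputMap:    map[string]string{"OUT": "PREPROCESSED"},
			},
			{
				ModelName:    "classifier",
				ModelVersion: -1,
				InputMap:     map[string]string{"INPUT": "PREPROCESSED"},
				OutputMap:    map[string]string{"PROB": "SCORES"},
			},
		},
	}
	fmt.Println(ensemble.GetStep()[0].GetModelName()) // "preprocess"
}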
func (*ModelEnsembling_Step) Descriptor deprecated
func (*ModelEnsembling_Step) Descriptor() ([]byte, []int)
Deprecated: Use ModelEnsembling_Step.ProtoReflect.Descriptor instead.
func (*ModelEnsembling_Step) GetInputMap ¶
func (x *ModelEnsembling_Step) GetInputMap() map[string]string
func (*ModelEnsembling_Step) GetModelName ¶
func (x *ModelEnsembling_Step) GetModelName() string
func (*ModelEnsembling_Step) GetModelVersion ¶
func (x *ModelEnsembling_Step) GetModelVersion() int64
func (*ModelEnsembling_Step) GetOutputMap ¶
func (x *ModelEnsembling_Step) GetOutputMap() map[string]string
func (*ModelEnsembling_Step) ProtoMessage ¶
func (*ModelEnsembling_Step) ProtoMessage()
func (*ModelEnsembling_Step) ProtoReflect ¶
func (x *ModelEnsembling_Step) ProtoReflect() protoreflect.Message
func (*ModelEnsembling_Step) Reset ¶
func (x *ModelEnsembling_Step) Reset()
func (*ModelEnsembling_Step) String ¶
func (x *ModelEnsembling_Step) String() string
type ModelInput ¶
type ModelInput struct {
	// The name of the input.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// The data-type of the input.
	DataType DataType `protobuf:"varint,2,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"`
	// The format of the input. Optional.
	Format ModelInput_Format `protobuf:"varint,3,opt,name=format,proto3,enum=inference.ModelInput_Format" json:"format,omitempty"`
	// The dimensions/shape of the input tensor that must be provided when
	// invoking the inference API for this model.
	Dims []int64 `protobuf:"varint,4,rep,packed,name=dims,proto3" json:"dims,omitempty"`
	// The shape expected for this input by the backend. The input will be
	// reshaped to this before being presented to the backend. The reshape
	// must have the same number of elements as the input shape specified
	// by 'dims'. Optional.
	Reshape *ModelTensorReshape `protobuf:"bytes,5,opt,name=reshape,proto3" json:"reshape,omitempty"`
	// Whether or not the input is a shape tensor to the model. This field
	// is currently supported only for the TensorRT model. An error will be
	// generated if this specification does not comply with the underlying
	// model.
	IsShapeTensor bool `protobuf:"varint,6,opt,name=is_shape_tensor,json=isShapeTensor,proto3" json:"is_shape_tensor,omitempty"`
	// Whether or not the input is allowed to be "ragged" in a dynamically
	// created batch. Default is false, indicating that two requests will
	// only be batched if this tensor has the same shape in both requests.
	// True indicates that two requests can be batched even if this tensor
	// has a different shape in each request.
	AllowRaggedBatch bool `protobuf:"varint,7,opt,name=allow_ragged_batch,json=allowRaggedBatch,proto3" json:"allow_ragged_batch,omitempty"`
	// Whether or not the input is optional for the model execution. If
	// true, the input is not required in the inference request. Default
	// value is false.
	Optional bool `protobuf:"varint,8,opt,name=optional,proto3" json:"optional,omitempty"`
	// contains filtered or unexported fields
}
An input required by the model.
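A minimal sketch of declaring an image input. It assumes the usual generated enum constant names (e.g. DataType_TYPE_FP32, which is not listed in this excerpt) and a hypothetical import path.

package main

import (
	"fmt"

	inference "example.com/your/module/inference" // hypothetical import path
)

func main() {
	in := &inference.ModelInput{
		Name:     "IMAGE",
		DataType: inference.DataType_TYPE_FP32, // assumed generated constant
		Format:   inference.ModelInput_FORMAT_NCHW,
		// Only the 3 non-batch dimensions; the batch dimension is implied
		// when max_batch_size >= 1.
		Dims: []int64{3, 224, 224},
	}
	fmt.Println(in.GetName(), in.GetDims())
}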
func (*ModelInput) Descriptor deprecated
func (*ModelInput) Descriptor() ([]byte, []int)
Deprecated: Use ModelInput.ProtoReflect.Descriptor instead.
func (*ModelInput) GetAllowRaggedBatch ¶
func (x *ModelInput) GetAllowRaggedBatch() bool
func (*ModelInput) GetDataType ¶
func (x *ModelInput) GetDataType() DataType
func (*ModelInput) GetDims ¶
func (x *ModelInput) GetDims() []int64
func (*ModelInput) GetFormat ¶
func (x *ModelInput) GetFormat() ModelInput_Format
func (*ModelInput) GetIsShapeTensor ¶
func (x *ModelInput) GetIsShapeTensor() bool
func (*ModelInput) GetName ¶
func (x *ModelInput) GetName() string
func (*ModelInput) GetOptional ¶
func (x *ModelInput) GetOptional() bool
func (*ModelInput) GetReshape ¶
func (x *ModelInput) GetReshape() *ModelTensorReshape
func (*ModelInput) ProtoMessage ¶
func (*ModelInput) ProtoMessage()
func (*ModelInput) ProtoReflect ¶
func (x *ModelInput) ProtoReflect() protoreflect.Message
func (*ModelInput) Reset ¶
func (x *ModelInput) Reset()
func (*ModelInput) String ¶
func (x *ModelInput) String() string
type ModelInput_Format ¶
type ModelInput_Format int32
The format for the input.
const (
	// FORMAT_NONE: The input has no specific format. This is the default.
	ModelInput_FORMAT_NONE ModelInput_Format = 0
	// FORMAT_NHWC: HWC image format. Tensors with this format require 3
	// dimensions if the model does not support batching (max_batch_size =
	// 0) or 4 dimensions if the model does support batching
	// (max_batch_size >= 1). In either case the 'dims' below should only
	// specify the 3 non-batch dimensions (i.e. HWC or CHW).
	ModelInput_FORMAT_NHWC ModelInput_Format = 1
	// FORMAT_NCHW: CHW image format. Tensors with this format require 3
	// dimensions if the model does not support batching (max_batch_size =
	// 0) or 4 dimensions if the model does support batching
	// (max_batch_size >= 1). In either case the 'dims' below should only
	// specify the 3 non-batch dimensions (i.e. HWC or CHW).
	ModelInput_FORMAT_NCHW ModelInput_Format = 2
)
func (ModelInput_Format) Descriptor ¶
func (ModelInput_Format) Descriptor() protoreflect.EnumDescriptor
func (ModelInput_Format) Enum ¶
func (x ModelInput_Format) Enum() *ModelInput_Format
func (ModelInput_Format) EnumDescriptor deprecated
func (ModelInput_Format) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelInput_Format.Descriptor instead.
func (ModelInput_Format) Number ¶
func (x ModelInput_Format) Number() protoreflect.EnumNumber
func (ModelInput_Format) String ¶
func (x ModelInput_Format) String() string
func (ModelInput_Format) Type ¶
func (ModelInput_Format) Type() protoreflect.EnumType
type ModelInstanceGroup ¶
type ModelInstanceGroup struct {
	// Optional name of this group of instances. If not specified the name
	// will be formed as <model name>_<group number>. The name of
	// individual instances will be further formed by a unique instance
	// number and GPU index.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// The kind of this instance group. Default is KIND_AUTO. If KIND_AUTO
	// or KIND_GPU then both 'count' and 'gpus' are valid and may be
	// specified. If KIND_CPU or KIND_MODEL only 'count' is valid and
	// 'gpus' cannot be specified.
	Kind ModelInstanceGroup_Kind `protobuf:"varint,4,opt,name=kind,proto3,enum=inference.ModelInstanceGroup_Kind" json:"kind,omitempty"`
	// For a group assigned to GPU, the number of instances created for
	// each GPU listed in 'gpus'. For a group assigned to CPU, the number
	// of instances created. Default is 1.
	Count int32 `protobuf:"varint,2,opt,name=count,proto3" json:"count,omitempty"`
	// The rate limiter specific settings to be associated with this
	// instance group. Optional; if not specified no rate limiting will be
	// applied to this instance group.
	RateLimiter *ModelRateLimiter `protobuf:"bytes,6,opt,name=rate_limiter,json=rateLimiter,proto3" json:"rate_limiter,omitempty"`
	// GPU(s) where instances should be available. For each GPU listed,
	// 'count' instances of the model will be available. Setting 'gpus' to
	// empty (or not specifying it at all) is equivalent to listing all
	// available GPUs.
	Gpus []int32 `protobuf:"varint,3,rep,packed,name=gpus,proto3" json:"gpus,omitempty"`
	// Secondary devices that are required by instances specified by this
	// instance group. Optional.
	SecondaryDevices []*ModelInstanceGroup_SecondaryDevice `protobuf:"bytes,8,rep,name=secondary_devices,json=secondaryDevices,proto3" json:"secondary_devices,omitempty"`
	// For TensorRT models containing multiple optimization profiles, this
	// parameter specifies a set of optimization profiles available to this
	// instance group. The inference server will choose the optimal profile
	// based on the shapes of the input tensors. This field should lie
	// between 0 and <TotalNumberOfOptimizationProfilesInPlanModel> - 1 and
	// be specified only for the TensorRT backend, otherwise an error will
	// be generated. If not specified, the server will select the first
	// optimization profile by default.
	Profile []string `protobuf:"bytes,5,rep,name=profile,proto3" json:"profile,omitempty"`
	// Whether the instances within this instance group will be accepting
	// inference requests from the scheduler. If true, the instances will
	// not be added to the scheduler. Default value is false.
	Passive bool `protobuf:"varint,7,opt,name=passive,proto3" json:"passive,omitempty"`
	// The host policy name that the instance is to be associated with. The
	// default value is set to reflect the device kind of the instance; for
	// instance, KIND_CPU is "cpu", KIND_MODEL is "model" and KIND_GPU is
	// "gpu_<gpu_id>".
	HostPolicy string `protobuf:"bytes,9,opt,name=host_policy,json=hostPolicy,proto3" json:"host_policy,omitempty"`
	// contains filtered or unexported fields
}
A group of one or more instances of a model and resources made available for those instances.
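A minimal sketch of a GPU instance group: two instances on each of GPUs 0 and 1, i.e. four instances in total. Names and the import path are illustrative.

package main

import (
	"fmt"

	inference "example.com/your/module/inference" // hypothetical import path
)

func main() {
	ig := &inference.ModelInstanceGroup{
		Name:  "gpu_group",
		Kind:  inference.ModelInstanceGroup_KIND_GPU,
		Count: 2,
		Gpus:  []int32{0, 1},
	}
	fmt.Println(ig.GetKind(), ig.GetCount(), ig.GetGpus())
}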
func (*ModelInstanceGroup) Descriptor deprecated
func (*ModelInstanceGroup) Descriptor() ([]byte, []int)
Deprecated: Use ModelInstanceGroup.ProtoReflect.Descriptor instead.
func (*ModelInstanceGroup) GetCount ¶
func (x *ModelInstanceGroup) GetCount() int32
func (*ModelInstanceGroup) GetGpus ¶
func (x *ModelInstanceGroup) GetGpus() []int32
func (*ModelInstanceGroup) GetHostPolicy ¶
func (x *ModelInstanceGroup) GetHostPolicy() string
func (*ModelInstanceGroup) GetKind ¶
func (x *ModelInstanceGroup) GetKind() ModelInstanceGroup_Kind
func (*ModelInstanceGroup) GetName ¶
func (x *ModelInstanceGroup) GetName() string
func (*ModelInstanceGroup) GetPassive ¶
func (x *ModelInstanceGroup) GetPassive() bool
func (*ModelInstanceGroup) GetProfile ¶
func (x *ModelInstanceGroup) GetProfile() []string
func (*ModelInstanceGroup) GetRateLimiter ¶
func (x *ModelInstanceGroup) GetRateLimiter() *ModelRateLimiter
func (*ModelInstanceGroup) GetSecondaryDevices ¶
func (x *ModelInstanceGroup) GetSecondaryDevices() []*ModelInstanceGroup_SecondaryDevice
func (*ModelInstanceGroup) ProtoMessage ¶
func (*ModelInstanceGroup) ProtoMessage()
func (*ModelInstanceGroup) ProtoReflect ¶
func (x *ModelInstanceGroup) ProtoReflect() protoreflect.Message
func (*ModelInstanceGroup) Reset ¶
func (x *ModelInstanceGroup) Reset()
func (*ModelInstanceGroup) String ¶
func (x *ModelInstanceGroup) String() string
type ModelInstanceGroup_Kind ¶
type ModelInstanceGroup_Kind int32
Kind of this instance group.
const (
	// KIND_AUTO: This instance group represents instances that can run on
	// either CPU or GPU. If all GPUs listed in 'gpus' are available then
	// instances will be created on GPU(s), otherwise instances will be
	// created on CPU.
	ModelInstanceGroup_KIND_AUTO ModelInstanceGroup_Kind = 0
	// KIND_GPU: This instance group represents instances that must run on
	// the GPU.
	ModelInstanceGroup_KIND_GPU ModelInstanceGroup_Kind = 1
	// KIND_CPU: This instance group represents instances that must run on
	// the CPU.
	ModelInstanceGroup_KIND_CPU ModelInstanceGroup_Kind = 2
	// KIND_MODEL: This instance group represents instances that should run
	// on the CPU and/or GPU(s) as specified by the model or backend
	// itself. The inference server will not override the model/backend
	// settings.
	ModelInstanceGroup_KIND_MODEL ModelInstanceGroup_Kind = 3
)
func (ModelInstanceGroup_Kind) Descriptor ¶
func (ModelInstanceGroup_Kind) Descriptor() protoreflect.EnumDescriptor
func (ModelInstanceGroup_Kind) Enum ¶
func (x ModelInstanceGroup_Kind) Enum() *ModelInstanceGroup_Kind
func (ModelInstanceGroup_Kind) EnumDescriptor deprecated
func (ModelInstanceGroup_Kind) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelInstanceGroup_Kind.Descriptor instead.
func (ModelInstanceGroup_Kind) Number ¶
func (x ModelInstanceGroup_Kind) Number() protoreflect.EnumNumber
func (ModelInstanceGroup_Kind) String ¶
func (x ModelInstanceGroup_Kind) String() string
func (ModelInstanceGroup_Kind) Type ¶
func (ModelInstanceGroup_Kind) Type() protoreflect.EnumType
type ModelInstanceGroup_SecondaryDevice ¶
type ModelInstanceGroup_SecondaryDevice struct {
	// The secondary device kind.
	Kind ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind `` /* 132-byte string literal not displayed */
	// Identifier for the secondary device.
	DeviceId int64 `protobuf:"varint,2,opt,name=device_id,json=deviceId,proto3" json:"device_id,omitempty"`
	// contains filtered or unexported fields
}
A secondary device required for a model instance.
func (*ModelInstanceGroup_SecondaryDevice) Descriptor deprecated
func (*ModelInstanceGroup_SecondaryDevice) Descriptor() ([]byte, []int)
Deprecated: Use ModelInstanceGroup_SecondaryDevice.ProtoReflect.Descriptor instead.
func (*ModelInstanceGroup_SecondaryDevice) GetDeviceId ¶
func (x *ModelInstanceGroup_SecondaryDevice) GetDeviceId() int64
func (*ModelInstanceGroup_SecondaryDevice) ProtoMessage ¶
func (*ModelInstanceGroup_SecondaryDevice) ProtoMessage()
func (*ModelInstanceGroup_SecondaryDevice) ProtoReflect ¶
func (x *ModelInstanceGroup_SecondaryDevice) ProtoReflect() protoreflect.Message
func (*ModelInstanceGroup_SecondaryDevice) Reset ¶
func (x *ModelInstanceGroup_SecondaryDevice) Reset()
func (*ModelInstanceGroup_SecondaryDevice) String ¶
func (x *ModelInstanceGroup_SecondaryDevice) String() string
type ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind ¶
type ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind int32
The kind of the secondary device.
const (
	// KIND_NVDLA: An NVDLA core (http://nvdla.org). Currently KIND_NVDLA
	// is only supported by the TensorRT backend.
	ModelInstanceGroup_SecondaryDevice_KIND_NVDLA ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind = 0
)
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Descriptor ¶
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Descriptor() protoreflect.EnumDescriptor
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) EnumDescriptor deprecated
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind.Descriptor instead.
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Number ¶
func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) Number() protoreflect.EnumNumber
func (ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) String ¶
func (x ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind) String() string
type ModelOperations ¶
type ModelOperations struct {
	// Optional paths of the libraries providing custom operations for this
	// model. Valid only for ONNX models.
	OpLibraryFilename []string `protobuf:"bytes,1,rep,name=op_library_filename,json=opLibraryFilename,proto3" json:"op_library_filename,omitempty"`
	// contains filtered or unexported fields
}
The metadata of libraries providing custom operations for this model.
func (*ModelOperations) Descriptor deprecated
func (*ModelOperations) Descriptor() ([]byte, []int)
Deprecated: Use ModelOperations.ProtoReflect.Descriptor instead.
func (*ModelOperations) GetOpLibraryFilename ¶
func (x *ModelOperations) GetOpLibraryFilename() []string
func (*ModelOperations) ProtoMessage ¶
func (*ModelOperations) ProtoMessage()
func (*ModelOperations) ProtoReflect ¶
func (x *ModelOperations) ProtoReflect() protoreflect.Message
func (*ModelOperations) Reset ¶
func (x *ModelOperations) Reset()
func (*ModelOperations) String ¶
func (x *ModelOperations) String() string
type ModelOptimizationPolicy ¶
type ModelOptimizationPolicy struct {
	// The graph optimization setting for the model. Optional.
	Graph *ModelOptimizationPolicy_Graph `protobuf:"bytes,1,opt,name=graph,proto3" json:"graph,omitempty"`
	// The priority setting for the model. Optional.
	Priority ModelOptimizationPolicy_ModelPriority `protobuf:"varint,2,opt,name=priority,proto3,enum=inference.ModelOptimizationPolicy_ModelPriority" json:"priority,omitempty"`
	// CUDA-specific optimization settings. Optional.
	Cuda *ModelOptimizationPolicy_Cuda `protobuf:"bytes,3,opt,name=cuda,proto3" json:"cuda,omitempty"`
	// The accelerators used for the model. Optional.
	ExecutionAccelerators *ModelOptimizationPolicy_ExecutionAccelerators `protobuf:"bytes,4,opt,name=execution_accelerators,json=executionAccelerators,proto3" json:"execution_accelerators,omitempty"`
	// Use a pinned memory buffer when the data transfer for inputs is
	// between GPU memory and non-pinned system memory. Default is true.
	InputPinnedMemory *ModelOptimizationPolicy_PinnedMemoryBuffer `protobuf:"bytes,5,opt,name=input_pinned_memory,json=inputPinnedMemory,proto3" json:"input_pinned_memory,omitempty"`
	// Use a pinned memory buffer when the data transfer for outputs is
	// between GPU memory and non-pinned system memory. Default is true.
	OutputPinnedMemory *ModelOptimizationPolicy_PinnedMemoryBuffer `protobuf:"bytes,6,opt,name=output_pinned_memory,json=outputPinnedMemory,proto3" json:"output_pinned_memory,omitempty"`
	// The backend may use a gather kernel to gather input data if the
	// device has direct access to the source buffer and the destination
	// buffer. In such cases, the gather kernel will be used only if the
	// number of buffers to be gathered is greater than or equal to the
	// specified value. If 0, the gather kernel will be disabled. Default
	// value is 0. Currently only recognized by the TensorRT backend.
	GatherKernelBufferThreshold uint32 `` /* 147-byte string literal not displayed */
	// Start preparing the next batch before the model instance is ready
	// for the next inference. This option can be used to overlap the batch
	// preparation with model execution, with the trade-off that the next
	// batch might be smaller than it could have been. Default value is
	// false. Currently only recognized by the TensorRT backend.
	EagerBatching bool `protobuf:"varint,8,opt,name=eager_batching,json=eagerBatching,proto3" json:"eager_batching,omitempty"`
	// contains filtered or unexported fields
}
Optimization settings for a model. These settings control if/how a model is optimized and prioritized by the backend framework when it is loaded.
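A sketch, under a hypothetical import path, of a policy that enables framework graph optimization, bumps TensorRT scheduling priority, and keeps the default pinned-memory staging explicit:

package main

import (
	inference "example.com/your/module/inference" // hypothetical import path
)

func main() {
	opt := &inference.ModelOptimizationPolicy{
		// Ask the framework for optimization level 1.
		Graph: &inference.ModelOptimizationPolicy_Graph{Level: 1},
		// Scheduling preference; currently TensorRT-only.
		Priority: inference.ModelOptimizationPolicy_PRIORITY_MAX,
		// Pinned-memory staging defaults to true; shown here explicitly.
		InputPinnedMemory:  &inference.ModelOptimizationPolicy_PinnedMemoryBuffer{Enable: true},
		OutputPinnedMemory: &inference.ModelOptimizationPolicy_PinnedMemoryBuffer{Enable: true},
	}
	_ = opt
}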
func (*ModelOptimizationPolicy) Descriptor deprecated
func (*ModelOptimizationPolicy) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy) GetCuda ¶
func (x *ModelOptimizationPolicy) GetCuda() *ModelOptimizationPolicy_Cuda
func (*ModelOptimizationPolicy) GetEagerBatching ¶
func (x *ModelOptimizationPolicy) GetEagerBatching() bool
func (*ModelOptimizationPolicy) GetExecutionAccelerators ¶
func (x *ModelOptimizationPolicy) GetExecutionAccelerators() *ModelOptimizationPolicy_ExecutionAccelerators
func (*ModelOptimizationPolicy) GetGatherKernelBufferThreshold ¶
func (x *ModelOptimizationPolicy) GetGatherKernelBufferThreshold() uint32
func (*ModelOptimizationPolicy) GetGraph ¶
func (x *ModelOptimizationPolicy) GetGraph() *ModelOptimizationPolicy_Graph
func (*ModelOptimizationPolicy) GetInputPinnedMemory ¶
func (x *ModelOptimizationPolicy) GetInputPinnedMemory() *ModelOptimizationPolicy_PinnedMemoryBuffer
func (*ModelOptimizationPolicy) GetOutputPinnedMemory ¶
func (x *ModelOptimizationPolicy) GetOutputPinnedMemory() *ModelOptimizationPolicy_PinnedMemoryBuffer
func (*ModelOptimizationPolicy) GetPriority ¶
func (x *ModelOptimizationPolicy) GetPriority() ModelOptimizationPolicy_ModelPriority
func (*ModelOptimizationPolicy) ProtoMessage ¶
func (*ModelOptimizationPolicy) ProtoMessage()
func (*ModelOptimizationPolicy) ProtoReflect ¶
func (x *ModelOptimizationPolicy) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy) Reset ¶
func (x *ModelOptimizationPolicy) Reset()
func (*ModelOptimizationPolicy) String ¶
func (x *ModelOptimizationPolicy) String() string
type ModelOptimizationPolicy_Cuda ¶
type ModelOptimizationPolicy_Cuda struct {
	// Use the CUDA graphs API to capture model operations and execute them
	// more efficiently. Default value is false. Currently only recognized
	// by the TensorRT backend.
	Graphs bool `protobuf:"varint,1,opt,name=graphs,proto3" json:"graphs,omitempty"`
	// Use busy-waiting to synchronize CUDA events, minimizing the latency
	// between event completion and the host thread being notified, at the
	// cost of high CPU load. Default value is false. Currently only
	// recognized by the TensorRT backend.
	BusyWaitEvents bool `protobuf:"varint,2,opt,name=busy_wait_events,json=busyWaitEvents,proto3" json:"busy_wait_events,omitempty"`
	// Specification of the CUDA graph to be captured. If not specified and
	// 'graphs' is true, the default CUDA graphs will be captured based on
	// the model settings. Currently only recognized by the TensorRT
	// backend.
	GraphSpec []*ModelOptimizationPolicy_Cuda_GraphSpec `protobuf:"bytes,3,rep,name=graph_spec,json=graphSpec,proto3" json:"graph_spec,omitempty"`
	// Use a CUDA stream separate from the inference stream to copy the
	// output to host. Be aware that setting this option to true will
	// increase the memory consumption of the model, as Triton will
	// allocate twice as much GPU memory for its I/O tensor buffers.
	// Default value is false. Currently only recognized by the TensorRT
	// backend.
	OutputCopyStream bool `protobuf:"varint,4,opt,name=output_copy_stream,json=outputCopyStream,proto3" json:"output_copy_stream,omitempty"`
	// contains filtered or unexported fields
}
CUDA-specific optimization settings.
func (*ModelOptimizationPolicy_Cuda) Descriptor deprecated
func (*ModelOptimizationPolicy_Cuda) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Cuda.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Cuda) GetBusyWaitEvents ¶
func (x *ModelOptimizationPolicy_Cuda) GetBusyWaitEvents() bool
func (*ModelOptimizationPolicy_Cuda) GetGraphSpec ¶
func (x *ModelOptimizationPolicy_Cuda) GetGraphSpec() []*ModelOptimizationPolicy_Cuda_GraphSpec
func (*ModelOptimizationPolicy_Cuda) GetGraphs ¶
func (x *ModelOptimizationPolicy_Cuda) GetGraphs() bool
func (*ModelOptimizationPolicy_Cuda) GetOutputCopyStream ¶
func (x *ModelOptimizationPolicy_Cuda) GetOutputCopyStream() bool
func (*ModelOptimizationPolicy_Cuda) ProtoMessage ¶
func (*ModelOptimizationPolicy_Cuda) ProtoMessage()
func (*ModelOptimizationPolicy_Cuda) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Cuda) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Cuda) Reset ¶
func (x *ModelOptimizationPolicy_Cuda) Reset()
func (*ModelOptimizationPolicy_Cuda) String ¶
func (x *ModelOptimizationPolicy_Cuda) String() string
type ModelOptimizationPolicy_Cuda_GraphSpec ¶
type ModelOptimizationPolicy_Cuda_GraphSpec struct {
	// The batch size of the CUDA graph. If 'max_batch_size' is 0,
	// 'batch_size' must be set to 0. Otherwise, 'batch_size' must be set
	// to a value between 1 and 'max_batch_size'.
	BatchSize int32 `protobuf:"varint,1,opt,name=batch_size,json=batchSize,proto3" json:"batch_size,omitempty"`
	// The specification of the inputs. 'Shape' is the shape of the input
	// without the batching dimension.
	Input map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape `` /* 151-byte string literal not displayed */
	// Specify the lower bound of the CUDA graph. Optional. If specified,
	// the graph can be used for input shapes and batch sizes that are in
	// the closed interval between the lower bound specification and the
	// graph specification. For a dynamic-shape model, this allows CUDA
	// graphs to be launched frequently without capturing all possible
	// shape combinations. However, using a graph for shape combinations
	// different from the one used for capturing introduces uninitialized
	// data for execution, which may distort the inference result if the
	// model is sensitive to uninitialized data.
	GraphLowerBound *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound `protobuf:"bytes,3,opt,name=graph_lower_bound,json=graphLowerBound,proto3" json:"graph_lower_bound,omitempty"`
	// contains filtered or unexported fields
}
Specification of the CUDA graph to be captured.
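A sketch of the lower-bound mechanism described above: capture a graph for batch size 8 and input "INPUT" of shape [16], and allow it to serve anything down to batch size 1 and shape [1]. Tensor names and the import path are illustrative.

package main

import (
	inference "example.com/your/module/inference" // hypothetical import path
)

func main() {
	spec := &inference.ModelOptimizationPolicy_Cuda_GraphSpec{
		BatchSize: 8,
		Input: map[string]*inference.ModelOptimizationPolicy_Cuda_GraphSpec_Shape{
			"INPUT": {Dim: []int64{16}},
		},
		// Closed interval [lower bound, spec] of usable shapes/batches.
		GraphLowerBound: &inference.ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound{
			BatchSize: 1,
			Input: map[string]*inference.ModelOptimizationPolicy_Cuda_GraphSpec_Shape{
				"INPUT": {Dim: []int64{1}},
			},
		},
	}
	cuda := &inference.ModelOptimizationPolicy_Cuda{
		Graphs:    true,
		GraphSpec: []*inference.ModelOptimizationPolicy_Cuda_GraphSpec{spec},
	}
	_ = cuda
}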
func (*ModelOptimizationPolicy_Cuda_GraphSpec) Descriptor deprecated
func (*ModelOptimizationPolicy_Cuda_GraphSpec) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Cuda_GraphSpec.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Cuda_GraphSpec) GetBatchSize ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetBatchSize() int32
func (*ModelOptimizationPolicy_Cuda_GraphSpec) GetGraphLowerBound ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetGraphLowerBound() *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound
func (*ModelOptimizationPolicy_Cuda_GraphSpec) GetInput ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) GetInput() map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape
func (*ModelOptimizationPolicy_Cuda_GraphSpec) ProtoMessage ¶
func (*ModelOptimizationPolicy_Cuda_GraphSpec) ProtoMessage()
func (*ModelOptimizationPolicy_Cuda_GraphSpec) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Cuda_GraphSpec) Reset ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) Reset()
func (*ModelOptimizationPolicy_Cuda_GraphSpec) String ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec) String() string
type ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound ¶
type ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound struct {
	// The batch size of the CUDA graph. If 'max_batch_size' is 0,
	// 'batch_size' must be set to 0. Otherwise, 'batch_size' must be set
	// to a value between 1 and 'max_batch_size'.
	BatchSize int32 `protobuf:"varint,1,opt,name=batch_size,json=batchSize,proto3" json:"batch_size,omitempty"`
	// The specification of the inputs. 'Shape' is the shape of the input
	// without the batching dimension.
	Input map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape `` /* 151-byte string literal not displayed */
	// contains filtered or unexported fields
}
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Descriptor deprecated
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetBatchSize ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetBatchSize() int32
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetInput ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) GetInput() map[string]*ModelOptimizationPolicy_Cuda_GraphSpec_Shape
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoMessage ¶
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoMessage()
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Reset ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) Reset()
func (*ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) String ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_LowerBound) String() string
type ModelOptimizationPolicy_Cuda_GraphSpec_Shape ¶
type ModelOptimizationPolicy_Cuda_GraphSpec_Shape struct {
	// The dimensions.
	Dim []int64 `protobuf:"varint,1,rep,packed,name=dim,proto3" json:"dim,omitempty"`
	// contains filtered or unexported fields
}
Specification of tensor dimensions.
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Descriptor deprecated
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Cuda_GraphSpec_Shape.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) GetDim ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) GetDim() []int64
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoMessage ¶
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoMessage()
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Reset ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) Reset()
func (*ModelOptimizationPolicy_Cuda_GraphSpec_Shape) String ¶
func (x *ModelOptimizationPolicy_Cuda_GraphSpec_Shape) String() string
type ModelOptimizationPolicy_ExecutionAccelerators ¶
type ModelOptimizationPolicy_ExecutionAccelerators struct {
	// The preferred execution provider to be used if the model instance is
	// deployed on GPU.
	//
	// For the ONNX Runtime backend, the possible value is "tensorrt" as
	// the name, and no parameters are required.
	//
	// For the TensorFlow backend, possible values are "tensorrt",
	// "auto_mixed_precision", and "gpu_io".
	//
	// For "tensorrt", the following parameters can be specified:
	//
	//	"precision_mode": The precision used for optimization. Allowed
	//	values are "FP32" and "FP16". Default value is "FP32".
	//
	//	"max_cached_engines": The maximum number of cached TensorRT
	//	engines in dynamic TensorRT ops. Default value is 100.
	//
	//	"minimum_segment_size": The smallest model subgraph that will be
	//	considered for optimization by TensorRT. Default value is 3.
	//
	//	"max_workspace_size_bytes": The maximum GPU memory the model can
	//	use temporarily during execution. Default value is 1GB.
	//
	// For "auto_mixed_precision", no parameters are required. If set, the
	// model will try to use FP16 for better performance. This optimization
	// cannot be set together with "tensorrt".
	//
	// For "gpu_io", no parameters are required. If set, the model will be
	// executed using the TensorFlow Callable API to set input and output
	// tensors in GPU memory if possible, which can reduce data transfer
	// overhead if the model is used in an ensemble. However, the Callable
	// object will be created on model creation and it will request all
	// outputs for every model execution, which may impact performance if a
	// request does not require all outputs. This optimization will only
	// take effect if the model instance is created with KIND_GPU.
	GpuExecutionAccelerator []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator `` /* 132-byte string literal not displayed */
	// The preferred execution provider to be used if the model instance is
	// deployed on CPU.
	//
	// For the ONNX Runtime backend, the possible value is "openvino" as
	// the name, and no parameters are required.
	CpuExecutionAccelerator []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator `` /* 132-byte string literal not displayed */
	// contains filtered or unexported fields
}
Specify the preferred execution accelerators to be used to execute the model. Currently only recognized by the ONNX Runtime and TensorFlow backends. For the ONNX Runtime backend, the model is deployed with the execution accelerators in priority order; the priority is determined by the order in which they are set, i.e. the provider at the front has the highest priority. Overall, the priority order is: <gpu_execution_accelerator> (if the instance is on GPU), CUDA Execution Provider (if the instance is on GPU), <cpu_execution_accelerator>, Default CPU Execution Provider.
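A minimal sketch, under a hypothetical import path, requesting the "tensorrt" provider at FP16 for GPU instances; other backends fall back per the priority order above.

package main

import (
	inference "example.com/your/module/inference" // hypothetical import path
)

func main() {
	acc := &inference.ModelOptimizationPolicy_ExecutionAccelerators{
		GpuExecutionAccelerator: []*inference.ModelOptimizationPolicy_ExecutionAccelerators_Accelerator{
			{
				Name:       "tensorrt",
				Parameters: map[string]string{"precision_mode": "FP16"},
			},
		},
	}
	_ = acc
}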
func (*ModelOptimizationPolicy_ExecutionAccelerators) Descriptor deprecated
func (*ModelOptimizationPolicy_ExecutionAccelerators) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_ExecutionAccelerators.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_ExecutionAccelerators) GetCpuExecutionAccelerator ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) GetCpuExecutionAccelerator() []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
func (*ModelOptimizationPolicy_ExecutionAccelerators) GetGpuExecutionAccelerator ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) GetGpuExecutionAccelerator() []*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator
func (*ModelOptimizationPolicy_ExecutionAccelerators) ProtoMessage ¶
func (*ModelOptimizationPolicy_ExecutionAccelerators) ProtoMessage()
func (*ModelOptimizationPolicy_ExecutionAccelerators) ProtoReflect ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_ExecutionAccelerators) Reset ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) Reset()
func (*ModelOptimizationPolicy_ExecutionAccelerators) String ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators) String() string
type ModelOptimizationPolicy_ExecutionAccelerators_Accelerator ¶
type ModelOptimizationPolicy_ExecutionAccelerators_Accelerator struct {
	// The name of the execution accelerator.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// Additional parameters used to configure the accelerator.
	Parameters map[string]string `` /* 161-byte string literal not displayed */
	// contains filtered or unexported fields
}
Specify the accelerator to be used to execute the model. An accelerator with the same name may accept different parameters depending on the backend.
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Descriptor deprecated
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_ExecutionAccelerators_Accelerator.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetName ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetName() string
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetParameters ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) GetParameters() map[string]string
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoMessage ¶
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoMessage()
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoReflect ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Reset ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) Reset()
func (*ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) String ¶
func (x *ModelOptimizationPolicy_ExecutionAccelerators_Accelerator) String() string
type ModelOptimizationPolicy_Graph ¶
type ModelOptimizationPolicy_Graph struct {
	// The optimization level. Defaults to 0 (zero) if not specified.
	//
	//	-1: Disabled
	//	 0: Framework default
	//	1+: Enable optimization level (greater values indicate higher
	//	    optimization levels)
	Level int32 `protobuf:"varint,1,opt,name=level,proto3" json:"level,omitempty"`
	// contains filtered or unexported fields
}
Enable generic graph optimization of the model. If not specified, the framework's default level of optimization is used. Supports TensorFlow graphdef and savedmodel and ONNX models. For TensorFlow this causes XLA to be enabled/disabled for the model. For ONNX the default enables all optimizations, -1 enables only basic optimizations, and +1 enables only basic and extended optimizations.
func (*ModelOptimizationPolicy_Graph) Descriptor deprecated
func (*ModelOptimizationPolicy_Graph) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_Graph.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_Graph) GetLevel ¶
func (x *ModelOptimizationPolicy_Graph) GetLevel() int32
func (*ModelOptimizationPolicy_Graph) ProtoMessage ¶
func (*ModelOptimizationPolicy_Graph) ProtoMessage()
func (*ModelOptimizationPolicy_Graph) ProtoReflect ¶
func (x *ModelOptimizationPolicy_Graph) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_Graph) Reset ¶
func (x *ModelOptimizationPolicy_Graph) Reset()
func (*ModelOptimizationPolicy_Graph) String ¶
func (x *ModelOptimizationPolicy_Graph) String() string
type ModelOptimizationPolicy_ModelPriority ¶
type ModelOptimizationPolicy_ModelPriority int32
Model priorities. A model will be given scheduling and execution preference over models at lower priorities. Currently, model priorities only work for TensorRT models.
const (
	// PRIORITY_DEFAULT: The default model priority.
	ModelOptimizationPolicy_PRIORITY_DEFAULT ModelOptimizationPolicy_ModelPriority = 0
	// PRIORITY_MAX: The maximum model priority.
	ModelOptimizationPolicy_PRIORITY_MAX ModelOptimizationPolicy_ModelPriority = 1
	// PRIORITY_MIN: The minimum model priority.
	ModelOptimizationPolicy_PRIORITY_MIN ModelOptimizationPolicy_ModelPriority = 2
)
func (ModelOptimizationPolicy_ModelPriority) Descriptor ¶
func (ModelOptimizationPolicy_ModelPriority) Descriptor() protoreflect.EnumDescriptor
func (ModelOptimizationPolicy_ModelPriority) EnumDescriptor deprecated
func (ModelOptimizationPolicy_ModelPriority) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_ModelPriority.Descriptor instead.
func (ModelOptimizationPolicy_ModelPriority) Number ¶
func (x ModelOptimizationPolicy_ModelPriority) Number() protoreflect.EnumNumber
func (ModelOptimizationPolicy_ModelPriority) String ¶
func (x ModelOptimizationPolicy_ModelPriority) String() string
func (ModelOptimizationPolicy_ModelPriority) Type ¶
func (ModelOptimizationPolicy_ModelPriority) Type() protoreflect.EnumType
type ModelOptimizationPolicy_PinnedMemoryBuffer ¶
type ModelOptimizationPolicy_PinnedMemoryBuffer struct {
	// Use a pinned memory buffer. Default is true.
	Enable bool `protobuf:"varint,1,opt,name=enable,proto3" json:"enable,omitempty"`
	// contains filtered or unexported fields
}
Specify whether to use a pinned memory buffer when transferring data between non-pinned system memory and GPU memory. Using a pinned memory buffer for system-to/from-GPU transfers will typically provide increased performance. For example, in the common use case where the request provides inputs and delivers outputs via non-pinned system memory, if the model instance accepts GPU IOs, the inputs will be processed by two copies: from non-pinned system memory to pinned memory, and from pinned memory to GPU memory. Similarly, pinned memory will be used for delivering the outputs.
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) Descriptor deprecated
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) Descriptor() ([]byte, []int)
Deprecated: Use ModelOptimizationPolicy_PinnedMemoryBuffer.ProtoReflect.Descriptor instead.
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) GetEnable ¶
func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) GetEnable() bool
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoMessage ¶
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoMessage()
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoReflect ¶
func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) ProtoReflect() protoreflect.Message
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) Reset ¶
func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) Reset()
func (*ModelOptimizationPolicy_PinnedMemoryBuffer) String ¶
func (x *ModelOptimizationPolicy_PinnedMemoryBuffer) String() string
type ModelOutput ¶
type ModelOutput struct {
	// The name of the output.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// The data-type of the output.
	DataType DataType `protobuf:"varint,2,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"`
	// The dimensions/shape of the output tensor.
	Dims []int64 `protobuf:"varint,3,rep,packed,name=dims,proto3" json:"dims,omitempty"`
	// The shape produced for this output by the backend. The output will
	// be reshaped from this to the shape specified in 'dims' before being
	// returned in the inference response. The reshape must have the same
	// number of elements as the output shape specified by 'dims'.
	// Optional.
	Reshape *ModelTensorReshape `protobuf:"bytes,5,opt,name=reshape,proto3" json:"reshape,omitempty"`
	// The label file associated with this output. Should be specified only
	// for outputs that represent classifications. Optional.
	LabelFilename string `protobuf:"bytes,4,opt,name=label_filename,json=labelFilename,proto3" json:"label_filename,omitempty"`
	// Whether or not the output is a shape tensor to the model. This field
	// is currently supported only for the TensorRT model. An error will be
	// generated if this specification does not comply with the underlying
	// model.
	IsShapeTensor bool `protobuf:"varint,6,opt,name=is_shape_tensor,json=isShapeTensor,proto3" json:"is_shape_tensor,omitempty"`
	// contains filtered or unexported fields
}
An output produced by the model.
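A minimal sketch of a classification output with an associated label file. It assumes the usual generated DataType_TYPE_FP32 constant and a hypothetical import path; "labels.txt" is an illustrative filename.

package main

import (
	inference "example.com/your/module/inference" // hypothetical import path
)

func main() {
	out := &inference.ModelOutput{
		Name:          "SCORES",
		DataType:      inference.DataType_TYPE_FP32, // assumed generated constant
		Dims:          []int64{1000},
		LabelFilename: "labels.txt", // one label per line, by class index
	}
	_ = out
}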
func (*ModelOutput) Descriptor deprecated
func (*ModelOutput) Descriptor() ([]byte, []int)
Deprecated: Use ModelOutput.ProtoReflect.Descriptor instead.
func (*ModelOutput) GetDataType ¶
func (x *ModelOutput) GetDataType() DataType
func (*ModelOutput) GetDims ¶
func (x *ModelOutput) GetDims() []int64
func (*ModelOutput) GetIsShapeTensor ¶
func (x *ModelOutput) GetIsShapeTensor() bool
func (*ModelOutput) GetLabelFilename ¶
func (x *ModelOutput) GetLabelFilename() string
func (*ModelOutput) GetName ¶
func (x *ModelOutput) GetName() string
func (*ModelOutput) GetReshape ¶
func (x *ModelOutput) GetReshape() *ModelTensorReshape
func (*ModelOutput) ProtoMessage ¶
func (*ModelOutput) ProtoMessage()
func (*ModelOutput) ProtoReflect ¶
func (x *ModelOutput) ProtoReflect() protoreflect.Message
func (*ModelOutput) Reset ¶
func (x *ModelOutput) Reset()
func (*ModelOutput) String ¶
func (x *ModelOutput) String() string
type ModelParameter ¶
type ModelParameter struct {
	// The string value of the parameter.
	StringValue string `protobuf:"bytes,1,opt,name=string_value,json=stringValue,proto3" json:"string_value,omitempty"`
	// contains filtered or unexported fields
}
A model parameter.
func (*ModelParameter) Descriptor deprecated
func (*ModelParameter) Descriptor() ([]byte, []int)
Deprecated: Use ModelParameter.ProtoReflect.Descriptor instead.
func (*ModelParameter) GetStringValue ¶
func (x *ModelParameter) GetStringValue() string
func (*ModelParameter) ProtoMessage ¶
func (*ModelParameter) ProtoMessage()
func (*ModelParameter) ProtoReflect ¶
func (x *ModelParameter) ProtoReflect() protoreflect.Message
func (*ModelParameter) Reset ¶
func (x *ModelParameter) Reset()
func (*ModelParameter) String ¶
func (x *ModelParameter) String() string
type ModelQueuePolicy ¶
type ModelQueuePolicy struct {
	// The action applied to timed-out requests. The default action is
	// REJECT.
	TimeoutAction ModelQueuePolicy_TimeoutAction `` /* 147-byte string literal not displayed */
	// The default timeout for every request, in microseconds. The default
	// value is 0, which indicates that no timeout is set.
	DefaultTimeoutMicroseconds uint64 `` /* 142-byte string literal not displayed */
	// Whether an individual request can override the default timeout
	// value. When true, individual requests can set a timeout that is less
	// than the default timeout value but may not increase the timeout. The
	// default value is false.
	AllowTimeoutOverride bool `protobuf:"varint,3,opt,name=allow_timeout_override,json=allowTimeoutOverride,proto3" json:"allow_timeout_override,omitempty"`
	// The maximum queue size for holding requests. A request will be
	// rejected immediately if it can't be enqueued because the queue is
	// full. The default value is 0, which indicates that no maximum queue
	// size is enforced.
	MaxQueueSize uint32 `protobuf:"varint,4,opt,name=max_queue_size,json=maxQueueSize,proto3" json:"max_queue_size,omitempty"`
	// contains filtered or unexported fields
}
Queue policy for inference requests.
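A sketch of a policy that rejects requests queued for more than 5ms, caps the queue at 64 requests, and lets callers shorten (but not extend) the timeout. The import path is a placeholder.

package main

import (
	inference "example.com/your/module/inference" // hypothetical import path
)

func main() {
	qp := &inference.ModelQueuePolicy{
		TimeoutAction:              inference.ModelQueuePolicy_REJECT,
		DefaultTimeoutMicroseconds: 5000,
		AllowTimeoutOverride:       true, // requests may lower the timeout
		MaxQueueSize:               64,
	}
	_ = qp
}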
func (*ModelQueuePolicy) Descriptor deprecated
func (*ModelQueuePolicy) Descriptor() ([]byte, []int)
Deprecated: Use ModelQueuePolicy.ProtoReflect.Descriptor instead.
func (*ModelQueuePolicy) GetAllowTimeoutOverride ¶
func (x *ModelQueuePolicy) GetAllowTimeoutOverride() bool
func (*ModelQueuePolicy) GetDefaultTimeoutMicroseconds ¶
func (x *ModelQueuePolicy) GetDefaultTimeoutMicroseconds() uint64
func (*ModelQueuePolicy) GetMaxQueueSize ¶
func (x *ModelQueuePolicy) GetMaxQueueSize() uint32
func (*ModelQueuePolicy) GetTimeoutAction ¶
func (x *ModelQueuePolicy) GetTimeoutAction() ModelQueuePolicy_TimeoutAction
func (*ModelQueuePolicy) ProtoMessage ¶
func (*ModelQueuePolicy) ProtoMessage()
func (*ModelQueuePolicy) ProtoReflect ¶
func (x *ModelQueuePolicy) ProtoReflect() protoreflect.Message
func (*ModelQueuePolicy) Reset ¶
func (x *ModelQueuePolicy) Reset()
func (*ModelQueuePolicy) String ¶
func (x *ModelQueuePolicy) String() string
type ModelQueuePolicy_TimeoutAction ¶
type ModelQueuePolicy_TimeoutAction int32
The action applied to timed-out requests.
const (
	// REJECT: Reject the request and return an error message accordingly.
	ModelQueuePolicy_REJECT ModelQueuePolicy_TimeoutAction = 0
	// DELAY: Delay the request until all other requests at the same (or
	// higher) priority levels that have not reached their timeouts are
	// processed. A delayed request will eventually be processed, but may
	// be delayed indefinitely due to newly arriving requests.
	ModelQueuePolicy_DELAY ModelQueuePolicy_TimeoutAction = 1
)
func (ModelQueuePolicy_TimeoutAction) Descriptor ¶
func (ModelQueuePolicy_TimeoutAction) Descriptor() protoreflect.EnumDescriptor
func (ModelQueuePolicy_TimeoutAction) Enum ¶
func (x ModelQueuePolicy_TimeoutAction) Enum() *ModelQueuePolicy_TimeoutAction
func (ModelQueuePolicy_TimeoutAction) EnumDescriptor deprecated
func (ModelQueuePolicy_TimeoutAction) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelQueuePolicy_TimeoutAction.Descriptor instead.
func (ModelQueuePolicy_TimeoutAction) Number ¶
func (x ModelQueuePolicy_TimeoutAction) Number() protoreflect.EnumNumber
func (ModelQueuePolicy_TimeoutAction) String ¶
func (x ModelQueuePolicy_TimeoutAction) String() string
func (ModelQueuePolicy_TimeoutAction) Type ¶
func (ModelQueuePolicy_TimeoutAction) Type() protoreflect.EnumType
type ModelRateLimiter ¶
type ModelRateLimiter struct {
	// The resources required to execute the request on a model instance.
	// Resources are just names with a corresponding count. The execution
	// of the instance will be blocked until the specified resources are
	// available. By default an instance uses no rate-limiter resources.
	Resources []*ModelRateLimiter_Resource `protobuf:"bytes,1,rep,name=resources,proto3" json:"resources,omitempty"`
	// The optional weighting value to be used for prioritizing across
	// instances. An instance with priority 2 will be given 1/2 the number
	// of scheduling chances as an instance group with priority 1. The
	// default priority is 1. A priority of 0 will be treated as priority
	// 1.
	Priority uint32 `protobuf:"varint,2,opt,name=priority,proto3" json:"priority,omitempty"`
	// contains filtered or unexported fields
}
The specifications required by the rate limiter to properly schedule inference requests across the different models and their instances.
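A sketch where each execution needs 2 units of a per-device resource named "R1", and the instance gets half the scheduling chances of a priority-1 instance. The resource name and import path are illustrative.

package main

import (
	inference "example.com/your/module/inference" // hypothetical import path
)

func main() {
	rl := &inference.ModelRateLimiter{
		Resources: []*inference.ModelRateLimiter_Resource{
			// Global=false: the count applies per device, not shared.
			{Name: "R1", Global: false, Count: 2},
		},
		Priority: 2,
	}
	_ = rl
}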
func (*ModelRateLimiter) Descriptor deprecated
func (*ModelRateLimiter) Descriptor() ([]byte, []int)
Deprecated: Use ModelRateLimiter.ProtoReflect.Descriptor instead.
func (*ModelRateLimiter) GetPriority ¶
func (x *ModelRateLimiter) GetPriority() uint32
func (*ModelRateLimiter) GetResources ¶
func (x *ModelRateLimiter) GetResources() []*ModelRateLimiter_Resource
func (*ModelRateLimiter) ProtoMessage ¶
func (*ModelRateLimiter) ProtoMessage()
func (*ModelRateLimiter) ProtoReflect ¶
func (x *ModelRateLimiter) ProtoReflect() protoreflect.Message
func (*ModelRateLimiter) Reset ¶
func (x *ModelRateLimiter) Reset()
func (*ModelRateLimiter) String ¶
func (x *ModelRateLimiter) String() string
type ModelRateLimiter_Resource ¶
type ModelRateLimiter_Resource struct {
	// The name associated with the resource.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// Whether or not the resource is global. If true, the resource is
	// assumed to be shared among the devices; otherwise the specified
	// count of the resource is assumed for each device associated with the
	// instance.
	Global bool `protobuf:"varint,2,opt,name=global,proto3" json:"global,omitempty"`
	// The number of resources required for the execution of the model
	// instance.
	Count uint32 `protobuf:"varint,3,opt,name=count,proto3" json:"count,omitempty"`
	// contains filtered or unexported fields
}
The resource property.
func (*ModelRateLimiter_Resource) Descriptor deprecated
func (*ModelRateLimiter_Resource) Descriptor() ([]byte, []int)
Deprecated: Use ModelRateLimiter_Resource.ProtoReflect.Descriptor instead.
func (*ModelRateLimiter_Resource) GetCount ¶
func (x *ModelRateLimiter_Resource) GetCount() uint32
func (*ModelRateLimiter_Resource) GetGlobal ¶
func (x *ModelRateLimiter_Resource) GetGlobal() bool
func (*ModelRateLimiter_Resource) GetName ¶
func (x *ModelRateLimiter_Resource) GetName() string
func (*ModelRateLimiter_Resource) ProtoMessage ¶
func (*ModelRateLimiter_Resource) ProtoMessage()
func (*ModelRateLimiter_Resource) ProtoReflect ¶
func (x *ModelRateLimiter_Resource) ProtoReflect() protoreflect.Message
func (*ModelRateLimiter_Resource) Reset ¶
func (x *ModelRateLimiter_Resource) Reset()
func (*ModelRateLimiter_Resource) String ¶
func (x *ModelRateLimiter_Resource) String() string
type ModelRepositoryAgents ¶
type ModelRepositoryAgents struct {
	// The ordered list of agents for the model. These agents will be
	// invoked in order to respond to repository actions occurring for the
	// model.
	Agents []*ModelRepositoryAgents_Agent `protobuf:"bytes,1,rep,name=agents,proto3" json:"agents,omitempty"`
	// contains filtered or unexported fields
}
The repository agents for the model.
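A sketch of attaching one agent with parameters. Both the agent name "checksum" and its parameter key are illustrative placeholders, not guaranteed by this package; the import path is hypothetical.

package main

import (
	inference "example.com/your/module/inference" // hypothetical import path
)

func main() {
	agents := &inference.ModelRepositoryAgents{
		Agents: []*inference.ModelRepositoryAgents_Agent{
			{
				Name:       "checksum", // illustrative agent name
				Parameters: map[string]string{"MD5:model.plan": "<expected-md5>"},
			},
		},
	}
	_ = agents
}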
func (*ModelRepositoryAgents) Descriptor deprecated
func (*ModelRepositoryAgents) Descriptor() ([]byte, []int)
Deprecated: Use ModelRepositoryAgents.ProtoReflect.Descriptor instead.
func (*ModelRepositoryAgents) GetAgents ¶
func (x *ModelRepositoryAgents) GetAgents() []*ModelRepositoryAgents_Agent
func (*ModelRepositoryAgents) ProtoMessage ¶
func (*ModelRepositoryAgents) ProtoMessage()
func (*ModelRepositoryAgents) ProtoReflect ¶
func (x *ModelRepositoryAgents) ProtoReflect() protoreflect.Message
func (*ModelRepositoryAgents) Reset ¶
func (x *ModelRepositoryAgents) Reset()
func (*ModelRepositoryAgents) String ¶
func (x *ModelRepositoryAgents) String() string
type ModelRepositoryAgents_Agent ¶
type ModelRepositoryAgents_Agent struct {
	// The name of the agent.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// The parameters for the agent.
	Parameters map[string]string `` /* 161-byte string literal not displayed */
	// contains filtered or unexported fields
}
A repository agent that should be invoked for the specified repository actions for this model.
func (*ModelRepositoryAgents_Agent) Descriptor deprecated
func (*ModelRepositoryAgents_Agent) Descriptor() ([]byte, []int)
Deprecated: Use ModelRepositoryAgents_Agent.ProtoReflect.Descriptor instead.
func (*ModelRepositoryAgents_Agent) GetName ¶
func (x *ModelRepositoryAgents_Agent) GetName() string
func (*ModelRepositoryAgents_Agent) GetParameters ¶
func (x *ModelRepositoryAgents_Agent) GetParameters() map[string]string
func (*ModelRepositoryAgents_Agent) ProtoMessage ¶
func (*ModelRepositoryAgents_Agent) ProtoMessage()
func (*ModelRepositoryAgents_Agent) ProtoReflect ¶
func (x *ModelRepositoryAgents_Agent) ProtoReflect() protoreflect.Message
func (*ModelRepositoryAgents_Agent) Reset ¶
func (x *ModelRepositoryAgents_Agent) Reset()
func (*ModelRepositoryAgents_Agent) String ¶
func (x *ModelRepositoryAgents_Agent) String() string
type ModelResponseCache ¶
type ModelResponseCache struct {
	//@@
	//@@ .. cpp:var:: bool enable
	//@@
	//@@    Whether or not to use the response cache for the model. If true,
	//@@    the responses from the model are cached, and when an identical
	//@@    request is encountered the cached response is used instead of
	//@@    going through model execution. By default, the response cache
	//@@    is disabled for models.
	//@@
	Enable bool `protobuf:"varint,1,opt,name=enable,proto3" json:"enable,omitempty"`
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message ModelResponseCache
@@
@@    The response cache setting for the model.
@@
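Enabling the cache is a single boolean field; a trivial sketch (placeholder import path as above):

package inference_test

import inference "example.com/gen/inference" // hypothetical module path

// Responses for identical requests are served from the cache when enabled.
func newCacheConfig() *inference.ModelResponseCache {
	return &inference.ModelResponseCache{Enable: true}
}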
func (*ModelResponseCache) Descriptor
deprecated
func (*ModelResponseCache) Descriptor() ([]byte, []int)
Deprecated: Use ModelResponseCache.ProtoReflect.Descriptor instead.
func (*ModelResponseCache) GetEnable ¶
func (x *ModelResponseCache) GetEnable() bool
func (*ModelResponseCache) ProtoMessage ¶
func (*ModelResponseCache) ProtoMessage()
func (*ModelResponseCache) ProtoReflect ¶
func (x *ModelResponseCache) ProtoReflect() protoreflect.Message
func (*ModelResponseCache) Reset ¶
func (x *ModelResponseCache) Reset()
func (*ModelResponseCache) String ¶
func (x *ModelResponseCache) String() string
type ModelSequenceBatching ¶
type ModelSequenceBatching struct {
	//@@ .. cpp:var:: oneof strategy_choice
	//@@
	//@@    The strategy used by the sequence batcher. The default strategy
	//@@    is 'direct'.
	//@@
	// Types that are assignable to StrategyChoice:
	//	*ModelSequenceBatching_Direct
	//	*ModelSequenceBatching_Oldest
	StrategyChoice isModelSequenceBatching_StrategyChoice `protobuf_oneof:"strategy_choice"`
	//@@ .. cpp:var:: uint64 max_sequence_idle_microseconds
	//@@
	//@@    The maximum time, in microseconds, that a sequence is allowed to
	//@@    be idle before it is aborted. The inference server considers a
	//@@    sequence idle when it does not have any inference request queued
	//@@    for the sequence. If this limit is exceeded, the inference server
	//@@    will free the sequence slot allocated by the sequence and make it
	//@@    available for another sequence. If not specified (or specified as
	//@@    zero) a default value of 1000000 (1 second) is used.
	//@@
	MaxSequenceIdleMicroseconds uint64 `` /* 147-byte string literal not displayed */
	//@@ .. cpp:var:: ControlInput control_input (repeated)
	//@@
	//@@    The model input(s) that the server should use to communicate
	//@@    sequence start, stop, ready and similar control values to the
	//@@    model.
	//@@
	ControlInput []*ModelSequenceBatching_ControlInput `protobuf:"bytes,2,rep,name=control_input,json=controlInput,proto3" json:"control_input,omitempty"`
	//@@ .. cpp:var:: State state (repeated)
	//@@
	//@@    The optional state that can be stored in Triton for performing
	//@@    inference requests on a sequence. Each sequence holds an implicit
	//@@    state local to itself. The output state tensor provided by the
	//@@    model in the 'output_name' field of the current inference request
	//@@    will be transferred as an input tensor named 'input_name' in the
	//@@    next request of the same sequence. The input state of the first
	//@@    request in the sequence contains garbage data.
	//@@
	State []*ModelSequenceBatching_State `protobuf:"bytes,5,rep,name=state,proto3" json:"state,omitempty"`
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message ModelSequenceBatching
@@
@@    Sequence batching configuration. These settings control how sequence
@@    batching operates for the model.
@@
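The strategy oneof is selected by assigning one of the wrapper types to StrategyChoice. A minimal sketch choosing the 'direct' strategy (the import path remains a placeholder):

package inference_test

import inference "example.com/gen/inference" // hypothetical module path

func newSequenceBatching() *inference.ModelSequenceBatching {
	return &inference.ModelSequenceBatching{
		// Select the 'direct' strategy through its oneof wrapper type.
		StrategyChoice: &inference.ModelSequenceBatching_Direct{
			Direct: &inference.ModelSequenceBatching_StrategyDirect{
				MaxQueueDelayMicroseconds: 100,
			},
		},
		// Free the sequence slot after 5 seconds without a queued request.
		MaxSequenceIdleMicroseconds: 5000000,
	}
}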
func (*ModelSequenceBatching) Descriptor
deprecated
func (*ModelSequenceBatching) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching) GetControlInput ¶
func (x *ModelSequenceBatching) GetControlInput() []*ModelSequenceBatching_ControlInput
func (*ModelSequenceBatching) GetDirect ¶
func (x *ModelSequenceBatching) GetDirect() *ModelSequenceBatching_StrategyDirect
func (*ModelSequenceBatching) GetMaxSequenceIdleMicroseconds ¶
func (x *ModelSequenceBatching) GetMaxSequenceIdleMicroseconds() uint64
func (*ModelSequenceBatching) GetOldest ¶
func (x *ModelSequenceBatching) GetOldest() *ModelSequenceBatching_StrategyOldest
func (*ModelSequenceBatching) GetState ¶
func (x *ModelSequenceBatching) GetState() []*ModelSequenceBatching_State
func (*ModelSequenceBatching) GetStrategyChoice ¶
func (m *ModelSequenceBatching) GetStrategyChoice() isModelSequenceBatching_StrategyChoice
func (*ModelSequenceBatching) ProtoMessage ¶
func (*ModelSequenceBatching) ProtoMessage()
func (*ModelSequenceBatching) ProtoReflect ¶
func (x *ModelSequenceBatching) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching) Reset ¶
func (x *ModelSequenceBatching) Reset()
func (*ModelSequenceBatching) String ¶
func (x *ModelSequenceBatching) String() string
type ModelSequenceBatching_Control ¶
type ModelSequenceBatching_Control struct {
	//@@ .. cpp:var:: Kind kind
	//@@
	//@@    The kind of this control.
	//@@
	Kind ModelSequenceBatching_Control_Kind `protobuf:"varint,1,opt,name=kind,proto3,enum=inference.ModelSequenceBatching_Control_Kind" json:"kind,omitempty"`
	//@@ .. cpp:var:: int32 int32_false_true (repeated)
	//@@
	//@@    The control's true and false setting is indicated by setting
	//@@    a value in an int32 tensor. The tensor must be a
	//@@    1-dimensional tensor with size equal to the batch size of
	//@@    the request. 'int32_false_true' must have two entries: the
	//@@    first the false value and the second the true value.
	//@@
	Int32FalseTrue []int32 `protobuf:"varint,2,rep,packed,name=int32_false_true,json=int32FalseTrue,proto3" json:"int32_false_true,omitempty"`
	//@@ .. cpp:var:: float fp32_false_true (repeated)
	//@@
	//@@    The control's true and false setting is indicated by setting
	//@@    a value in a fp32 tensor. The tensor must be a
	//@@    1-dimensional tensor with size equal to the batch size of
	//@@    the request. 'fp32_false_true' must have two entries: the
	//@@    first the false value and the second the true value.
	//@@
	Fp32FalseTrue []float32 `protobuf:"fixed32,3,rep,packed,name=fp32_false_true,json=fp32FalseTrue,proto3" json:"fp32_false_true,omitempty"`
	//@@ .. cpp:var:: bool bool_false_true (repeated)
	//@@
	//@@    The control's true and false setting is indicated by setting
	//@@    a value in a bool tensor. The tensor must be a
	//@@    1-dimensional tensor with size equal to the batch size of
	//@@    the request. 'bool_false_true' must have two entries: the
	//@@    first the false value and the second the true value.
	//@@
	BoolFalseTrue []bool `protobuf:"varint,5,rep,packed,name=bool_false_true,json=boolFalseTrue,proto3" json:"bool_false_true,omitempty"`
	//@@ .. cpp:var:: DataType data_type
	//@@
	//@@    The control's datatype.
	//@@
	DataType DataType `protobuf:"varint,4,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"`
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message Control
@@
@@    A control is a signal that the sequence batcher uses to
@@    communicate with a backend.
@@
func (*ModelSequenceBatching_Control) Descriptor
deprecated
func (*ModelSequenceBatching_Control) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_Control.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_Control) GetBoolFalseTrue ¶
func (x *ModelSequenceBatching_Control) GetBoolFalseTrue() []bool
func (*ModelSequenceBatching_Control) GetDataType ¶
func (x *ModelSequenceBatching_Control) GetDataType() DataType
func (*ModelSequenceBatching_Control) GetFp32FalseTrue ¶
func (x *ModelSequenceBatching_Control) GetFp32FalseTrue() []float32
func (*ModelSequenceBatching_Control) GetInt32FalseTrue ¶
func (x *ModelSequenceBatching_Control) GetInt32FalseTrue() []int32
func (*ModelSequenceBatching_Control) GetKind ¶
func (x *ModelSequenceBatching_Control) GetKind() ModelSequenceBatching_Control_Kind
func (*ModelSequenceBatching_Control) ProtoMessage ¶
func (*ModelSequenceBatching_Control) ProtoMessage()
func (*ModelSequenceBatching_Control) ProtoReflect ¶
func (x *ModelSequenceBatching_Control) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_Control) Reset ¶
func (x *ModelSequenceBatching_Control) Reset()
func (*ModelSequenceBatching_Control) String ¶
func (x *ModelSequenceBatching_Control) String() string
type ModelSequenceBatching_ControlInput ¶
type ModelSequenceBatching_ControlInput struct {
	//@@ .. cpp:var:: string name
	//@@
	//@@    The name of the model input.
	//@@
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	//@@ .. cpp:var:: Control control (repeated)
	//@@
	//@@    The control value(s) that should be communicated to the
	//@@    model using this model input.
	//@@
	Control []*ModelSequenceBatching_Control `protobuf:"bytes,2,rep,name=control,proto3" json:"control,omitempty"`
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message ControlInput
@@
@@    The sequence control values to communicate by a model input.
@@
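A sketch tying a control to a model input: a sequence-start signal delivered through an int32 tensor. The input name "START" is hypothetical; it must match an input the model actually declares.

package inference_test

import inference "example.com/gen/inference" // hypothetical module path

func newStartControl() *inference.ModelSequenceBatching_ControlInput {
	return &inference.ModelSequenceBatching_ControlInput{
		Name: "START", // hypothetical model input name
		Control: []*inference.ModelSequenceBatching_Control{
			{
				Kind:           inference.ModelSequenceBatching_Control_CONTROL_SEQUENCE_START,
				Int32FalseTrue: []int32{0, 1}, // false value first, then true value
			},
		},
	}
}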
func (*ModelSequenceBatching_ControlInput) Descriptor
deprecated
func (*ModelSequenceBatching_ControlInput) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_ControlInput.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_ControlInput) GetControl ¶
func (x *ModelSequenceBatching_ControlInput) GetControl() []*ModelSequenceBatching_Control
func (*ModelSequenceBatching_ControlInput) GetName ¶
func (x *ModelSequenceBatching_ControlInput) GetName() string
func (*ModelSequenceBatching_ControlInput) ProtoMessage ¶
func (*ModelSequenceBatching_ControlInput) ProtoMessage()
func (*ModelSequenceBatching_ControlInput) ProtoReflect ¶
func (x *ModelSequenceBatching_ControlInput) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_ControlInput) Reset ¶
func (x *ModelSequenceBatching_ControlInput) Reset()
func (*ModelSequenceBatching_ControlInput) String ¶
func (x *ModelSequenceBatching_ControlInput) String() string
type ModelSequenceBatching_Control_Kind ¶
type ModelSequenceBatching_Control_Kind int32
@@
@@ .. cpp:enum:: Kind
@@
@@    The kind of the control.
@@

const (
	//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0
	//@@
	//@@    A new sequence is/is-not starting. If true a sequence is
	//@@    starting, if false a sequence is continuing. Must
	//@@    specify either int32_false_true, fp32_false_true or
	//@@    bool_false_true for this control. This control is optional.
	//@@
	ModelSequenceBatching_Control_CONTROL_SEQUENCE_START ModelSequenceBatching_Control_Kind = 0
	//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1
	//@@
	//@@    A sequence is/is-not ready for inference. If true the
	//@@    input tensor data is valid and should be used. If false
	//@@    the input tensor data is invalid and inferencing should
	//@@    be "skipped". Must specify either int32_false_true,
	//@@    fp32_false_true or bool_false_true for this control. This
	//@@    control is optional.
	//@@
	ModelSequenceBatching_Control_CONTROL_SEQUENCE_READY ModelSequenceBatching_Control_Kind = 1
	//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2
	//@@
	//@@    A sequence is/is-not ending. If true a sequence is
	//@@    ending, if false a sequence is continuing. Must specify
	//@@    either int32_false_true, fp32_false_true or bool_false_true
	//@@    for this control. This control is optional.
	//@@
	ModelSequenceBatching_Control_CONTROL_SEQUENCE_END ModelSequenceBatching_Control_Kind = 2
	//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3
	//@@
	//@@    The correlation ID of the sequence. The correlation ID
	//@@    is a uint64_t value that is communicated in whole or
	//@@    in part by the tensor. The tensor's datatype must be
	//@@    specified by data_type and must be TYPE_UINT64, TYPE_INT64,
	//@@    TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified
	//@@    the correlation ID will be truncated to the low-order 32
	//@@    bits. This control is optional.
	//@@
	ModelSequenceBatching_Control_CONTROL_SEQUENCE_CORRID ModelSequenceBatching_Control_Kind = 3
)
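A sketch of a correlation-ID control. The datatype constant is written per the usual protoc-gen-go naming (DataType_TYPE_UINT64); that name is an assumption, since the DataType constants are not listed on this page.

package inference_test

import inference "example.com/gen/inference" // hypothetical module path

func newCorrIDControl() *inference.ModelSequenceBatching_Control {
	return &inference.ModelSequenceBatching_Control{
		Kind: inference.ModelSequenceBatching_Control_CONTROL_SEQUENCE_CORRID,
		// Assumed constant name per protoc-gen-go conventions; a 32-bit
		// datatype would truncate the correlation ID to its low 32 bits.
		DataType: inference.DataType_TYPE_UINT64,
	}
}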
func (ModelSequenceBatching_Control_Kind) Descriptor ¶
func (ModelSequenceBatching_Control_Kind) Descriptor() protoreflect.EnumDescriptor
func (ModelSequenceBatching_Control_Kind) Enum ¶
func (x ModelSequenceBatching_Control_Kind) Enum() *ModelSequenceBatching_Control_Kind
func (ModelSequenceBatching_Control_Kind) EnumDescriptor
deprecated
func (ModelSequenceBatching_Control_Kind) EnumDescriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_Control_Kind.Descriptor instead.
func (ModelSequenceBatching_Control_Kind) Number ¶
func (x ModelSequenceBatching_Control_Kind) Number() protoreflect.EnumNumber
func (ModelSequenceBatching_Control_Kind) String ¶
func (x ModelSequenceBatching_Control_Kind) String() string
func (ModelSequenceBatching_Control_Kind) Type ¶
func (ModelSequenceBatching_Control_Kind) Type() protoreflect.EnumType
type ModelSequenceBatching_Direct ¶
type ModelSequenceBatching_Direct struct {
	//@@ .. cpp:var:: StrategyDirect direct
	//@@
	//@@    StrategyDirect scheduling strategy.
	//@@
	Direct *ModelSequenceBatching_StrategyDirect `protobuf:"bytes,3,opt,name=direct,proto3,oneof"`
}
type ModelSequenceBatching_InitialState ¶
type ModelSequenceBatching_InitialState struct {
	//@@ .. cpp:var:: DataType data_type
	//@@
	//@@    The data-type of the state.
	//@@
	DataType DataType `protobuf:"varint,1,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"`
	//@@ .. cpp:var:: int64 dims (repeated)
	//@@
	//@@    The shape of the state tensor, not including the batch dimension.
	//@@
	Dims []int64 `protobuf:"varint,2,rep,packed,name=dims,proto3" json:"dims,omitempty"`
	//@@ .. cpp:var:: oneof state_data
	//@@
	//@@    Specify how the initial state data is generated.
	//@@
	// Types that are assignable to StateData:
	//	*ModelSequenceBatching_InitialState_ZeroData
	//	*ModelSequenceBatching_InitialState_DataFile
	StateData isModelSequenceBatching_InitialState_StateData `protobuf_oneof:"state_data"`
	//@@ .. cpp:var:: string name
	//@@
	//@@    The name of the state initialization.
	//@@
	Name string `protobuf:"bytes,5,opt,name=name,proto3" json:"name,omitempty"`
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message InitialState
@@
@@    Settings used to initialize data for implicit state.
@@
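A sketch initializing implicit state from a file via the state_data oneof. The file name and state name are hypothetical, and the datatype constant name follows the assumed protoc-gen-go convention noted earlier.

package inference_test

import inference "example.com/gen/inference" // hypothetical module path

func newInitialStateFromFile() *inference.ModelSequenceBatching_InitialState {
	return &inference.ModelSequenceBatching_InitialState{
		Name:     "from_file",                  // hypothetical name
		DataType: inference.DataType_TYPE_FP32, // assumed constant name
		Dims:     []int64{4},
		// The file is read from the 'initial_state' sub-directory of the
		// model directory, per the DataFile field documentation below.
		StateData: &inference.ModelSequenceBatching_InitialState_DataFile{DataFile: "init.bin"},
	}
}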
func (*ModelSequenceBatching_InitialState) Descriptor
deprecated
func (*ModelSequenceBatching_InitialState) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_InitialState.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_InitialState) GetDataFile ¶
func (x *ModelSequenceBatching_InitialState) GetDataFile() string
func (*ModelSequenceBatching_InitialState) GetDataType ¶
func (x *ModelSequenceBatching_InitialState) GetDataType() DataType
func (*ModelSequenceBatching_InitialState) GetDims ¶
func (x *ModelSequenceBatching_InitialState) GetDims() []int64
func (*ModelSequenceBatching_InitialState) GetName ¶
func (x *ModelSequenceBatching_InitialState) GetName() string
func (*ModelSequenceBatching_InitialState) GetStateData ¶
func (m *ModelSequenceBatching_InitialState) GetStateData() isModelSequenceBatching_InitialState_StateData
func (*ModelSequenceBatching_InitialState) GetZeroData ¶
func (x *ModelSequenceBatching_InitialState) GetZeroData() bool
func (*ModelSequenceBatching_InitialState) ProtoMessage ¶
func (*ModelSequenceBatching_InitialState) ProtoMessage()
func (*ModelSequenceBatching_InitialState) ProtoReflect ¶
func (x *ModelSequenceBatching_InitialState) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_InitialState) Reset ¶
func (x *ModelSequenceBatching_InitialState) Reset()
func (*ModelSequenceBatching_InitialState) String ¶
func (x *ModelSequenceBatching_InitialState) String() string
type ModelSequenceBatching_InitialState_DataFile ¶
type ModelSequenceBatching_InitialState_DataFile struct {
	//@@ .. cpp:var:: string data_file
	//@@
	//@@    The file whose content will be used as the initial data for
	//@@    the state in row-major order. The file must be provided in the
	//@@    sub-directory 'initial_state' under the model directory.
	//@@
	DataFile string `protobuf:"bytes,4,opt,name=data_file,json=dataFile,proto3,oneof"`
}
type ModelSequenceBatching_InitialState_ZeroData ¶
type ModelSequenceBatching_InitialState_ZeroData struct {
	//@@
	//@@ .. cpp:var:: bool zero_data
	//@@
	//@@    The identifier for using zeros as initial state data.
	//@@    Note that the value of 'zero_data' will not be checked;
	//@@    instead, zero data will be used as long as the field is set.
	//@@
	ZeroData bool `protobuf:"varint,3,opt,name=zero_data,json=zeroData,proto3,oneof"`
}
type ModelSequenceBatching_Oldest ¶
type ModelSequenceBatching_Oldest struct {
	//@@ .. cpp:var:: StrategyOldest oldest
	//@@
	//@@    StrategyOldest scheduling strategy.
	//@@
	Oldest *ModelSequenceBatching_StrategyOldest `protobuf:"bytes,4,opt,name=oldest,proto3,oneof"`
}
type ModelSequenceBatching_State ¶
type ModelSequenceBatching_State struct {
	//@@ .. cpp:var:: string input_name
	//@@
	//@@    The name of the model state input.
	//@@
	InputName string `protobuf:"bytes,1,opt,name=input_name,json=inputName,proto3" json:"input_name,omitempty"`
	//@@ .. cpp:var:: string output_name
	//@@
	//@@    The name of the model state output.
	//@@
	OutputName string `protobuf:"bytes,2,opt,name=output_name,json=outputName,proto3" json:"output_name,omitempty"`
	//@@ .. cpp:var:: DataType data_type
	//@@
	//@@    The data-type of the state.
	//@@
	DataType DataType `protobuf:"varint,3,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"`
	//@@ .. cpp:var:: int64 dims (repeated)
	//@@
	//@@    The dimensions of the state tensor.
	//@@
	Dims []int64 `protobuf:"varint,4,rep,packed,name=dims,proto3" json:"dims,omitempty"`
	//@@ .. cpp:var:: InitialState initial_state (repeated)
	//@@
	//@@    The optional field to specify the initial state for the model.
	//@@
	InitialState []*ModelSequenceBatching_InitialState `protobuf:"bytes,5,rep,name=initial_state,json=initialState,proto3" json:"initial_state,omitempty"`
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message State
@@
@@    An input / output pair of tensors that carry state for the sequence.
@@
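A sketch pairing a state output with the state input it feeds on the next request of the sequence. The tensor names are hypothetical, and the datatype constant name is again an assumption.

package inference_test

import inference "example.com/gen/inference" // hypothetical module path

func newState() *inference.ModelSequenceBatching_State {
	return &inference.ModelSequenceBatching_State{
		// STATE_OUT from request N is fed back as STATE_IN on request N+1.
		InputName:  "STATE_IN",  // hypothetical tensor name
		OutputName: "STATE_OUT", // hypothetical tensor name
		DataType:   inference.DataType_TYPE_FP32, // assumed constant name
		Dims:       []int64{1, 128},
		InitialState: []*inference.ModelSequenceBatching_InitialState{
			{
				Name:      "zeroed",
				DataType:  inference.DataType_TYPE_FP32,
				Dims:      []int64{1, 128},
				StateData: &inference.ModelSequenceBatching_InitialState_ZeroData{ZeroData: true},
			},
		},
	}
}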
func (*ModelSequenceBatching_State) Descriptor
deprecated
func (*ModelSequenceBatching_State) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_State.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_State) GetDataType ¶
func (x *ModelSequenceBatching_State) GetDataType() DataType
func (*ModelSequenceBatching_State) GetDims ¶
func (x *ModelSequenceBatching_State) GetDims() []int64
func (*ModelSequenceBatching_State) GetInitialState ¶
func (x *ModelSequenceBatching_State) GetInitialState() []*ModelSequenceBatching_InitialState
func (*ModelSequenceBatching_State) GetInputName ¶
func (x *ModelSequenceBatching_State) GetInputName() string
func (*ModelSequenceBatching_State) GetOutputName ¶
func (x *ModelSequenceBatching_State) GetOutputName() string
func (*ModelSequenceBatching_State) ProtoMessage ¶
func (*ModelSequenceBatching_State) ProtoMessage()
func (*ModelSequenceBatching_State) ProtoReflect ¶
func (x *ModelSequenceBatching_State) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_State) Reset ¶
func (x *ModelSequenceBatching_State) Reset()
func (*ModelSequenceBatching_State) String ¶
func (x *ModelSequenceBatching_State) String() string
type ModelSequenceBatching_StrategyDirect ¶
type ModelSequenceBatching_StrategyDirect struct {
	//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
	//@@
	//@@    The maximum time, in microseconds, a candidate request
	//@@    will be delayed in the sequence batch scheduling queue to
	//@@    wait for additional requests for batching. Default is 0.
	//@@
	MaxQueueDelayMicroseconds uint64 `` /* 141-byte string literal not displayed */
	//@@ .. cpp:var:: float minimum_slot_utilization
	//@@
	//@@    The minimum slot utilization that must be satisfied to
	//@@    execute the batch before 'max_queue_delay_microseconds' expires.
	//@@    For example, a value of 0.5 indicates that the batch should be
	//@@    executed as soon as 50% or more of the slots are ready even if
	//@@    the 'max_queue_delay_microseconds' timeout has not expired.
	//@@    The default is 0.0, indicating that a batch will be executed
	//@@    before 'max_queue_delay_microseconds' timeout expires if at least
	//@@    one batch slot is ready. 'max_queue_delay_microseconds' will be
	//@@    ignored unless minimum_slot_utilization is set to a non-zero
	//@@    value.
	//@@
	MinimumSlotUtilization float32 `` /* 131-byte string literal not displayed */
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message StrategyDirect
@@
@@    The sequence batcher uses a specific, unique batch
@@    slot for each sequence. All inference requests in a
@@    sequence are directed to the same batch slot in the same
@@    model instance over the lifetime of the sequence. This
@@    is the default strategy.
@@
func (*ModelSequenceBatching_StrategyDirect) Descriptor
deprecated
func (*ModelSequenceBatching_StrategyDirect) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_StrategyDirect.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_StrategyDirect) GetMaxQueueDelayMicroseconds ¶
func (x *ModelSequenceBatching_StrategyDirect) GetMaxQueueDelayMicroseconds() uint64
func (*ModelSequenceBatching_StrategyDirect) GetMinimumSlotUtilization ¶
func (x *ModelSequenceBatching_StrategyDirect) GetMinimumSlotUtilization() float32
func (*ModelSequenceBatching_StrategyDirect) ProtoMessage ¶
func (*ModelSequenceBatching_StrategyDirect) ProtoMessage()
func (*ModelSequenceBatching_StrategyDirect) ProtoReflect ¶
func (x *ModelSequenceBatching_StrategyDirect) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_StrategyDirect) Reset ¶
func (x *ModelSequenceBatching_StrategyDirect) Reset()
func (*ModelSequenceBatching_StrategyDirect) String ¶
func (x *ModelSequenceBatching_StrategyDirect) String() string
type ModelSequenceBatching_StrategyOldest ¶
type ModelSequenceBatching_StrategyOldest struct {
	//@@ .. cpp:var:: int32 max_candidate_sequences
	//@@
	//@@    Maximum number of candidate sequences that the batcher
	//@@    maintains. Excess sequences are kept in an ordered backlog
	//@@    and become candidates when existing candidate sequences
	//@@    complete.
	//@@
	MaxCandidateSequences int32 `` /* 127-byte string literal not displayed */
	//@@ .. cpp:var:: int32 preferred_batch_size (repeated)
	//@@
	//@@    Preferred batch sizes for dynamic batching of candidate
	//@@    sequences. If a batch of one of these sizes can be formed
	//@@    it will be executed immediately. If not specified a
	//@@    preferred batch size will be chosen automatically
	//@@    based on model and GPU characteristics.
	//@@
	PreferredBatchSize []int32 `protobuf:"varint,2,rep,packed,name=preferred_batch_size,json=preferredBatchSize,proto3" json:"preferred_batch_size,omitempty"`
	//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
	//@@
	//@@    The maximum time, in microseconds, a candidate request
	//@@    will be delayed in the dynamic batch scheduling queue to
	//@@    wait for additional requests for batching. Default is 0.
	//@@
	MaxQueueDelayMicroseconds uint64 `` /* 141-byte string literal not displayed */
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message StrategyOldest
@@
@@    The sequence batcher maintains up to 'max_candidate_sequences'
@@    candidate sequences. 'max_candidate_sequences' can be greater
@@    than the model's 'max_batch_size'. For inferencing the batcher
@@    chooses from the candidate sequences up to 'max_batch_size'
@@    inference requests. Requests are chosen in an oldest-first
@@    manner across all candidate sequences. A given sequence is
@@    not guaranteed to be assigned to the same batch slot for
@@    all inference requests of that sequence.
@@
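For contrast with the 'direct' sketch earlier, here is the 'oldest' strategy selected through its oneof wrapper; the numbers are illustrative, not recommended defaults.

package inference_test

import inference "example.com/gen/inference" // hypothetical module path

func newOldestBatching() *inference.ModelSequenceBatching {
	return &inference.ModelSequenceBatching{
		StrategyChoice: &inference.ModelSequenceBatching_Oldest{
			Oldest: &inference.ModelSequenceBatching_StrategyOldest{
				MaxCandidateSequences:     8,              // backlog beyond this stays queued
				PreferredBatchSize:        []int32{4, 8},  // execute immediately at these sizes
				MaxQueueDelayMicroseconds: 100,
			},
		},
	}
}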
func (*ModelSequenceBatching_StrategyOldest) Descriptor
deprecated
func (*ModelSequenceBatching_StrategyOldest) Descriptor() ([]byte, []int)
Deprecated: Use ModelSequenceBatching_StrategyOldest.ProtoReflect.Descriptor instead.
func (*ModelSequenceBatching_StrategyOldest) GetMaxCandidateSequences ¶
func (x *ModelSequenceBatching_StrategyOldest) GetMaxCandidateSequences() int32
func (*ModelSequenceBatching_StrategyOldest) GetMaxQueueDelayMicroseconds ¶
func (x *ModelSequenceBatching_StrategyOldest) GetMaxQueueDelayMicroseconds() uint64
func (*ModelSequenceBatching_StrategyOldest) GetPreferredBatchSize ¶
func (x *ModelSequenceBatching_StrategyOldest) GetPreferredBatchSize() []int32
func (*ModelSequenceBatching_StrategyOldest) ProtoMessage ¶
func (*ModelSequenceBatching_StrategyOldest) ProtoMessage()
func (*ModelSequenceBatching_StrategyOldest) ProtoReflect ¶
func (x *ModelSequenceBatching_StrategyOldest) ProtoReflect() protoreflect.Message
func (*ModelSequenceBatching_StrategyOldest) Reset ¶
func (x *ModelSequenceBatching_StrategyOldest) Reset()
func (*ModelSequenceBatching_StrategyOldest) String ¶
func (x *ModelSequenceBatching_StrategyOldest) String() string
type ModelTensorReshape ¶
type ModelTensorReshape struct {
	//@@ .. cpp:var:: int64 shape (repeated)
	//@@
	//@@    The shape to use for reshaping.
	//@@
	Shape []int64 `protobuf:"varint,1,rep,packed,name=shape,proto3" json:"shape,omitempty"`
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message ModelTensorReshape
@@
@@    Reshape specification for input and output tensors.
@@
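A one-line sketch, since the message is a single repeated field; the chosen shape is arbitrary and meaningful only relative to the input or output it is attached to.

package inference_test

import inference "example.com/gen/inference" // hypothetical module path

// newReshape describes the shape the model actually expects for a tensor
// whose declared dims differ, e.g. presenting a [batch, 1] tensor as [1].
func newReshape() *inference.ModelTensorReshape {
	return &inference.ModelTensorReshape{Shape: []int64{1}}
}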
func (*ModelTensorReshape) Descriptor
deprecated
func (*ModelTensorReshape) Descriptor() ([]byte, []int)
Deprecated: Use ModelTensorReshape.ProtoReflect.Descriptor instead.
func (*ModelTensorReshape) GetShape ¶
func (x *ModelTensorReshape) GetShape() []int64
func (*ModelTensorReshape) ProtoMessage ¶
func (*ModelTensorReshape) ProtoMessage()
func (*ModelTensorReshape) ProtoReflect ¶
func (x *ModelTensorReshape) ProtoReflect() protoreflect.Message
func (*ModelTensorReshape) Reset ¶
func (x *ModelTensorReshape) Reset()
func (*ModelTensorReshape) String ¶
func (x *ModelTensorReshape) String() string
type ModelTransactionPolicy ¶
type ModelTransactionPolicy struct {
	//@@ .. cpp:var:: bool decoupled
	//@@
	//@@    Indicates whether responses generated by the model are decoupled
	//@@    from the requests issued to it, which means the number of
	//@@    responses generated by the model may differ from the number of
	//@@    requests issued, and that the responses may be out of order
	//@@    relative to the order of requests. The default is false, which
	//@@    means the model will generate exactly one response for each
	//@@    request.
	//@@
	Decoupled bool `protobuf:"varint,1,opt,name=decoupled,proto3" json:"decoupled,omitempty"`
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message ModelTransactionPolicy
@@
@@    The specification that describes the nature of transactions
@@    to be expected from the model.
@@
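Another single-field message; a trivial sketch marking a model as decoupled (zero, one, or many responses per request, possibly out of order):

package inference_test

import inference "example.com/gen/inference" // hypothetical module path

func newDecoupledPolicy() *inference.ModelTransactionPolicy {
	return &inference.ModelTransactionPolicy{Decoupled: true}
}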
func (*ModelTransactionPolicy) Descriptor
deprecated
func (*ModelTransactionPolicy) Descriptor() ([]byte, []int)
Deprecated: Use ModelTransactionPolicy.ProtoReflect.Descriptor instead.
func (*ModelTransactionPolicy) GetDecoupled ¶
func (x *ModelTransactionPolicy) GetDecoupled() bool
func (*ModelTransactionPolicy) ProtoMessage ¶
func (*ModelTransactionPolicy) ProtoMessage()
func (*ModelTransactionPolicy) ProtoReflect ¶
func (x *ModelTransactionPolicy) ProtoReflect() protoreflect.Message
func (*ModelTransactionPolicy) Reset ¶
func (x *ModelTransactionPolicy) Reset()
func (*ModelTransactionPolicy) String ¶
func (x *ModelTransactionPolicy) String() string
type ModelVersionPolicy ¶
type ModelVersionPolicy struct {
	//@@ .. cpp:var:: oneof policy_choice
	//@@
	//@@    Each model must implement only a single version policy. The
	//@@    default policy is 'Latest'.
	//@@
	// Types that are assignable to PolicyChoice:
	//	*ModelVersionPolicy_Latest_
	//	*ModelVersionPolicy_All_
	//	*ModelVersionPolicy_Specific_
	PolicyChoice isModelVersionPolicy_PolicyChoice `protobuf_oneof:"policy_choice"`
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message ModelVersionPolicy
@@
@@    Policy indicating which versions of a model should be made
@@    available by the inference server.
@@
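The policy is selected the same way as the other oneofs above, by assigning a wrapper type to PolicyChoice. A sketch that serves only two specific versions (the version numbers are illustrative):

package inference_test

import inference "example.com/gen/inference" // hypothetical module path

func newVersionPolicy() *inference.ModelVersionPolicy {
	// Serve only versions 1 and 3, via the Specific oneof wrapper type.
	return &inference.ModelVersionPolicy{
		PolicyChoice: &inference.ModelVersionPolicy_Specific_{
			Specific: &inference.ModelVersionPolicy_Specific{Versions: []int64{1, 3}},
		},
	}
}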
func (*ModelVersionPolicy) Descriptor
deprecated
func (*ModelVersionPolicy) Descriptor() ([]byte, []int)
Deprecated: Use ModelVersionPolicy.ProtoReflect.Descriptor instead.
func (*ModelVersionPolicy) GetAll ¶
func (x *ModelVersionPolicy) GetAll() *ModelVersionPolicy_All
func (*ModelVersionPolicy) GetLatest ¶
func (x *ModelVersionPolicy) GetLatest() *ModelVersionPolicy_Latest
func (*ModelVersionPolicy) GetPolicyChoice ¶
func (m *ModelVersionPolicy) GetPolicyChoice() isModelVersionPolicy_PolicyChoice
func (*ModelVersionPolicy) GetSpecific ¶
func (x *ModelVersionPolicy) GetSpecific() *ModelVersionPolicy_Specific
func (*ModelVersionPolicy) ProtoMessage ¶
func (*ModelVersionPolicy) ProtoMessage()
func (*ModelVersionPolicy) ProtoReflect ¶
func (x *ModelVersionPolicy) ProtoReflect() protoreflect.Message
func (*ModelVersionPolicy) Reset ¶
func (x *ModelVersionPolicy) Reset()
func (*ModelVersionPolicy) String ¶
func (x *ModelVersionPolicy) String() string
type ModelVersionPolicy_All ¶
type ModelVersionPolicy_All struct {
// contains filtered or unexported fields
}
@@
@@ .. cpp:var:: message All
@@
@@    Serve all versions of the model.
@@
func (*ModelVersionPolicy_All) Descriptor
deprecated
func (*ModelVersionPolicy_All) Descriptor() ([]byte, []int)
Deprecated: Use ModelVersionPolicy_All.ProtoReflect.Descriptor instead.
func (*ModelVersionPolicy_All) ProtoMessage ¶
func (*ModelVersionPolicy_All) ProtoMessage()
func (*ModelVersionPolicy_All) ProtoReflect ¶
func (x *ModelVersionPolicy_All) ProtoReflect() protoreflect.Message
func (*ModelVersionPolicy_All) Reset ¶
func (x *ModelVersionPolicy_All) Reset()
func (*ModelVersionPolicy_All) String ¶
func (x *ModelVersionPolicy_All) String() string
type ModelVersionPolicy_All_ ¶
type ModelVersionPolicy_All_ struct {
	//@@ .. cpp:var:: All all
	//@@
	//@@    Serve all versions of the model.
	//@@
	All *ModelVersionPolicy_All `protobuf:"bytes,2,opt,name=all,proto3,oneof"`
}
type ModelVersionPolicy_Latest ¶
type ModelVersionPolicy_Latest struct {
	//@@ .. cpp:var:: uint32 num_versions
	//@@
	//@@    Serve only the 'num_versions' highest-numbered versions. The
	//@@    default value of 'num_versions' is 1, indicating that by
	//@@    default only the single highest-numbered version of a
	//@@    model will be served.
	//@@
	NumVersions uint32 `protobuf:"varint,1,opt,name=num_versions,json=numVersions,proto3" json:"num_versions,omitempty"`
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message Latest
@@
@@    Serve only the latest version(s) of a model. This is
@@    the default policy.
@@
func (*ModelVersionPolicy_Latest) Descriptor
deprecated
func (*ModelVersionPolicy_Latest) Descriptor() ([]byte, []int)
Deprecated: Use ModelVersionPolicy_Latest.ProtoReflect.Descriptor instead.
func (*ModelVersionPolicy_Latest) GetNumVersions ¶
func (x *ModelVersionPolicy_Latest) GetNumVersions() uint32
func (*ModelVersionPolicy_Latest) ProtoMessage ¶
func (*ModelVersionPolicy_Latest) ProtoMessage()
func (*ModelVersionPolicy_Latest) ProtoReflect ¶
func (x *ModelVersionPolicy_Latest) ProtoReflect() protoreflect.Message
func (*ModelVersionPolicy_Latest) Reset ¶
func (x *ModelVersionPolicy_Latest) Reset()
func (*ModelVersionPolicy_Latest) String ¶
func (x *ModelVersionPolicy_Latest) String() string
type ModelVersionPolicy_Latest_ ¶
type ModelVersionPolicy_Latest_ struct {
	//@@ .. cpp:var:: Latest latest
	//@@
	//@@    Serve only latest version(s) of the model.
	//@@
	Latest *ModelVersionPolicy_Latest `protobuf:"bytes,1,opt,name=latest,proto3,oneof"`
}
type ModelVersionPolicy_Specific ¶
type ModelVersionPolicy_Specific struct {
	//@@ .. cpp:var:: int64 versions (repeated)
	//@@
	//@@    The specific versions of the model that will be served.
	//@@
	Versions []int64 `protobuf:"varint,1,rep,packed,name=versions,proto3" json:"versions,omitempty"`
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message Specific
@@
@@    Serve only specific versions of the model.
@@
func (*ModelVersionPolicy_Specific) Descriptor
deprecated
func (*ModelVersionPolicy_Specific) Descriptor() ([]byte, []int)
Deprecated: Use ModelVersionPolicy_Specific.ProtoReflect.Descriptor instead.
func (*ModelVersionPolicy_Specific) GetVersions ¶
func (x *ModelVersionPolicy_Specific) GetVersions() []int64
func (*ModelVersionPolicy_Specific) ProtoMessage ¶
func (*ModelVersionPolicy_Specific) ProtoMessage()
func (*ModelVersionPolicy_Specific) ProtoReflect ¶
func (x *ModelVersionPolicy_Specific) ProtoReflect() protoreflect.Message
func (*ModelVersionPolicy_Specific) Reset ¶
func (x *ModelVersionPolicy_Specific) Reset()
func (*ModelVersionPolicy_Specific) String ¶
func (x *ModelVersionPolicy_Specific) String() string
type ModelVersionPolicy_Specific_ ¶
type ModelVersionPolicy_Specific_ struct {
	//@@ .. cpp:var:: Specific specific
	//@@
	//@@    Serve only specific version(s) of the model.
	//@@
	Specific *ModelVersionPolicy_Specific `protobuf:"bytes,3,opt,name=specific,proto3,oneof"`
}
type ModelWarmup ¶
type ModelWarmup struct {
	//@@ .. cpp:var:: string name
	//@@
	//@@    The name of the request sample.
	//@@
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	//@@ .. cpp:var:: uint32 batch_size
	//@@
	//@@    The batch size of the inference request. This must be >= 1. For
	//@@    models that don't support batching, batch_size must be 1. If
	//@@    batch_size > 1, the 'inputs' specified below will be duplicated to
	//@@    match the batch size requested.
	//@@
	BatchSize uint32 `protobuf:"varint,2,opt,name=batch_size,json=batchSize,proto3" json:"batch_size,omitempty"`
	//@@ .. cpp:var:: map<string, Input> inputs
	//@@
	//@@    The warmup metadata associated with every model input, including
	//@@    control tensors.
	//@@
	Inputs map[string]*ModelWarmup_Input `` /* 153-byte string literal not displayed */
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message ModelWarmup
@@
@@    Settings used to construct the request sample for model warmup.
@@
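A sketch of a warmup sample with one randomly generated input. The sample and input names are hypothetical, the datatype constant name is the same assumption as earlier, and the input data source is selected through the input_data_type oneof documented under ModelWarmup_Input below.

package inference_test

import inference "example.com/gen/inference" // hypothetical module path

func newWarmup() *inference.ModelWarmup {
	return &inference.ModelWarmup{
		Name:      "sample_request", // hypothetical sample name
		BatchSize: 1,                // must be 1 for models without batching
		Inputs: map[string]*inference.ModelWarmup_Input{
			"INPUT0": { // hypothetical model input name
				DataType: inference.DataType_TYPE_FP32, // assumed constant name
				Dims:     []int64{16},
				// Random data; for STRING inputs this falls back to zeros.
				InputDataType: &inference.ModelWarmup_Input_RandomData{RandomData: true},
			},
		},
	}
}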
func (*ModelWarmup) Descriptor
deprecated
func (*ModelWarmup) Descriptor() ([]byte, []int)
Deprecated: Use ModelWarmup.ProtoReflect.Descriptor instead.
func (*ModelWarmup) GetBatchSize ¶
func (x *ModelWarmup) GetBatchSize() uint32
func (*ModelWarmup) GetInputs ¶
func (x *ModelWarmup) GetInputs() map[string]*ModelWarmup_Input
func (*ModelWarmup) GetName ¶
func (x *ModelWarmup) GetName() string
func (*ModelWarmup) ProtoMessage ¶
func (*ModelWarmup) ProtoMessage()
func (*ModelWarmup) ProtoReflect ¶
func (x *ModelWarmup) ProtoReflect() protoreflect.Message
func (*ModelWarmup) Reset ¶
func (x *ModelWarmup) Reset()
func (*ModelWarmup) String ¶
func (x *ModelWarmup) String() string
type ModelWarmup_Input ¶
type ModelWarmup_Input struct {
	//@@ .. cpp:var:: DataType data_type
	//@@
	//@@    The data-type of the input.
	//@@
	DataType DataType `protobuf:"varint,1,opt,name=data_type,json=dataType,proto3,enum=inference.DataType" json:"data_type,omitempty"`
	//@@ .. cpp:var:: int64 dims (repeated)
	//@@
	//@@    The shape of the input tensor, not including the batch dimension.
	//@@
	Dims []int64 `protobuf:"varint,2,rep,packed,name=dims,proto3" json:"dims,omitempty"`
	//@@ .. cpp:var:: oneof input_data_type
	//@@
	//@@    Specify how the input data is generated. If the input has STRING
	//@@    data type and 'random_data' is set, the data generation will fall
	//@@    back to 'zero_data'.
	//@@
	// Types that are assignable to InputDataType:
	//	*ModelWarmup_Input_ZeroData
	//	*ModelWarmup_Input_RandomData
	//	*ModelWarmup_Input_InputDataFile
	InputDataType isModelWarmup_Input_InputDataType `protobuf_oneof:"input_data_type"`
	// contains filtered or unexported fields
}

@@
@@ .. cpp:var:: message Input
@@
@@    Meta data associated with an input.
@@
func (*ModelWarmup_Input) Descriptor
deprecated
func (*ModelWarmup_Input) Descriptor() ([]byte, []int)
Deprecated: Use ModelWarmup_Input.ProtoReflect.Descriptor instead.
func (*ModelWarmup_Input) GetDataType ¶
func (x *ModelWarmup_Input) GetDataType() DataType
func (*ModelWarmup_Input) GetDims ¶
func (x *ModelWarmup_Input) GetDims() []int64
func (*ModelWarmup_Input) GetInputDataFile ¶
func (x *ModelWarmup_Input) GetInputDataFile() string
func (*ModelWarmup_Input) GetInputDataType ¶
func (m *ModelWarmup_Input) GetInputDataType() isModelWarmup_Input_InputDataType
func (*ModelWarmup_Input) GetRandomData ¶
func (x *ModelWarmup_Input) GetRandomData() bool
func (*ModelWarmup_Input) GetZeroData ¶
func (x *ModelWarmup_Input) GetZeroData() bool
func (*ModelWarmup_Input) ProtoMessage ¶
func (*ModelWarmup_Input) ProtoMessage()
func (*ModelWarmup_Input) ProtoReflect ¶
func (x *ModelWarmup_Input) ProtoReflect() protoreflect.Message
func (*ModelWarmup_Input) Reset ¶
func (x *ModelWarmup_Input) Reset()
func (*ModelWarmup_Input) String ¶
func (x *ModelWarmup_Input) String() string
type ModelWarmup_Input_InputDataFile ¶
type ModelWarmup_Input_InputDataFile struct {
	//@@ .. cpp:var:: string input_data_file
	//@@
	//@@    The file whose content will be used as raw input data in
	//@@    row-major order. The file must be provided in a sub-directory
	//@@    'warmup' under the model directory.
	//@@
	InputDataFile string `protobuf:"bytes,5,opt,name=input_data_file,json=inputDataFile,proto3,oneof"`
}
type ModelWarmup_Input_RandomData ¶
type ModelWarmup_Input_RandomData struct {
	//@@
	//@@ .. cpp:var:: bool random_data
	//@@
	//@@    The identifier for using random data as input data. Note that
	//@@    the value of 'random_data' will not be checked; instead,
	//@@    random data will be used as long as the field is set.
	//@@
	RandomData bool `protobuf:"varint,4,opt,name=random_data,json=randomData,proto3,oneof"`
}
type ModelWarmup_Input_ZeroData ¶
type ModelWarmup_Input_ZeroData struct {
	//@@
	//@@ .. cpp:var:: bool zero_data
	//@@
	//@@    The identifier for using zeros as input data. Note that the
	//@@    value of 'zero_data' will not be checked; instead, zero data
	//@@    will be used as long as the field is set.
	//@@
	ZeroData bool `protobuf:"varint,3,opt,name=zero_data,json=zeroData,proto3,oneof"`
}