Index ¶
- Variables
- type CompilationEnvironmentsProto
- func (*CompilationEnvironmentsProto) Descriptor() ([]byte, []int) (deprecated)
- func (x *CompilationEnvironmentsProto) GetEnvironments() []*anypb.Any
- func (*CompilationEnvironmentsProto) ProtoMessage()
- func (x *CompilationEnvironmentsProto) ProtoReflect() protoreflect.Message
- func (x *CompilationEnvironmentsProto) Reset()
- func (x *CompilationEnvironmentsProto) String() string
- type DebugOptions
- func (*DebugOptions) Descriptor() ([]byte, []int) (deprecated)
- func (x *DebugOptions) GetLegacyCommandBufferCustomCallTargets() []string
- func (x *DebugOptions) GetXlaAllowExcessPrecision() bool
- func (x *DebugOptions) GetXlaAllowScalarIndexDynamicOps() bool
- func (x *DebugOptions) GetXlaBackendExtraOptions() map[string]string
- func (x *DebugOptions) GetXlaBackendOptimizationLevel() int32
- func (x *DebugOptions) GetXlaCmdBufferTraceCacheSize() int64
- func (x *DebugOptions) GetXlaCpuCopyInsertionUseRegionAnalysis() bool
- func (x *DebugOptions) GetXlaCpuEnableConcurrencyOptimizedScheduler() bool
- func (x *DebugOptions) GetXlaCpuEnableFastMath() bool
- func (x *DebugOptions) GetXlaCpuEnableFastMinMax() bool
- func (x *DebugOptions) GetXlaCpuEnableXprofTraceme() bool
- func (x *DebugOptions) GetXlaCpuFastMathHonorDivision() bool
- func (x *DebugOptions) GetXlaCpuFastMathHonorFunctions() bool
- func (x *DebugOptions) GetXlaCpuFastMathHonorInfs() bool
- func (x *DebugOptions) GetXlaCpuFastMathHonorNans() bool
- func (x *DebugOptions) GetXlaCpuMaxIsa() string
- func (x *DebugOptions) GetXlaCpuMultiThreadEigen() bool
- func (x *DebugOptions) GetXlaCpuParallelCodegenSplitCount() int32
- func (x *DebugOptions) GetXlaCpuPreferVectorWidth() int32
- func (x *DebugOptions) GetXlaCpuStrictDotConvMath() bool
- func (x *DebugOptions) GetXlaCpuUseAcl() bool
- func (x *DebugOptions) GetXlaCpuUseMklDnn() bool
- func (x *DebugOptions) GetXlaCpuUseThunkRuntime() bool
- func (x *DebugOptions) GetXlaCpuUseXnnpack() bool
- func (x *DebugOptions) GetXlaDebugBufferAssignmentShowMax() int64
- func (x *DebugOptions) GetXlaDetailedLogging() bool
- func (x *DebugOptions) GetXlaDisableAllHloPasses() bool
- func (x *DebugOptions) GetXlaDisableHloPasses() []string
- func (x *DebugOptions) GetXlaDumpCompressProtos() bool
- func (x *DebugOptions) GetXlaDumpDisableMetadata() bool
- func (x *DebugOptions) GetXlaDumpEnableMlirPrettyForm() bool
- func (x *DebugOptions) GetXlaDumpFusionVisualization() bool
- func (x *DebugOptions) GetXlaDumpHloAsDot() bool
- func (x *DebugOptions) GetXlaDumpHloAsHtml() bool
- func (x *DebugOptions) GetXlaDumpHloAsLongText() bool
- func (x *DebugOptions) GetXlaDumpHloAsProto() bool
- func (x *DebugOptions) GetXlaDumpHloAsText() bool
- func (x *DebugOptions) GetXlaDumpHloAsUrl() bool
- func (x *DebugOptions) GetXlaDumpHloModuleRe() string
- func (x *DebugOptions) GetXlaDumpHloPassRe() string
- func (x *DebugOptions) GetXlaDumpHloPipelineRe() string
- func (x *DebugOptions) GetXlaDumpHloSnapshots() bool
- func (x *DebugOptions) GetXlaDumpIncludeTimestamp() bool
- func (x *DebugOptions) GetXlaDumpLargeConstants() bool
- func (x *DebugOptions) GetXlaDumpLatencyHidingSchedule() bool
- func (x *DebugOptions) GetXlaDumpMaxHloModules() int32
- func (x *DebugOptions) GetXlaDumpModuleMetadata() bool
- func (x *DebugOptions) GetXlaDumpTo() string
- func (x *DebugOptions) GetXlaEliminateHloImplicitBroadcast() bool
- func (x *DebugOptions) GetXlaEmbedIrInExecutable() bool
- func (x *DebugOptions) GetXlaEnableCommandBuffersDuringProfiling() bool
- func (x *DebugOptions) GetXlaEnableDumping() bool
- func (x *DebugOptions) GetXlaEnableFastMath() bool
- func (x *DebugOptions) GetXlaEnableHloPassesOnly() []string
- func (x *DebugOptions) GetXlaExperimentalIgnoreChannelId() bool
- func (x *DebugOptions) GetXlaForceHostPlatformDeviceCount() int32
- func (x *DebugOptions) GetXlaGpuAlgorithmDenylistPath() string
- func (x *DebugOptions) GetXlaGpuAllGatherCombineThresholdBytes() int64
- func (x *DebugOptions) GetXlaGpuAllReduceBlueconnectNumDevicesPerHost() int32
- func (x *DebugOptions) GetXlaGpuAllReduceCombineThresholdBytes() int64
- func (x *DebugOptions) GetXlaGpuAnalyticalLatencyEstimatorOptions() map[string]string
- func (x *DebugOptions) GetXlaGpuAsyncDot() bool
- func (x *DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetGb() int32
- func (x *DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetRatio() float32
- func (x *DebugOptions) GetXlaGpuAutotuneGemmRtol() float32
- func (x *DebugOptions) GetXlaGpuAutotuneLevel() int32
- func (x *DebugOptions) GetXlaGpuAutotuneMaxSolutions() int64
- func (x *DebugOptions) GetXlaGpuCollectCostModelStats() bool
- func (x *DebugOptions) GetXlaGpuCollectiveInflationFactor() int32
- func (x *DebugOptions) GetXlaGpuCollectivePermuteDecomposerThreshold() int64
- func (x *DebugOptions) GetXlaGpuCollectivesUsePersistentCliques() bool
- func (x *DebugOptions) GetXlaGpuCopyInsertionUseRegionAnalysis() bool
- func (x *DebugOptions) GetXlaGpuCrashOnVerificationFailures() bool
- func (x *DebugOptions) GetXlaGpuCublasFallback() bool
- func (x *DebugOptions) GetXlaGpuCudaDataDir() string
- func (x *DebugOptions) GetXlaGpuCudnnGemmFusionLevel() int32
- func (x *DebugOptions) GetXlaGpuCudnnGemmMaxPlans() int32
- func (x *DebugOptions) GetXlaGpuDeterministicOps() bool
- func (x *DebugOptions) GetXlaGpuDisableAsyncCollectives() []DebugOptions_CollectiveOpType
- func (x *DebugOptions) GetXlaGpuDisableGpuasmOptimizations() bool
- func (x *DebugOptions) GetXlaGpuDotMergerThresholdMb() int32
- func (x *DebugOptions) GetXlaGpuDumpAutotuneLogsTo() string
- func (x *DebugOptions) GetXlaGpuDumpAutotuneResultsTo() string
- func (x *DebugOptions) GetXlaGpuDumpAutotunedGemmFusions() bool
- func (x *DebugOptions) GetXlaGpuDumpHloUnoptimizedSnapshots() bool
- func (x *DebugOptions) GetXlaGpuDumpLlvmir() bool
- func (x *DebugOptions) GetXlaGpuEnableAllGatherCombineByDim() bool
- func (x *DebugOptions) GetXlaGpuEnableAnalyticalLatencyEstimator() bool
- func (x *DebugOptions) GetXlaGpuEnableAnalyticalSolLatencyEstimator() bool
- func (x *DebugOptions) GetXlaGpuEnableApproxCostlyCollectives() bool
- func (x *DebugOptions) GetXlaGpuEnableCommandBuffer() []DebugOptions_CommandBufferCmdType
- func (x *DebugOptions) GetXlaGpuEnableCubRadixSort() bool
- func (x *DebugOptions) GetXlaGpuEnableCublaslt() bool
- func (x *DebugOptions) GetXlaGpuEnableCudnnFmha() bool
- func (x *DebugOptions) GetXlaGpuEnableCudnnFrontend() bool
- func (x *DebugOptions) GetXlaGpuEnableCudnnInt8X32ConvolutionReordering() bool
- func (x *DebugOptions) GetXlaGpuEnableCudnnLayerNorm() bool
- func (x *DebugOptions) GetXlaGpuEnableCustomFusions() bool
- func (x *DebugOptions) GetXlaGpuEnableCustomFusionsRe() string
- func (x *DebugOptions) GetXlaGpuEnableDynamicSliceFusion() bool
- func (x *DebugOptions) GetXlaGpuEnableFastMinMax() bool
- func (x *DebugOptions) GetXlaGpuEnableHighestPriorityAsyncStream() bool
- func (x *DebugOptions) GetXlaGpuEnableHostMemoryOffloading() bool
- func (x *DebugOptions) GetXlaGpuEnableLatencyHidingScheduler() bool
- func (x *DebugOptions) GetXlaGpuEnableLibnvptxcompiler() bool
- func (x *DebugOptions) GetXlaGpuEnableLlvmModuleCompilationParallelism() bool
- func (x *DebugOptions) GetXlaGpuEnableNcclCliqueOptimization() bool
- func (x *DebugOptions) GetXlaGpuEnableNcclCommSplitting() bool
- func (x *DebugOptions) GetXlaGpuEnableNcclPerStreamComms() bool
- func (x *DebugOptions) GetXlaGpuEnableNcclUserBuffers() bool
- func (x *DebugOptions) GetXlaGpuEnablePipelinedAllGather() bool
- func (x *DebugOptions) GetXlaGpuEnablePipelinedAllReduce() bool
- func (x *DebugOptions) GetXlaGpuEnablePipelinedCollectives() bool
- func (x *DebugOptions) GetXlaGpuEnablePipelinedP2P() bool
- func (x *DebugOptions) GetXlaGpuEnablePipelinedReduceScatter() bool
- func (x *DebugOptions) GetXlaGpuEnableReassociationForConvertedAr() bool
- func (x *DebugOptions) GetXlaGpuEnableReduceScatterCombineByDim() bool
- func (x *DebugOptions) GetXlaGpuEnableReductionEpilogueFusion() bool
- func (x *DebugOptions) GetXlaGpuEnableScatterDeterminismExpander() bool
- func (x *DebugOptions) GetXlaGpuEnableSharedConstants() bool
- func (x *DebugOptions) GetXlaGpuEnableSplitKAutotuning() bool
- func (x *DebugOptions) GetXlaGpuEnableTritonGemm() bool
- func (x *DebugOptions) GetXlaGpuEnableTritonHopper() bool
- func (x *DebugOptions) GetXlaGpuEnableWhileLoopDoubleBuffering() bool
- func (x *DebugOptions) GetXlaGpuEnableWhileLoopReduceScatterCodeMotion() bool
- func (x *DebugOptions) GetXlaGpuEnableWhileLoopUnrolling() DebugOptions_WhileLoopUnrolling
- func (x *DebugOptions) GetXlaGpuEnsureMinorDotContractionDims() bool
- func (x *DebugOptions) GetXlaGpuExcludeNondeterministicOps() bool
- func (x *DebugOptions) GetXlaGpuExecutableTerminateTimeoutSeconds() int32
- func (x *DebugOptions) GetXlaGpuExecutableWarnStuckTimeoutSeconds() int32
- func (x *DebugOptions) GetXlaGpuExhaustiveTilingSearch() bool
- func (x *DebugOptions) GetXlaGpuExperimentalAutotuneCacheMode() DebugOptions_AutotuneCacheMode
- func (x *DebugOptions) GetXlaGpuExperimentalDisableBinaryLibraries() bool
- func (x *DebugOptions) GetXlaGpuExperimentalDumpFdoProfiles() bool
- func (x *DebugOptions) GetXlaGpuExperimentalEnableAlltoallWindowedEinsum() bool
- func (x *DebugOptions) GetXlaGpuExperimentalEnableFusionBlockLevelRewriter() bool
- func (x *DebugOptions) GetXlaGpuExperimentalEnablePipelineParallelismOpt() bool
- func (x *DebugOptions) GetXlaGpuExperimentalEnableTritonHerolessPriorityFusion() bool
- func (x *DebugOptions) GetXlaGpuExperimentalEnableTritonTma() bool
- func (x *DebugOptions) GetXlaGpuExperimentalPackDotOperandsAlongKDimension() bool
- func (x *DebugOptions) GetXlaGpuExperimentalParallelCollectiveOverlapLimit() int32
- func (x *DebugOptions) GetXlaGpuExperimentalStreamAnnotation() bool
- func (x *DebugOptions) GetXlaGpuFailPtxCompilationOnRegisterSpilling() bool
- func (x *DebugOptions) GetXlaGpuFilterKernelsSpillingRegistersOnAutotuning() bool
- func (x *DebugOptions) GetXlaGpuForceCompilationParallelism() int32
- func (x *DebugOptions) GetXlaGpuForceConvNchw() bool
- func (x *DebugOptions) GetXlaGpuForceConvNhwc() bool
- func (x *DebugOptions) GetXlaGpuFtz() bool
- func (x *DebugOptions) GetXlaGpuFusedAttentionUseCudnnRng() bool
- func (x *DebugOptions) GetXlaGpuGemmRewriteSizeThreshold() int64
- func (x *DebugOptions) GetXlaGpuGenerateDebugInfo() bool
- func (x *DebugOptions) GetXlaGpuGenerateLineInfo() bool
- func (x *DebugOptions) GetXlaGpuGraphEnableConcurrentRegion() bool
- func (x *DebugOptions) GetXlaGpuGraphMinGraphSize() int32
- func (x *DebugOptions) GetXlaGpuKernelCacheFile() string
- func (x *DebugOptions) GetXlaGpuLibnvjitlinkMode() DebugOptions_LibNvJitLinkMode
- func (x *DebugOptions) GetXlaGpuLlvmIrFile() []string
- func (x *DebugOptions) GetXlaGpuLlvmVerificationLevel() int32
- func (x *DebugOptions) GetXlaGpuLoadAutotuneResultsFrom() string
- func (x *DebugOptions) GetXlaGpuMemoryLimitSlopFactor() int32
- func (x *DebugOptions) GetXlaGpuMockCustomCalls() bool
- func (x *DebugOptions) GetXlaGpuMultiStreamedWindowedEinsum() bool
- func (x *DebugOptions) GetXlaGpuNcclCollectiveMaxNchannels() int64
- func (x *DebugOptions) GetXlaGpuNcclInitMaxRankPerRootRatio() int64
- func (x *DebugOptions) GetXlaGpuNcclP2PMaxNchannels() int64
- func (x *DebugOptions) GetXlaGpuNcclTerminateOnError() bool
- func (x *DebugOptions) GetXlaGpuNcclTerminationTimeoutSeconds() int64
- func (x *DebugOptions) GetXlaGpuOperandBytesThresholdForWindowedEinsum() int64
- func (x *DebugOptions) GetXlaGpuOverrideGemmAutotuner() string
- func (x *DebugOptions) GetXlaGpuPerFusionAutotuneCacheDir() string
- func (x *DebugOptions) GetXlaGpuPgleAccuracyChecker() DebugOptions_PGLEStrictnessLevel
- func (x *DebugOptions) GetXlaGpuPgleProfileFileOrDirectoryPath() string
- func (x *DebugOptions) GetXlaGpuPtxFile() []string
- func (x *DebugOptions) GetXlaGpuReduceScatterCombineThresholdBytes() int64
- func (x *DebugOptions) GetXlaGpuRedzonePaddingBytes() int64
- func (x *DebugOptions) GetXlaGpuRedzoneScratchMaxMegabytes() int64
- func (x *DebugOptions) GetXlaGpuRequireCompleteAotAutotuneResults() bool
- func (x *DebugOptions) GetXlaGpuRequireExclusiveLock() bool
- func (x *DebugOptions) GetXlaGpuShapeChecks() DebugOptions_ShapeChecks
- func (x *DebugOptions) GetXlaGpuShardAutotuning() bool
- func (x *DebugOptions) GetXlaGpuStrictConvAlgorithmPicker() bool
- func (x *DebugOptions) GetXlaGpuTargetConfigFilename() string
- func (x *DebugOptions) GetXlaGpuTempBufferUseSeparateColor() bool
- func (x *DebugOptions) GetXlaGpuThresholdForWindowedEinsumMib() int64
- func (x *DebugOptions) GetXlaGpuTritonGemmAny() bool
- func (x *DebugOptions) GetXlaGpuTritonGemmDisableReducedPrecisionReduction() bool
- func (x *DebugOptions) GetXlaGpuUnsafeFallbackToDriverOnPtxasNotFound() bool
- func (x *DebugOptions) GetXlaGpuUnsafePipelinedLoopAnnotator() bool
- func (x *DebugOptions) GetXlaGpuUnsupportedAnnotateWithEmitterLoc() bool
- func (x *DebugOptions) GetXlaGpuUnsupportedEnableRaggedAllToAllDecomposer() bool
- func (x *DebugOptions) GetXlaGpuUnsupportedEnableTritonGemm() bool
- func (x *DebugOptions) GetXlaGpuUseMemcpyLocalP2P() bool
- func (x *DebugOptions) GetXlaGpuUseRuntimeFusion() bool
- func (x *DebugOptions) GetXlaGpuVerifyTritonFusionNumerics() bool
- func (x *DebugOptions) GetXlaHloEvaluatorUseFastPath() bool
- func (x *DebugOptions) GetXlaHloGraphAddresses() bool
- func (x *DebugOptions) GetXlaHloGraphShardingColor() bool
- func (x *DebugOptions) GetXlaHloProfile() bool
- func (x *DebugOptions) GetXlaLlvmDisableExpensivePasses() bool
- func (x *DebugOptions) GetXlaLlvmEnableAliasScopeMetadata() bool
- func (x *DebugOptions) GetXlaLlvmEnableInvariantLoadMetadata() bool
- func (x *DebugOptions) GetXlaLlvmEnableNoaliasMetadata() bool
- func (x *DebugOptions) GetXlaLlvmForceInlineBeforeSplit() bool
- func (x *DebugOptions) GetXlaMultiheapSizeConstraintPerHeap() int32
- func (x *DebugOptions) GetXlaPartitioningAlgorithm() DebugOptions_PartitioningAlgorithm
- func (x *DebugOptions) GetXlaPjrtAllowAutoLayoutInHlo() bool
- func (x *DebugOptions) GetXlaReduceWindowRewriteBaseLength() int64
- func (x *DebugOptions) GetXlaStepMarkerLocation() DebugOptions_StepMarkerLocation
- func (x *DebugOptions) GetXlaSyntaxSugarAsyncOps() bool
- func (x *DebugOptions) GetXlaTestAllInputLayouts() bool
- func (x *DebugOptions) GetXlaTestAllOutputLayouts() bool
- func (x *DebugOptions) GetXlaTpuDetectInf() bool
- func (x *DebugOptions) GetXlaTpuDetectNan() bool
- func (x *DebugOptions) GetXlaUnsupportedCrashOnHloPassFixMaxIterations() bool
- func (*DebugOptions) ProtoMessage()
- func (x *DebugOptions) ProtoReflect() protoreflect.Message
- func (x *DebugOptions) Reset()
- func (x *DebugOptions) String() string
- type DebugOptions_AutotuneCacheMode
- func (DebugOptions_AutotuneCacheMode) Descriptor() protoreflect.EnumDescriptor
- func (x DebugOptions_AutotuneCacheMode) Enum() *DebugOptions_AutotuneCacheMode
- func (DebugOptions_AutotuneCacheMode) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x DebugOptions_AutotuneCacheMode) Number() protoreflect.EnumNumber
- func (x DebugOptions_AutotuneCacheMode) String() string
- func (DebugOptions_AutotuneCacheMode) Type() protoreflect.EnumType
- type DebugOptions_CollectiveOpType
- func (DebugOptions_CollectiveOpType) Descriptor() protoreflect.EnumDescriptor
- func (x DebugOptions_CollectiveOpType) Enum() *DebugOptions_CollectiveOpType
- func (DebugOptions_CollectiveOpType) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x DebugOptions_CollectiveOpType) Number() protoreflect.EnumNumber
- func (x DebugOptions_CollectiveOpType) String() string
- func (DebugOptions_CollectiveOpType) Type() protoreflect.EnumType
- type DebugOptions_CommandBufferCmdType
- func (DebugOptions_CommandBufferCmdType) Descriptor() protoreflect.EnumDescriptor
- func (x DebugOptions_CommandBufferCmdType) Enum() *DebugOptions_CommandBufferCmdType
- func (DebugOptions_CommandBufferCmdType) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x DebugOptions_CommandBufferCmdType) Number() protoreflect.EnumNumber
- func (x DebugOptions_CommandBufferCmdType) String() string
- func (DebugOptions_CommandBufferCmdType) Type() protoreflect.EnumType
- type DebugOptions_LibNvJitLinkMode
- func (DebugOptions_LibNvJitLinkMode) Descriptor() protoreflect.EnumDescriptor
- func (x DebugOptions_LibNvJitLinkMode) Enum() *DebugOptions_LibNvJitLinkMode
- func (DebugOptions_LibNvJitLinkMode) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x DebugOptions_LibNvJitLinkMode) Number() protoreflect.EnumNumber
- func (x DebugOptions_LibNvJitLinkMode) String() string
- func (DebugOptions_LibNvJitLinkMode) Type() protoreflect.EnumType
- type DebugOptions_PGLEStrictnessLevel
- func (DebugOptions_PGLEStrictnessLevel) Descriptor() protoreflect.EnumDescriptor
- func (x DebugOptions_PGLEStrictnessLevel) Enum() *DebugOptions_PGLEStrictnessLevel
- func (DebugOptions_PGLEStrictnessLevel) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x DebugOptions_PGLEStrictnessLevel) Number() protoreflect.EnumNumber
- func (x DebugOptions_PGLEStrictnessLevel) String() string
- func (DebugOptions_PGLEStrictnessLevel) Type() protoreflect.EnumType
- type DebugOptions_PartitioningAlgorithm
- func (DebugOptions_PartitioningAlgorithm) Descriptor() protoreflect.EnumDescriptor
- func (x DebugOptions_PartitioningAlgorithm) Enum() *DebugOptions_PartitioningAlgorithm
- func (DebugOptions_PartitioningAlgorithm) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x DebugOptions_PartitioningAlgorithm) Number() protoreflect.EnumNumber
- func (x DebugOptions_PartitioningAlgorithm) String() string
- func (DebugOptions_PartitioningAlgorithm) Type() protoreflect.EnumType
- type DebugOptions_ShapeChecks
- func (DebugOptions_ShapeChecks) Descriptor() protoreflect.EnumDescriptor
- func (x DebugOptions_ShapeChecks) Enum() *DebugOptions_ShapeChecks
- func (DebugOptions_ShapeChecks) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x DebugOptions_ShapeChecks) Number() protoreflect.EnumNumber
- func (x DebugOptions_ShapeChecks) String() string
- func (DebugOptions_ShapeChecks) Type() protoreflect.EnumType
- type DebugOptions_StepMarkerLocation
- func (DebugOptions_StepMarkerLocation) Descriptor() protoreflect.EnumDescriptor
- func (x DebugOptions_StepMarkerLocation) Enum() *DebugOptions_StepMarkerLocation
- func (DebugOptions_StepMarkerLocation) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x DebugOptions_StepMarkerLocation) Number() protoreflect.EnumNumber
- func (x DebugOptions_StepMarkerLocation) String() string
- func (DebugOptions_StepMarkerLocation) Type() protoreflect.EnumType
- type DebugOptions_WhileLoopUnrolling
- func (DebugOptions_WhileLoopUnrolling) Descriptor() protoreflect.EnumDescriptor
- func (x DebugOptions_WhileLoopUnrolling) Enum() *DebugOptions_WhileLoopUnrolling
- func (DebugOptions_WhileLoopUnrolling) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x DebugOptions_WhileLoopUnrolling) Number() protoreflect.EnumNumber
- func (x DebugOptions_WhileLoopUnrolling) String() string
- func (DebugOptions_WhileLoopUnrolling) Type() protoreflect.EnumType
- type ExecutionOptions
- func (*ExecutionOptions) Descriptor() ([]byte, []int) (deprecated)
- func (x *ExecutionOptions) GetAliasPassthroughParams() bool
- func (x *ExecutionOptions) GetAllowSeparateShardingPrograms() bool
- func (x *ExecutionOptions) GetAllowSpmdShardingPropagationToOutput() []bool
- func (x *ExecutionOptions) GetAllowSpmdShardingPropagationToParameters() []bool
- func (x *ExecutionOptions) GetAutoSpmdPartitioningMeshIds() []int64
- func (x *ExecutionOptions) GetAutoSpmdPartitioningMeshShape() []int64
- func (x *ExecutionOptions) GetDebugOptions() *DebugOptions
- func (x *ExecutionOptions) GetDeduplicateHlo() bool
- func (x *ExecutionOptions) GetDeviceAssignment() *xla_data.DeviceAssignmentProto
- func (x *ExecutionOptions) GetDeviceHandles() []*xla_data.DeviceHandle
- func (x *ExecutionOptions) GetDeviceMemorySize() int64
- func (x *ExecutionOptions) GetExecTimeOptimizationEffort() float32
- func (x *ExecutionOptions) GetFdoProfile() []byte
- func (x *ExecutionOptions) GetLaunchId() int32
- func (x *ExecutionOptions) GetMemoryFittingEffort() float32
- func (x *ExecutionOptions) GetNumPartitions() int32
- func (x *ExecutionOptions) GetNumReplicas() int32
- func (x *ExecutionOptions) GetParamRequiresBroadcastViaCollectives() []bool
- func (x *ExecutionOptions) GetSeed() uint64
- func (x *ExecutionOptions) GetShapeWithOutputLayout() *xla_data.ShapeProto
- func (x *ExecutionOptions) GetShardableValueUpdatePairs() []*ShardableValueUpdatePairProto
- func (x *ExecutionOptions) GetUseAutoSpmdPartitioning() bool
- func (x *ExecutionOptions) GetUseShardyPartitioner() bool
- func (x *ExecutionOptions) GetUseSpmdPartitioning() bool
- func (*ExecutionOptions) ProtoMessage()
- func (x *ExecutionOptions) ProtoReflect() protoreflect.Message
- func (x *ExecutionOptions) Reset()
- func (x *ExecutionOptions) String() string
- type GpuCompilationEnvironment
- func (*GpuCompilationEnvironment) Descriptor() ([]byte, []int) (deprecated)
- func (x *GpuCompilationEnvironment) GetDummyFlag() int64
- func (*GpuCompilationEnvironment) ProtoMessage()
- func (x *GpuCompilationEnvironment) ProtoReflect() protoreflect.Message
- func (x *GpuCompilationEnvironment) Reset()
- func (x *GpuCompilationEnvironment) String() string
- type HloModuleConfigProto
- func (*HloModuleConfigProto) Descriptor() ([]byte, []int) (deprecated)
- func (x *HloModuleConfigProto) GetAliasPassthroughParams() bool
- func (x *HloModuleConfigProto) GetAllowSeparateShardingPrograms() bool
- func (x *HloModuleConfigProto) GetAllowSpmdShardingPropagationToOutput() []bool
- func (x *HloModuleConfigProto) GetAllowSpmdShardingPropagationToParameters() []bool
- func (x *HloModuleConfigProto) GetAnalysisAllowanceMap() map[string]int64
- func (x *HloModuleConfigProto) GetAutoSpmdPartitioningMeshIds() []int64
- func (x *HloModuleConfigProto) GetAutoSpmdPartitioningMeshShape() []int64
- func (x *HloModuleConfigProto) GetContentAwareComputationSorting() bool
- func (x *HloModuleConfigProto) GetDebugOptions() *DebugOptions
- func (x *HloModuleConfigProto) GetDeduplicateHlo() bool
- func (x *HloModuleConfigProto) GetDeviceMemorySize() int64
- func (x *HloModuleConfigProto) GetDeviceType() string
- func (x *HloModuleConfigProto) GetDotConfig() map[string]*HloModuleConfigProto_Int64List
- func (x *HloModuleConfigProto) GetEntryComputationLayout() *xla_data.ProgramShapeProto
- func (x *HloModuleConfigProto) GetExecTimeOptimizationEffort() float32
- func (x *HloModuleConfigProto) GetFdoProfile() []byte
- func (x *HloModuleConfigProto) GetFusionConfig() []*HloModuleConfigProto_BoolList
- func (x *HloModuleConfigProto) GetFusionConfigCollection() HloModuleConfigProto_FusionConfigCollection
- func (x *HloModuleConfigProto) GetIntraOpParallelismThreads() int64
- func (x *HloModuleConfigProto) GetLaunchId() int32
- func (x *HloModuleConfigProto) GetLayoutConfig() []*HloModuleConfigProto_Int64ListList
- func (x *HloModuleConfigProto) GetMatrixUnitOperandPrecision() xla_data.PrecisionConfig_Precision
- func (x *HloModuleConfigProto) GetMemoryFittingEffort() float32
- func (x *HloModuleConfigProto) GetMemorySpaceAssignmentConfig() []uint64
- func (x *HloModuleConfigProto) GetNumPartitions() int64
- func (x *HloModuleConfigProto) GetParamRequiresBroadcastViaCollectives() []bool
- func (x *HloModuleConfigProto) GetPhaseIndex() int32
- func (x *HloModuleConfigProto) GetPhaseOrderingConfig() []*HloModuleConfigProto_BoolList
- func (x *HloModuleConfigProto) GetPreSimulationDeviceAssignment() *xla_data.DeviceAssignmentProto
- func (x *HloModuleConfigProto) GetReplicaCount() int64
- func (x *HloModuleConfigProto) GetSeed() uint64
- func (x *HloModuleConfigProto) GetShardableValueUpdatePairs() []*ShardableValueUpdatePairProto
- func (x *HloModuleConfigProto) GetShardingConfig() *ShardingConfigProto
- func (x *HloModuleConfigProto) GetStaticDeviceAssignment() *xla_data.DeviceAssignmentProto
- func (x *HloModuleConfigProto) GetUseAutoSpmdPartitioning() bool
- func (x *HloModuleConfigProto) GetUseShardyPartitioner() bool
- func (x *HloModuleConfigProto) GetUseSpmdPartitioning() bool
- func (*HloModuleConfigProto) ProtoMessage()
- func (x *HloModuleConfigProto) ProtoReflect() protoreflect.Message
- func (x *HloModuleConfigProto) Reset()
- func (x *HloModuleConfigProto) String() string
- type HloModuleConfigProto_BoolList
- func (*HloModuleConfigProto_BoolList) Descriptor() ([]byte, []int) (deprecated)
- func (x *HloModuleConfigProto_BoolList) GetVals() []bool
- func (*HloModuleConfigProto_BoolList) ProtoMessage()
- func (x *HloModuleConfigProto_BoolList) ProtoReflect() protoreflect.Message
- func (x *HloModuleConfigProto_BoolList) Reset()
- func (x *HloModuleConfigProto_BoolList) String() string
- type HloModuleConfigProto_FusionConfigCollection
- func (HloModuleConfigProto_FusionConfigCollection) Descriptor() protoreflect.EnumDescriptor
- func (x HloModuleConfigProto_FusionConfigCollection) Enum() *HloModuleConfigProto_FusionConfigCollection
- func (HloModuleConfigProto_FusionConfigCollection) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x HloModuleConfigProto_FusionConfigCollection) Number() protoreflect.EnumNumber
- func (x HloModuleConfigProto_FusionConfigCollection) String() string
- func (HloModuleConfigProto_FusionConfigCollection) Type() protoreflect.EnumType
- type HloModuleConfigProto_Int64List
- func (*HloModuleConfigProto_Int64List) Descriptor() ([]byte, []int) (deprecated)
- func (x *HloModuleConfigProto_Int64List) GetVals() []int64
- func (*HloModuleConfigProto_Int64List) ProtoMessage()
- func (x *HloModuleConfigProto_Int64List) ProtoReflect() protoreflect.Message
- func (x *HloModuleConfigProto_Int64List) Reset()
- func (x *HloModuleConfigProto_Int64List) String() string
- type HloModuleConfigProto_Int64ListList
- func (*HloModuleConfigProto_Int64ListList) Descriptor() ([]byte, []int) (deprecated)
- func (x *HloModuleConfigProto_Int64ListList) GetLists() []*HloModuleConfigProto_Int64List
- func (*HloModuleConfigProto_Int64ListList) ProtoMessage()
- func (x *HloModuleConfigProto_Int64ListList) ProtoReflect() protoreflect.Message
- func (x *HloModuleConfigProto_Int64ListList) Reset()
- func (x *HloModuleConfigProto_Int64ListList) String() string
- type HloModuleProtoWithConfig
- func (*HloModuleProtoWithConfig) Descriptor() ([]byte, []int) (deprecated)
- func (x *HloModuleProtoWithConfig) GetConfig() *HloModuleConfigProto
- func (x *HloModuleProtoWithConfig) GetHloModule() *hlo.HloModuleProto
- func (*HloModuleProtoWithConfig) ProtoMessage()
- func (x *HloModuleProtoWithConfig) ProtoReflect() protoreflect.Message
- func (x *HloModuleProtoWithConfig) Reset()
- func (x *HloModuleProtoWithConfig) String() string
- type NodeShardingConfigProto
- func (*NodeShardingConfigProto) Descriptor() ([]byte, []int) (deprecated)
- func (x *NodeShardingConfigProto) GetNodes() []*NodeShardingConfigProto
- func (x *NodeShardingConfigProto) GetSharding() *xla_data.OpSharding
- func (*NodeShardingConfigProto) ProtoMessage()
- func (x *NodeShardingConfigProto) ProtoReflect() protoreflect.Message
- func (x *NodeShardingConfigProto) Reset()
- func (x *NodeShardingConfigProto) String() string
- type ScheduleProto
- func (*ScheduleProto) Descriptor() ([]byte, []int) (deprecated)
- func (x *ScheduleProto) GetComputationId() int64
- func (x *ScheduleProto) GetCyclesPerMicrosecond() int64
- func (x *ScheduleProto) GetHloModule() *hlo.HloModuleProto
- func (x *ScheduleProto) GetInstructions() []*ScheduleProto_Instruction
- func (*ScheduleProto) ProtoMessage()
- func (x *ScheduleProto) ProtoReflect() protoreflect.Message
- func (x *ScheduleProto) Reset()
- func (x *ScheduleProto) String() string
- type ScheduleProto_Instruction
- func (*ScheduleProto_Instruction) Descriptor() ([]byte, []int) (deprecated)
- func (x *ScheduleProto_Instruction) GetEndTimestampCycles() float64
- func (x *ScheduleProto_Instruction) GetId() int64
- func (x *ScheduleProto_Instruction) GetStartTimestampCycles() float64
- func (*ScheduleProto_Instruction) ProtoMessage()
- func (x *ScheduleProto_Instruction) ProtoReflect() protoreflect.Message
- func (x *ScheduleProto_Instruction) Reset()
- func (x *ScheduleProto_Instruction) String() string
- type ShardableValueUpdatePairProto
- func (*ShardableValueUpdatePairProto) Descriptor() ([]byte, []int) (deprecated)
- func (x *ShardableValueUpdatePairProto) GetInputParameterNumber() int64
- func (x *ShardableValueUpdatePairProto) GetOutputShapeIndex() []int64
- func (x *ShardableValueUpdatePairProto) GetParameterShapeIndex() []int64
- func (*ShardableValueUpdatePairProto) ProtoMessage()
- func (x *ShardableValueUpdatePairProto) ProtoReflect() protoreflect.Message
- func (x *ShardableValueUpdatePairProto) Reset()
- func (x *ShardableValueUpdatePairProto) String() string
- type ShardingConfigProto
- func (*ShardingConfigProto) Descriptor() ([]byte, []int) (deprecated)
- func (x *ShardingConfigProto) GetNodes() []*NodeShardingConfigProto
- func (*ShardingConfigProto) ProtoMessage()
- func (x *ShardingConfigProto) ProtoReflect() protoreflect.Message
- func (x *ShardingConfigProto) Reset()
- func (x *ShardingConfigProto) String() string
Constants ¶
This section is empty.
Variables ¶
var ( DebugOptions_CollectiveOpType_name = map[int32]string{ 0: "NOOP", 1: "ALLREDUCE", 2: "ALLGATHER", 3: "REDUCESCATTER", 4: "COLLECTIVEBROADCAST", 5: "ALLTOALL", 6: "COLLECTIVEPERMUTE", 7: "RAGGEDALLTOALL", } DebugOptions_CollectiveOpType_value = map[string]int32{ "NOOP": 0, "ALLREDUCE": 1, "ALLGATHER": 2, "REDUCESCATTER": 3, "COLLECTIVEBROADCAST": 4, "ALLTOALL": 5, "COLLECTIVEPERMUTE": 6, "RAGGEDALLTOALL": 7, } )
Enum value maps for DebugOptions_CollectiveOpType.
var ( DebugOptions_CommandBufferCmdType_name = map[int32]string{ 0: "INVALID", 1: "FUSION", 2: "CUBLAS", 3: "CUDNN", 4: "COLLECTIVES", 5: "CONDITIONAL", 6: "WHILE", 7: "CUSTOM_CALL", 8: "CUBLASLT", 9: "DYNAMIC_SLICE_FUSION", } DebugOptions_CommandBufferCmdType_value = map[string]int32{ "INVALID": 0, "FUSION": 1, "CUBLAS": 2, "CUDNN": 3, "COLLECTIVES": 4, "CONDITIONAL": 5, "WHILE": 6, "CUSTOM_CALL": 7, "CUBLASLT": 8, "DYNAMIC_SLICE_FUSION": 9, } )
Enum value maps for DebugOptions_CommandBufferCmdType.
var ( DebugOptions_LibNvJitLinkMode_name = map[int32]string{ 0: "LIB_NV_JIT_LINK_MODE_AUTO", 1: "LIB_NV_JIT_LINK_MODE_DISABLED", 2: "LIB_NV_JIT_LINK_MODE_ENABLED", } DebugOptions_LibNvJitLinkMode_value = map[string]int32{ "LIB_NV_JIT_LINK_MODE_AUTO": 0, "LIB_NV_JIT_LINK_MODE_DISABLED": 1, "LIB_NV_JIT_LINK_MODE_ENABLED": 2, } )
Enum value maps for DebugOptions_LibNvJitLinkMode.
var ( DebugOptions_PGLEStrictnessLevel_name = map[int32]string{ 0: "PGLE_STRICTNESS_LEVEL_OFF", 1: "PGLE_STRICTNESS_LEVEL_WARN", 2: "PGLE_STRICTNESS_LEVEL_ERROR", } DebugOptions_PGLEStrictnessLevel_value = map[string]int32{ "PGLE_STRICTNESS_LEVEL_OFF": 0, "PGLE_STRICTNESS_LEVEL_WARN": 1, "PGLE_STRICTNESS_LEVEL_ERROR": 2, } )
Enum value maps for DebugOptions_PGLEStrictnessLevel.
var ( DebugOptions_ShapeChecks_name = map[int32]string{ 0: "IGNORE", 1: "RUNTIME", 2: "COMPILE_TIME", } DebugOptions_ShapeChecks_value = map[string]int32{ "IGNORE": 0, "RUNTIME": 1, "COMPILE_TIME": 2, } )
Enum value maps for DebugOptions_ShapeChecks.
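var ( DebugOptions_WhileLoopUnrolling_name = map[int32]string{ 0: "WHILE_LOOP_UNROLLING_NO_UNROLL", 1: "WHILE_LOOP_UNROLLING_DOUBLE_BUFFER", 2: "WHILE_LOOP_UNROLLING_FULL_UNROLL", 3: "WHILE_LOOP_UNROLLING_AUTO_UNROLL", } DebugOptions_WhileLoopUnrolling_value = map[string]int32{ "WHILE_LOOP_UNROLLING_NO_UNROLL": 0, "WHILE_LOOP_UNROLLING_DOUBLE_BUFFER": 1, "WHILE_LOOP_UNROLLING_FULL_UNROLL": 2, "WHILE_LOOP_UNROLLING_AUTO_UNROLL": 3, } )
Enum value maps for DebugOptions_WhileLoopUnrolling.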
var ( DebugOptions_StepMarkerLocation_name = map[int32]string{ 0: "STEP_MARK_AT_ENTRY", 1: "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", 3: "STEP_MARK_AT_SECOND_LEVEL_WHILE_LOOP", 2: "STEP_MARK_NONE", } DebugOptions_StepMarkerLocation_value = map[string]int32{ "STEP_MARK_AT_ENTRY": 0, "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP": 1, "STEP_MARK_AT_SECOND_LEVEL_WHILE_LOOP": 3, "STEP_MARK_NONE": 2, } )
Enum value maps for DebugOptions_StepMarkerLocation.
// Enum value maps for DebugOptions_PartitioningAlgorithm.
//
// DebugOptions_PartitioningAlgorithm_name maps each enum number to its name,
// and DebugOptions_PartitioningAlgorithm_value is the inverse (name to number).
var (
	DebugOptions_PartitioningAlgorithm_name = map[int32]string{
		0: "PARTITIONING_ALGORITHM_NOOP",
		1: "PARTITIONING_ALGORITHM_EXP0",
		2: "PARTITIONING_ALGORITHM_EXP1",
		3: "PARTITIONING_ALGORITHM_EXP2",
	}
	DebugOptions_PartitioningAlgorithm_value = map[string]int32{
		"PARTITIONING_ALGORITHM_NOOP": 0,
		"PARTITIONING_ALGORITHM_EXP0": 1,
		"PARTITIONING_ALGORITHM_EXP1": 2,
		"PARTITIONING_ALGORITHM_EXP2": 3,
	}
)
Enum value maps for DebugOptions_PartitioningAlgorithm.
// Enum value maps for DebugOptions_AutotuneCacheMode.
//
// DebugOptions_AutotuneCacheMode_name maps each enum number to its name, and
// DebugOptions_AutotuneCacheMode_value is the inverse (name to number).
var (
	DebugOptions_AutotuneCacheMode_name = map[int32]string{
		0: "AUTOTUNE_CACHE_MODE_UNSPECIFIED",
		1: "AUTOTUNE_CACHE_MODE_UPDATE",
		2: "AUTOTUNE_CACHE_MODE_READ",
	}
	DebugOptions_AutotuneCacheMode_value = map[string]int32{
		"AUTOTUNE_CACHE_MODE_UNSPECIFIED": 0,
		"AUTOTUNE_CACHE_MODE_UPDATE":      1,
		"AUTOTUNE_CACHE_MODE_READ":        2,
	}
)
Enum value maps for DebugOptions_AutotuneCacheMode.
// Enum value maps for HloModuleConfigProto_FusionConfigCollection.
//
// HloModuleConfigProto_FusionConfigCollection_name maps each enum number to
// its name, and HloModuleConfigProto_FusionConfigCollection_value is the
// inverse (name to number).
var (
	HloModuleConfigProto_FusionConfigCollection_name = map[int32]string{
		0: "OFF",
		1: "PER_EDGE",
		2: "PER_NODE",
	}
	HloModuleConfigProto_FusionConfigCollection_value = map[string]int32{
		"OFF":      0,
		"PER_EDGE": 1,
		"PER_NODE": 2,
	}
)
Enum value maps for HloModuleConfigProto_FusionConfigCollection.
// File_xla_xla_proto is a package-level protoreflect.FileDescriptor.
// NOTE(review): presumably the file descriptor for xla/xla.proto (inferred
// from the name only; its initialization is not visible here) — confirm.
var File_xla_xla_proto protoreflect.FileDescriptor
Functions ¶
This section is empty.
Types ¶
type CompilationEnvironmentsProto ¶
// CompilationEnvironmentsProto is the proto version of
// `xla::CompilationEnvironments`. Its only exported field, Environments, is a
// repeated field (protobuf field number 1) of *anypb.Any values. Unexported
// fields are elided from this listing.
type CompilationEnvironmentsProto struct { Environments []*anypb.Any `protobuf:"bytes,1,rep,name=environments,proto3" json:"environments,omitempty"` // contains filtered or unexported fields }
Proto version of `xla::CompilationEnvironments`.
func (*CompilationEnvironmentsProto) Descriptor
func (*CompilationEnvironmentsProto) Descriptor() ([]byte, []int)
Deprecated: Use CompilationEnvironmentsProto.ProtoReflect.Descriptor instead.
func (*CompilationEnvironmentsProto) GetEnvironments ¶
func (x *CompilationEnvironmentsProto) GetEnvironments() []*anypb.Any
func (*CompilationEnvironmentsProto) ProtoMessage ¶
func (*CompilationEnvironmentsProto) ProtoMessage()
func (*CompilationEnvironmentsProto) ProtoReflect ¶
func (x *CompilationEnvironmentsProto) ProtoReflect() protoreflect.Message
func (*CompilationEnvironmentsProto) Reset ¶
func (x *CompilationEnvironmentsProto) Reset()
func (*CompilationEnvironmentsProto) String ¶
func (x *CompilationEnvironmentsProto) String() string
type DebugOptions ¶
type DebugOptions struct { // --------------------------------------------------------------------------// // XLA backend-agnostic options. // --------------------------------------------------------------------------// // go/keep-sorted start // Crash if HloPassFix can not converge after a fixed number of iterations. XlaUnsupportedCrashOnHloPassFixMaxIterations bool `` // go/keep-sorted end /* 210-byte string literal not displayed */ // Use region analysis in copy insertion pass. XlaCpuCopyInsertionUseRegionAnalysis bool `` /* 182-byte string literal not displayed */ // When true, XLA:CPU uses HLO module scheduler that is optimized for // extracting concurrency at the cost of extra memory: we extend the live // ranges of temporaries to allow XLA runtime to schedule independent // operations in parallel on separate threads. XlaCpuEnableConcurrencyOptimizedScheduler bool `` /* 195-byte string literal not displayed */ // When true, "unsafe" mathematical optimizations are enabled. These // transformations include but are not limited to: // // - Reducing the precision of operations (e.g. using an approximate sin // function, or transforming x/y into x * (1/y)). // - Assuming that operations never produce or consume NaN or +/- Inf (this // behavior can be adjusted using xla_cpu_fast_math_allow_{nans|infs}). // - Assuming that +0 and -0 are indistinguishable. XlaCpuEnableFastMath bool `` /* 129-byte string literal not displayed */ // When false we lower the Minimum and Maximum hlos in the CPU backend such // that Min(NotNaN, NaN) = Min(NaN, NotNaN) = NaN. In other words, if flag // this is false we always propagate NaNs through Min and Max. // // Note, this does not correspond to the exact same behavior as the gpu flag // below! XlaCpuEnableFastMinMax bool `` /* 138-byte string literal not displayed */ // When xla_cpu_enable_fast_math is true then this controls whether we forbid // to use the reciprocal of an argument instead of division. 
Ignored when // xla_cpu_enable_fast_math is false. XlaCpuFastMathHonorDivision bool `` /* 153-byte string literal not displayed */ // When xla_cpu_enable_fast_math is true then this controls whether we forbid // to approximate calculations for functions. Ignored when // xla_cpu_enable_fast_math is false. XlaCpuFastMathHonorFunctions bool `` /* 156-byte string literal not displayed */ // When xla_cpu_enable_fast_math is true then this controls whether we allow // operations to produce infinites. Ignored when xla_cpu_enable_fast_math is // false. XlaCpuFastMathHonorInfs bool `` /* 141-byte string literal not displayed */ // When xla_cpu_enable_fast_math is true then this controls whether we allow // operations to produce NaNs. Ignored when xla_cpu_enable_fast_math is // false. XlaCpuFastMathHonorNans bool `` /* 141-byte string literal not displayed */ // When true, XLA:CPU uses the thunk runtime to execute compiled program. XlaCpuUseThunkRuntime bool `` /* 133-byte string literal not displayed */ // When true, XLA:CPU uses XNNPACK to execute supported operations. XlaCpuUseXnnpack bool `protobuf:"varint,359,opt,name=xla_cpu_use_xnnpack,json=xlaCpuUseXnnpack,proto3" json:"xla_cpu_use_xnnpack,omitempty"` // Enabling this will enable optimizations that ignore the possibility of NaN. XlaEnableFastMath bool `protobuf:"varint,335,opt,name=xla_enable_fast_math,json=xlaEnableFastMath,proto3" json:"xla_enable_fast_math,omitempty"` // The number of parts to split the LLVM module into before codegen. This // allows XLA to compile all parts in parallel, and resolve kernel symbols // from different dynamic libraries. XlaCpuParallelCodegenSplitCount int32 `` /* 165-byte string literal not displayed */ // A `prefer-vector-width` value that is passed to the LLVM backend. Default // value is `256` (AVX2 on x86 platforms). XlaCpuPreferVectorWidth int32 `` /* 139-byte string literal not displayed */ // When set, XLA:CPU will only generate code up to the specified ISA. 
// (It will not use newer ISAs.) Using the string format allows us to extend // the flag for more flexible control if necessary. XlaCpuMaxIsa string `protobuf:"bytes,333,opt,name=xla_cpu_max_isa,json=xlaCpuMaxIsa,proto3" json:"xla_cpu_max_isa,omitempty"` // Denylist for cuDNN convolutions. XlaGpuAlgorithmDenylistPath string `` /* 150-byte string literal not displayed */ // Size threshold (in bytes) for the GPU all-gather combiner. XlaGpuAllGatherCombineThresholdBytes int64 `` /* 182-byte string literal not displayed */ // Number of devices per host for first stage of BlueConnect decomposition // pass. The pass will attempt to decompose all-reduces ops into a // ReduceScatter-AllReduce-AllGather sequence, with the initial ReduceScatter // being performed over all of the devices in the same host. Set to < 1 to // disable all-reduce decomposition. XlaGpuAllReduceBlueconnectNumDevicesPerHost int32 `` /* 207-byte string literal not displayed */ // Size threshold (in bytes) for the GPU all-reduce combiner. XlaGpuAllReduceCombineThresholdBytes int64 `` /* 182-byte string literal not displayed */ // Extra platform-specific options to improve analytical latency // estimator precision; comma-separated list of 'key=val' strings (=val may be // omitted); no whitespace around commas. Available options: // --xla_gpu_analytical_latency_estimator_options= // 'nccl_op_launch_ms=55,nic_speed_gbps=40, // chunk_prep_ms=1,rtt_ms=2,gpus_per_node=4,chunk_size_bytes=1024' XlaGpuAnalyticalLatencyEstimatorOptions map[string]string `` /* 262-byte string literal not displayed */ // If true, XLA will wrap `dot` operations into async computations in an // effort to parallelize matrix operations. XlaGpuAsyncDot bool `protobuf:"varint,321,opt,name=xla_gpu_async_dot,json=xlaGpuAsyncDot,proto3" json:"xla_gpu_async_dot,omitempty"` // Memory budget in GB per device for AutoSharding. 
XlaGpuAutoSpmdPartitioningMemoryBudgetGb int32 `` /* 196-byte string literal not displayed */ // See the definition of the // xla_gpu_auto_spmd_partitioning_memory_budget_ratio flag for the meaning of // this field. XlaGpuAutoSpmdPartitioningMemoryBudgetRatio float32 `` /* 206-byte string literal not displayed */ // Relative precision for comparing different GEMM solutions XlaGpuAutotuneGemmRtol float32 `` /* 137-byte string literal not displayed */ // 0: Disable gemm and convolution autotuning. // 1: Enable autotuning, but disable correctness checking. // 2: Also set output buffers to random numbers during autotuning. // 3: Also reset output buffers to random numbers after autotuning each // // algorithm. // // 4+: Also check for correct outputs and for out-of-bounds reads/writes. // // Default: 4. XlaGpuAutotuneLevel int32 `protobuf:"varint,123,opt,name=xla_gpu_autotune_level,json=xlaGpuAutotuneLevel,proto3" json:"xla_gpu_autotune_level,omitempty"` // If non-zero, limits the number of solutions to be used by GEMM autotuner. // This might be useful if underlying math library returns too many GEMM // solutions. XlaGpuAutotuneMaxSolutions int64 `` /* 148-byte string literal not displayed */ // If true, each fusion instruction will have a cost model runtime estimate in // backend config after compilation. XlaGpuCollectCostModelStats bool `` /* 153-byte string literal not displayed */ // Inflate collective cost by running each collective multiple times. XlaGpuCollectiveInflationFactor int32 `` /* 163-byte string literal not displayed */ // The minimum data size in bytes to trigger collective-permute-decomposer // transformation. XlaGpuCollectivePermuteDecomposerThreshold int64 `` /* 198-byte string literal not displayed */ // Do not lock collective cliques for each XLA:GPU execution, and instead // use per-process cliques that are never unlocked. This disables deadlock // prevention mechanism in XLA:GPU and should be used at you own risk. 
If // collective operations from concurrent executions are not correctly ordered // it may lead to deadlocks, crashes or will produce garbage. XlaGpuCollectivesUsePersistentCliques bool `` /* 183-byte string literal not displayed */ XlaGpuCopyInsertionUseRegionAnalysis bool `` /* 182-byte string literal not displayed */ // Crashes the program when any kind of verification fails, instead of just // logging the failures. One example is cross checking of convolution results // among different algorithms. XlaGpuCrashOnVerificationFailures bool `` /* 171-byte string literal not displayed */ // Allow Triton GEMM autotuning to fall back to cuBLAS when that is // faster. XlaGpuCublasFallback bool `` /* 128-byte string literal not displayed */ // Path to directory with cuda/ptx tools and libraries. XlaGpuCudaDataDir string `protobuf:"bytes,61,opt,name=xla_gpu_cuda_data_dir,json=xlaGpuCudaDataDir,proto3" json:"xla_gpu_cuda_data_dir,omitempty"` // Let GEMM fusion autotuning probe cuDNN as a backend. // Current levels: // 0: Disabled. // 1: Enabled on Blackwell+ GPUs. // 2: Enabled on all supported GPUs (Ampere+). XlaGpuCudnnGemmFusionLevel int32 `` /* 150-byte string literal not displayed */ // Limit for the number of kernel configurations (plans) to use during // autotuning of cuDNN GEMM fusions. The more - the slower the autotuning // but potentially higher the performance. XlaGpuCudnnGemmMaxPlans int32 `` /* 141-byte string literal not displayed */ // Guarantees run-to-run determinism. // This flag implies --xla_gpu_exclude_nondeterministic_ops and in addition // disables autotuning. XlaGpuDeterministicOps bool `` /* 134-byte string literal not displayed */ XlaGpuDisableAsyncCollectives []DebugOptions_CollectiveOpType `` /* 203-byte string literal not displayed */ // If set to true XLA:GPU invokes `ptxas` with -O0 (default is -O3). XlaGpuDisableGpuasmOptimizations bool `` /* 166-byte string literal not displayed */ // DotMerger pass threshold size to be used in MB. 
XlaGpuDotMergerThresholdMb int32 `` /* 150-byte string literal not displayed */ // File to write autotune logs to. It will be stored in txt format. XlaGpuDumpAutotuneLogsTo string `` /* 143-byte string literal not displayed */ // File to write autotune results to. It will be a binary file unless the name // ends with .txt or .textproto. Warning: The results are written at every // compilation, possibly multiple times per process. This only works on CUDA. XlaGpuDumpAutotuneResultsTo string `` /* 152-byte string literal not displayed */ XlaGpuDumpAutotunedGemmFusions bool `` /* 162-byte string literal not displayed */ // If true, every time an HLO module is run, we will dump an // HloUnoptimizedSnapshot (essentially, a serialized unoptimized module plus // its inputs) to the --xla_dump_to directory. XlaGpuDumpHloUnoptimizedSnapshots bool `` /* 171-byte string literal not displayed */ // Whether to dump llvm ir when compiling to ptx. XlaGpuDumpLlvmir bool `protobuf:"varint,155,opt,name=xla_gpu_dump_llvmir,json=xlaGpuDumpLlvmir,proto3" json:"xla_gpu_dump_llvmir,omitempty"` // Combine all-gather ops with the same dimension or irrespective of their // dimension. XlaGpuEnableAllGatherCombineByDim bool `` /* 175-byte string literal not displayed */ XlaGpuEnableAnalyticalLatencyEstimator bool `` /* 186-byte string literal not displayed */ // Enables NCCL Speed-of-Light (SoL) analytical cost model XlaGpuEnableAnalyticalSolLatencyEstimator bool `` /* 197-byte string literal not displayed */ XlaGpuEnableApproxCostlyCollectives bool `` /* 177-byte string literal not displayed */ // Determine the types of commands that are recorded into command buffers. XlaGpuEnableCommandBuffer []DebugOptions_CommandBufferCmdType `` /* 195-byte string literal not displayed */ // Enable radix sort using CUB. XlaGpuEnableCubRadixSort bool `` /* 144-byte string literal not displayed */ // Whether to use cuBLASLt for GEMMs on GPUs. 
XlaGpuEnableCublaslt bool `` /* 128-byte string literal not displayed */ XlaGpuEnableCudnnFmha bool `` /* 133-byte string literal not displayed */ // Whether to use the cuDNN frontend API for convolutions when possible. XlaGpuEnableCudnnFrontend bool `` /* 145-byte string literal not displayed */ XlaGpuEnableCudnnInt8X32ConvolutionReordering bool `` /* 209-byte string literal not displayed */ // Rewrite layer norm patterns into cuDNN library calls. XlaGpuEnableCudnnLayerNorm bool `` /* 150-byte string literal not displayed */ // If true, XLA will try to pattern match subgraphs of HLO operations into // custom fusions registered in the current process (pre-compiled hand written // kernels, e.g. various GEMM fusions written in CUTLASS). XlaGpuEnableCustomFusions bool `` /* 145-byte string literal not displayed */ // A regular expression enabling only a subset of custom fusions. Enabled only // if `xla_gpu_enable_custom_fusion` set to true. XlaGpuEnableCustomFusionsRe string `` /* 152-byte string literal not displayed */ // Enables address computation fusion to optimize dynamic-slice and // dynamic-update-slice operations around library calls. XlaGpuEnableDynamicSliceFusion bool `` /* 162-byte string literal not displayed */ // When true we lower the Minimum and Maximum hlos in the GPU backend such // that Min(NotNaN, NaN) = Min(NaN, NotNaN) = NotNaN. In other words, if flag // this is true we don't propagate NaNs through Min and Max. XlaGpuEnableFastMinMax bool `` /* 138-byte string literal not displayed */ XlaGpuEnableHighestPriorityAsyncStream bool `` /* 188-byte string literal not displayed */ // If true, will enable host memory offloading on a device. XlaGpuEnableHostMemoryOffloading bool `` /* 168-byte string literal not displayed */ XlaGpuEnableLatencyHidingScheduler bool `` /* 174-byte string literal not displayed */ // If enabled, uses the libnvptxcompiler library to compile PTX to cuBIN. 
XlaGpuEnableLibnvptxcompiler bool `` /* 152-byte string literal not displayed */ // Overrides normal multi-threaded compilation setting to use this many // threads. Setting to 0 (the default value) means no enforcement. XlaGpuEnableLlvmModuleCompilationParallelism bool `` /* 206-byte string literal not displayed */ // Allow early return when acquiring NCCL cliques. XlaGpuEnableNcclCliqueOptimization bool `` /* 174-byte string literal not displayed */ // Enable NCCL communicator splitting. XlaGpuEnableNcclCommSplitting bool `` /* 159-byte string literal not displayed */ // Enable NCCL per stream communicators. XlaGpuEnableNcclPerStreamComms bool `` /* 164-byte string literal not displayed */ // Enable NCCL user buffers. XlaGpuEnableNcclUserBuffers bool `` /* 153-byte string literal not displayed */ XlaGpuEnablePipelinedAllGather bool `` /* 162-byte string literal not displayed */ XlaGpuEnablePipelinedAllReduce bool `` /* 162-byte string literal not displayed */ XlaGpuEnablePipelinedCollectives bool `` /* 166-byte string literal not displayed */ XlaGpuEnablePipelinedP2P bool `` /* 142-byte string literal not displayed */ XlaGpuEnablePipelinedReduceScatter bool `` /* 174-byte string literal not displayed */ // Enable allreduce reassociation on allreduces that are converted to a wider // type. The resulting allreduce will be promoted to a wider-typed allreduce. XlaGpuEnableReassociationForConvertedAr bool `` /* 191-byte string literal not displayed */ // Combine reduce-scatter ops with the same dimension or irrespective of their // dimension. XlaGpuEnableReduceScatterCombineByDim bool `` /* 187-byte string literal not displayed */ // Whether reduction epilogue fusion is enabled in fusion passes. XlaGpuEnableReductionEpilogueFusion bool `` /* 177-byte string literal not displayed */ // Enable the scatter determinism expander, an optimized pass that // rewrites scatter operations to ensure deterministic behavior with high // performance. 
// Note that even when this flag is disabled, scatter operations may still // be deterministic, although with additional overhead. XlaGpuEnableScatterDeterminismExpander bool `` /* 186-byte string literal not displayed */ // Enables shared constants for XLA/GPU. This allows large constants to be // shared among multiple GPU executables. XlaGpuEnableSplitKAutotuning bool `` /* 156-byte string literal not displayed */ XlaGpuEnableTritonGemm bool `` /* 136-byte string literal not displayed */ // Enables currently disabled features within Triton for Hopper. XlaGpuEnableTritonHopper bool `` /* 142-byte string literal not displayed */ // Enable double buffering for loops. XlaGpuEnableWhileLoopDoubleBuffering bool `` /* 182-byte string literal not displayed */ // Enable hoisting of reduce-scatter out of while loops. XlaGpuEnableWhileLoopReduceScatterCodeMotion bool `` /* 210-byte string literal not displayed */ // Determine the while loop unrolling scheme. XlaGpuEnableWhileLoopUnrolling DebugOptions_WhileLoopUnrolling `` /* 203-byte string literal not displayed */ // Change the layout of the second triton dot operand to be column major. // Only works for (bf16 x bf16) -> bf16. XlaGpuEnsureMinorDotContractionDims bool `` /* 179-byte string literal not displayed */ // Excludes non-deterministic ops from compiled executables. // Unlike --xla_gpu_deterministic_ops does not disable autotuning - the // compilation itself can be non-deterministic. // Scatter ops can be non-deterministic by default; these get converted to // a deterministic implementation. XlaGpuExcludeNondeterministicOps bool `` /* 166-byte string literal not displayed */ // Timeout to terminate on stuck rendez-vous. XlaGpuExecutableTerminateTimeoutSeconds int32 `` /* 189-byte string literal not displayed */ // Timeout to issue a warning on stuck rendez-vous. 
XlaGpuExecutableWarnStuckTimeoutSeconds int32 `` /* 191-byte string literal not displayed */ XlaGpuExhaustiveTilingSearch bool `` /* 154-byte string literal not displayed */ // Specifies the behavior of per kernel autotuning cache. XlaGpuExperimentalAutotuneCacheMode DebugOptions_AutotuneCacheMode `` /* 217-byte string literal not displayed */ // Experimentally disables binary libraries in GPU compiler passes. XlaGpuExperimentalDisableBinaryLibraries bool `` /* 192-byte string literal not displayed */ // Dump FDO profiles in a binary format to a separate file. XlaGpuExperimentalDumpFdoProfiles bool `` /* 171-byte string literal not displayed */ // Enable windowed einsum(collective matmul) rewrite for all-to-all + gemm // This feature is still experimental and effective only if // xla_gpu_multi_streamed_windowed_einsum is set to true. XlaGpuExperimentalEnableAlltoallWindowedEinsum bool `` /* 212-byte string literal not displayed */ // Enabling this flag will attempt to redirect every already-constructed // fusion possible to the Triton emitter. // // For example, a fusion with kind kLoop will be transformed to a fusion with // kind kCustom (and underlying kTritonFusionKind) if it can be tiled // correctly, and if all the instructions it contains are supported by XLA's // Triton emitter. Tile sizes are assigned automatically. // // Pre-existing block-level fusions are left unmodified. XlaGpuExperimentalEnableFusionBlockLevelRewriter bool `` /* 220-byte string literal not displayed */ // Experimental optimizations for SPMD-based pipeline parallelism on GPU. XlaGpuExperimentalEnablePipelineParallelismOpt bool `` /* 212-byte string literal not displayed */ // When enabled, the PriorityFusion pass will try to make Triton fusions first // and foremost where it is possible. // // A kCustom fusion with underlying kTritonFusionKind will be created if it // can be tiled correctly, and if all the instructions it contains are // supported by XLA's Triton emitter. 
Tile sizes are assigned automatically. XlaGpuExperimentalEnableTritonHerolessPriorityFusion bool `` /* 232-byte string literal not displayed */ // When possible, XLA will use Triton's experimental TMA feature. XlaGpuExperimentalEnableTritonTma bool `` /* 171-byte string literal not displayed */ // For sub-byte dot operands, layout them along contracting dimensions. XlaGpuExperimentalPackDotOperandsAlongKDimension bool `` /* 222-byte string literal not displayed */ // This controls how many in-flight collectives latency hiding scheduler // can schedule. Example usage: // With xla_gpu_experimental_parallel_collective_overlap_limit = 1: // // coll.1-start = collective(input) // coll.1-done = collective(coll.1-start) // coll.2-start = collective(input2) // coll.2-done = collective(coll.2-start) // // With xla_gpu_experimental_parallel_collective_overlap_limit = 2: // // coll.1-start = collective(input) // coll.2-start = collective(input2) // coll.1-done = collective(coll.1-start) // coll.2-done = collective(coll.2-start) XlaGpuExperimentalParallelCollectiveOverlapLimit int32 `` /* 218-byte string literal not displayed */ XlaGpuExperimentalStreamAnnotation bool `` /* 172-byte string literal not displayed */ // If true, PTX compilation will fail if a kernel spills registers. // This is meant for debugging and only applies to CUDA PTX compilation. XlaGpuFailPtxCompilationOnRegisterSpilling bool `` /* 202-byte string literal not displayed */ // Filter out kernels that spill registers during autotuning. XlaGpuFilterKernelsSpillingRegistersOnAutotuning bool `` /* 220-byte string literal not displayed */ // Overrides normal multi-threaded compilation setting to use this many // threads. Setting to 0 (the default value) means no enforcement. XlaGpuForceCompilationParallelism int32 `` /* 169-byte string literal not displayed */ // Overrides for XLA GPU's convolution layout heuristic. 
XlaGpuForceConvNchw bool `` /* 127-byte string literal not displayed */ XlaGpuForceConvNhwc bool `` /* 127-byte string literal not displayed */ // Enable flush-to-zero semantics in the GPU backend. XlaGpuFtz bool `protobuf:"varint,62,opt,name=xla_gpu_ftz,json=xlaGpuFtz,proto3" json:"xla_gpu_ftz,omitempty"` XlaGpuFusedAttentionUseCudnnRng bool `` /* 167-byte string literal not displayed */ // Threshold to rewrite matmul to cuBLAS or Triton (minimum combined number of // elements of both matrices in non-batch dimensions to be considered for a // rewrite). XlaGpuGemmRewriteSizeThreshold int64 `` /* 162-byte string literal not displayed */ // If true, we generate debug info when compiling PTX. This is useful for // profiling and debugging. XlaGpuGenerateDebugInfo bool `` /* 139-byte string literal not displayed */ // If true, we generate line info when compiling PTX. This is useful for // profiling and debugging. XlaGpuGenerateLineInfo bool `` /* 136-byte string literal not displayed */ // Identify concurrent regions in GPU graphs and execute them concurrently. XlaGpuGraphEnableConcurrentRegion bool `` /* 171-byte string literal not displayed */ // This number determines how many moved instructions like fusion kernels are // required for a region to be captured as a function to be launched as a GPU // graph. XlaGpuGraphMinGraphSize int32 `` /* 141-byte string literal not displayed */ XlaGpuKernelCacheFile string `` /* 132-byte string literal not displayed */ // If enabled, uses the libnvjitlink library for PTX compilation and linking XlaGpuLibnvjitlinkMode DebugOptions_LibNvJitLinkMode `` /* 173-byte string literal not displayed */ // Paths to files with LLVM code. XlaGpuLlvmIrFile []string `protobuf:"bytes,150,rep,name=xla_gpu_llvm_ir_file,json=xlaGpuLlvmIrFile,proto3" json:"xla_gpu_llvm_ir_file,omitempty"` XlaGpuLlvmVerificationLevel int32 `` /* 151-byte string literal not displayed */ // File to load autotune results from. 
It will be considered a binary file // unless the name ends with .txt or .textproto. At most one loading will // happen during the lifetime of one process, even if the first one is // unsuccessful or different file paths are passed here. This only works on // CUDA. XlaGpuLoadAutotuneResultsFrom string `` /* 158-byte string literal not displayed */ XlaGpuMemoryLimitSlopFactor int32 `` /* 153-byte string literal not displayed */ // Replace custom calls with noop operations. XlaGpuMockCustomCalls bool `` /* 133-byte string literal not displayed */ // Whether to use multiple compute streams to run windowed einsum. XlaGpuMultiStreamedWindowedEinsum bool `` /* 171-byte string literal not displayed */ // Specify the maximum number of channels(SMs) NCCL // will use for collective operations. XlaGpuNcclCollectiveMaxNchannels int64 `` /* 168-byte string literal not displayed */ // Set number of ranks per root rank for NCCL init. XlaGpuNcclInitMaxRankPerRootRatio int64 `` /* 177-byte string literal not displayed */ // Specify the maximum number of channels(SMs) NCCL // will use for p2p operations. XlaGpuNcclP2PMaxNchannels int64 `` /* 147-byte string literal not displayed */ // If true, Nccl errors will terminate the process. XlaGpuNcclTerminateOnError bool `` /* 150-byte string literal not displayed */ // Timeout in seconds before terminating jobs that are stuck in a NCCL // Rendezvous. Negative value disables the timeout and will not terminate. XlaGpuNcclTerminationTimeoutSeconds int64 `` /* 177-byte string literal not displayed */ // If set >= 0, this controls the total bytes(combined sizes of both // operands in bytes) to enable windowed einsum and // xla_gpu_threshold_for_windowed_einsum_mib will be ignored. 
XlaGpuOperandBytesThresholdForWindowedEinsum int64 `` /* 208-byte string literal not displayed */ XlaGpuOverrideGemmAutotuner string `` /* 150-byte string literal not displayed */ XlaGpuPerFusionAutotuneCacheDir string `` /* 166-byte string literal not displayed */ XlaGpuPgleAccuracyChecker DebugOptions_PGLEStrictnessLevel `` /* 187-byte string literal not displayed */ XlaGpuPgleProfileFileOrDirectoryPath string `` /* 183-byte string literal not displayed */ // Paths to files with ptx code. XlaGpuPtxFile []string `protobuf:"bytes,127,rep,name=xla_gpu_ptx_file,json=xlaGpuPtxFile,proto3" json:"xla_gpu_ptx_file,omitempty"` // Size threshold (in bytes) for the GPU reduce-scatter combiner. XlaGpuReduceScatterCombineThresholdBytes int64 `` /* 194-byte string literal not displayed */ // Amount of padding the redzone allocator will put on one side of each buffer // it allocates. (So the buffer's total size will be increased by 2x this // value.) // // Higher values make it more likely that we'll catch an out-of-bounds read or // write. Smaller values consume less memory during autotuning. Note that a // fused cudnn conv has up to 6 total buffers (4 inputs, 1 output, and 1 // scratch), so this can be multiplied by quite a lot. XlaGpuRedzonePaddingBytes int64 `` /* 145-byte string literal not displayed */ // Size threshold (in megabytes) for the GPU redzone scratch allocator. XlaGpuRedzoneScratchMaxMegabytes int64 `` /* 168-byte string literal not displayed */ // If true, will require complete AOT autotuning results; in the case of // missing AOT result, the model will not be compiled or executed, a // `NotFound` error will be returned. XlaGpuRequireCompleteAotAutotuneResults bool `` /* 191-byte string literal not displayed */ // If true, XLA runtime will retain exclusive ownership of the GPU when // running a module, so there are no multi-thread conflicts on the GPU. 
This // can enable some optimizations that reduce the cost of resource management, // e.g., command buffer updates to ensure correctness when running in // multi-thread mode. XlaGpuRequireExclusiveLock bool `` /* 148-byte string literal not displayed */ XlaGpuShapeChecks DebugOptions_ShapeChecks `` /* 153-byte string literal not displayed */ // If true, shards the autotuning work between participating compiler // processes (typically in multi-host setups) and joins the results when // it's done. XlaGpuShardAutotuning bool `` /* 131-byte string literal not displayed */ // If true, abort immediately when conv algorithm picker fails, rather than // logging a warning and proceeding with fallback. XlaGpuStrictConvAlgorithmPicker bool `` /* 165-byte string literal not displayed */ // Description of the target platform in GpuTargetConfigProto format; if // provided, deviceless compilation is assumed, and the current device is // ignored. XlaGpuTargetConfigFilename string `` /* 147-byte string literal not displayed */ // Enable this flag will use a separate memory space color for // temp buffer, and then will use separate memory allocator to allocate it, // as there is no other memory allocation interference, // it will allocate temp buffer to some fix address on every iteration, // which is good for cuda-graph perf. XlaGpuTempBufferUseSeparateColor bool `` /* 170-byte string literal not displayed */ // Threshold to enable windowed einsum (collective matmul) in MB. XlaGpuThresholdForWindowedEinsumMib int64 `` /* 179-byte string literal not displayed */ // Creates triton fusion for all supported gemms. // To make sure only triton gemm is chosen by the autotuner run with // `xla_gpu_cublas_fallback` set to false. 
XlaGpuTritonGemmAny bool `` /* 127-byte string literal not displayed */ XlaGpuTritonGemmDisableReducedPrecisionReduction bool `` /* 220-byte string literal not displayed */ // It is usually preferable to not fallback to the driver; it can consume more // memory, or have bugs. XlaGpuUnsafeFallbackToDriverOnPtxasNotFound bool `` /* 209-byte string literal not displayed */ // Recognises rotate-right patterns (slice, slice, concat) within a while // loop and labels the while loop as a pipelined while loop. This is an // unsafe flag. XlaGpuUnsafePipelinedLoopAnnotator bool `` /* 174-byte string literal not displayed */ // If true, XLA will annotate instructions in the dumps with emitter code // location (source:line) annotations. This helps to identify the source of // the code that emits a particular instruction. XlaGpuUnsupportedAnnotateWithEmitterLoc bool `` /* 191-byte string literal not displayed */ // Internal testing flag to switch RaggedAllToAllDecomposer on or off. XlaGpuUnsupportedEnableRaggedAllToAllDecomposer bool `` /* 219-byte string literal not displayed */ // Internal debug/testing flag to switch Triton GEMM fusions on or off. XlaGpuUnsupportedEnableTritonGemm bool `` /* 171-byte string literal not displayed */ // This instructs the runtime whether to use memcpy for p2p communication when // source and target are located within a node(nvlink). XlaGpuUseMemcpyLocalP2P bool `` /* 141-byte string literal not displayed */ // An option to enable using cuDNN runtime compiled fusion kernels which is // available and recommended for Ampere+ GPUs. XlaGpuUseRuntimeFusion bool `` /* 136-byte string literal not displayed */ // If true, will verify that the numerical results of Triton fusions match // the results of regular emitters. XlaGpuVerifyTritonFusionNumerics bool `` /* 168-byte string literal not displayed */ // Show addresses of HLO ops in graph dump. 
XlaHloGraphAddresses bool `` /* 126-byte string literal not displayed */ // Instrument the computation to collect per-HLO cycle counts. XlaHloProfile bool `protobuf:"varint,9,opt,name=xla_hlo_profile,json=xlaHloProfile,proto3" json:"xla_hlo_profile,omitempty"` // List of HLO passes to disable/enable. These names must exactly match the // pass names as specified by the HloPassInterface::name() method. // // At least one of xla_disable_hlo_passes and xla_enable_hlo_passes_only must // be empty. XlaDisableHloPasses []string `protobuf:"bytes,30,rep,name=xla_disable_hlo_passes,json=xlaDisableHloPasses,proto3" json:"xla_disable_hlo_passes,omitempty"` XlaEnableHloPassesOnly []string `` /* 135-byte string literal not displayed */ // Disables all HLO passes. Note that some passes are necessary for // correctness and the invariants that must be satisfied by "fully optimized" // HLO are different for different devices and may change over time. The only // "guarantee", such as it is, is that if you compile XLA and dump the // optimized HLO for some graph, you should be able to run it again on the // same device with the same build of XLA. XlaDisableAllHloPasses bool `` /* 136-byte string literal not displayed */ // Numerical optimization level for the XLA compiler backend; the specific // interpretation of this value is left to the backends. XlaBackendOptimizationLevel int32 `` /* 148-byte string literal not displayed */ // Embed the compiler IR as a string in the executable. XlaEmbedIrInExecutable bool `` /* 135-byte string literal not displayed */ // Eliminate implicit broadcasts when lowering user computations to HLO // instructions; use explicit broadcast instead. XlaEliminateHloImplicitBroadcast bool `` /* 165-byte string literal not displayed */ // When generating calls to Eigen in the CPU backend, use multi-threaded Eigen // mode. 
XlaCpuMultiThreadEigen bool `` /* 135-byte string literal not displayed */ // If true, in LLVM-based backends, emit !alias.scope metadata in // generated IR. XlaLlvmEnableAliasScopeMetadata bool `` /* 164-byte string literal not displayed */ // If true, in LLVM-based backends, emit !noalias metadata in the // generated IR. XlaLlvmEnableNoaliasMetadata bool `` /* 153-byte string literal not displayed */ // If true, in LLVM-based backends, emit !invariant.load metadata in // the generated IR. XlaLlvmEnableInvariantLoadMetadata bool `` /* 173-byte string literal not displayed */ // If true, a set of expensive LLVM optimization passes will not be run. XlaLlvmDisableExpensivePasses bool `` /* 156-byte string literal not displayed */ // This is used by ClientLibraryTestBase::ComputeAndCompare*. If true, the // computation will run n! times with all permutations of layouts for the // output shape in rank n. For example, with a 3D shape, all permutations of // the set {0, 1, 2} are tried. XlaTestAllOutputLayouts bool `` /* 138-byte string literal not displayed */ // This is used by ClientLibraryTestBase::ComputeAndCompare*. If true, the // computation will run for all permutations of layouts of all input // arguments. For example, with 2 input arguments in 2D and 4D shapes, the // computation will run 2! * 4! times. XlaTestAllInputLayouts bool `` /* 135-byte string literal not displayed */ // Assign colors based on sharding information when generating the Graphviz // HLO graph. XlaHloGraphShardingColor bool `` /* 141-byte string literal not displayed */ // Generate calls to MKL-DNN in the CPU backend. XlaCpuUseMklDnn bool `protobuf:"varint,97,opt,name=xla_cpu_use_mkl_dnn,json=xlaCpuUseMklDnn,proto3" json:"xla_cpu_use_mkl_dnn,omitempty"` // Allows xla to increase the output precision of floating point operations // and all floating-point conversions to be simplified, including those // that affect the numerics. 
The `FloatNormalization` pass inserts many // `f32 -> bf16 -> f32` conversion pairs. These are not removed by the // `AlgebraicSimplifier`, as that will only simplify conversions that are // no-ops, e.g. `bf16 -> f32 -> bf16`. Removing these improves accuracy. XlaAllowExcessPrecision bool `` /* 137-byte string literal not displayed */ // Force the host platform to pretend that there are these many host // "devices". All these devices are backed by the same threadpool. Defaults // to 1. // // Setting this to anything other than 1 can increase overhead from context // switching but we let the user override this behavior to help run tests on // the host that run models in parallel across multiple devices. XlaForceHostPlatformDeviceCount int32 `` /* 165-byte string literal not displayed */ // Enable fast math with eigen in the HLO evaluator. XlaHloEvaluatorUseFastPath bool `` /* 150-byte string literal not displayed */ // Temporary option to allow support for both the R1 and the scalar index // versions of DynamicSlice and DynamicUpdateSlice. Only used for testing. XlaAllowScalarIndexDynamicOps bool `` /* 159-byte string literal not displayed */ // Option to emit a target-specific marker to indicate the start of a training // step. The location of the marker (if any) is determined by the option // value. XlaStepMarkerLocation DebugOptions_StepMarkerLocation `` /* 172-byte string literal not displayed */ // Directory to dump into. XlaDumpTo string `protobuf:"bytes,109,opt,name=xla_dump_to,json=xlaDumpTo,proto3" json:"xla_dump_to,omitempty"` // If specified, will only dump modules which match this regexp. XlaDumpHloModuleRe string `protobuf:"bytes,110,opt,name=xla_dump_hlo_module_re,json=xlaDumpHloModuleRe,proto3" json:"xla_dump_hlo_module_re,omitempty"` // If this flag is specified, will also dump HLO before and after passes that // match this regular expression. Set to .* to dump before/after all passes. 
XlaDumpHloPassRe string `protobuf:"bytes,111,opt,name=xla_dump_hlo_pass_re,json=xlaDumpHloPassRe,proto3" json:"xla_dump_hlo_pass_re,omitempty"` // Specifies the format that HLO is dumped in. Multiple of these may be // specified. XlaDumpHloAsText bool `protobuf:"varint,112,opt,name=xla_dump_hlo_as_text,json=xlaDumpHloAsText,proto3" json:"xla_dump_hlo_as_text,omitempty"` XlaDumpHloAsProto bool `protobuf:"varint,113,opt,name=xla_dump_hlo_as_proto,json=xlaDumpHloAsProto,proto3" json:"xla_dump_hlo_as_proto,omitempty"` XlaDumpHloAsDot bool `protobuf:"varint,114,opt,name=xla_dump_hlo_as_dot,json=xlaDumpHloAsDot,proto3" json:"xla_dump_hlo_as_dot,omitempty"` XlaDumpHloAsUrl bool `protobuf:"varint,115,opt,name=xla_dump_hlo_as_url,json=xlaDumpHloAsUrl,proto3" json:"xla_dump_hlo_as_url,omitempty"` // Dump HLO graphs as an HTML (DOT -> SVG inlined in HTML) XlaDumpHloAsHtml bool `protobuf:"varint,116,opt,name=xla_dump_hlo_as_html,json=xlaDumpHloAsHtml,proto3" json:"xla_dump_hlo_as_html,omitempty"` // Dump the visualization of the fusion progress. XlaDumpFusionVisualization bool `` /* 146-byte string literal not displayed */ // If true, every time an HLO module is run, we will dump an HloSnapshot // (essentially, a serialized module plus its inputs) to the --xla_dump_to // directory. XlaDumpHloSnapshots bool `protobuf:"varint,118,opt,name=xla_dump_hlo_snapshots,json=xlaDumpHloSnapshots,proto3" json:"xla_dump_hlo_snapshots,omitempty"` // Include a timestamp in the dumped filenames. XlaDumpIncludeTimestamp bool `` /* 137-byte string literal not displayed */ // Max number of hlo module dumps in a directory. Set to < 0 for unbounded. XlaDumpMaxHloModules int32 `` /* 130-byte string literal not displayed */ // Dump HloModuleMetadata as a text proto for each HLO module. XlaDumpModuleMetadata bool `` /* 131-byte string literal not displayed */ // GZip-compress protos dumped via --xla_dump_hlo_as_proto. 
XlaDumpCompressProtos bool `` /* 131-byte string literal not displayed */ // Dump HLO in long text format. Ignored unless xla_dump_hlo_as_text is true. XlaDumpHloAsLongText bool `` /* 132-byte string literal not displayed */ // Whether to dump mlir using pretty print form. XlaDumpEnableMlirPrettyForm bool `` /* 153-byte string literal not displayed */ // Debug options that trigger execution errors when NaN or Inf are detected. XlaTpuDetectNan bool `protobuf:"varint,135,opt,name=xla_tpu_detect_nan,json=xlaTpuDetectNan,proto3" json:"xla_tpu_detect_nan,omitempty"` XlaTpuDetectInf bool `protobuf:"varint,136,opt,name=xla_tpu_detect_inf,json=xlaTpuDetectInf,proto3" json:"xla_tpu_detect_inf,omitempty"` // True if TraceMe annotations are enabled for XLA:CPU. XlaCpuEnableXprofTraceme bool `` /* 142-byte string literal not displayed */ // Per-heap size constraint. New heaps will be created if per-heap max size is // reached. XlaMultiheapSizeConstraintPerHeap int32 `` /* 171-byte string literal not displayed */ // Enable detailed logging into vlog. If this is disabled, no // compilation summary will be printed in the end of computation. XlaDetailedLogging bool `protobuf:"varint,252,opt,name=xla_detailed_logging,json=xlaDetailedLogging,proto3" json:"xla_detailed_logging,omitempty"` // Enable HLO dumping. If this is disabled, no HLO modules will be dumped. XlaEnableDumping bool `protobuf:"varint,253,opt,name=xla_enable_dumping,json=xlaEnableDumping,proto3" json:"xla_enable_dumping,omitempty"` // Whether to force inline before llvm module split to get a more balanced // splits for parallel compilation. XlaLlvmForceInlineBeforeSplit bool `` /* 159-byte string literal not displayed */ // Disable dumping metadata in HLO dumps. XlaDumpDisableMetadata bool `` /* 134-byte string literal not displayed */ // If this flag is specified, will only dump HLO before and after passes in // the pass pipeline that matches this regular expression. 
Default empty value // enables dumping in all pipelines. XlaDumpHloPipelineRe string `` /* 129-byte string literal not displayed */ // Generate calls to Arm Compute Library in the CPU backend. XlaCpuUseAcl bool `protobuf:"varint,174,opt,name=xla_cpu_use_acl,json=xlaCpuUseAcl,proto3" json:"xla_cpu_use_acl,omitempty"` // By default, XLA:CPU will run fp16 dot/conv as fp32, as this is generally // (much) faster on our hardware. Set this flag to disable this behavior. XlaCpuStrictDotConvMath bool `` /* 141-byte string literal not displayed */ XlaDumpLatencyHidingSchedule bool `` /* 154-byte string literal not displayed */ // The partitioning algorithm to be used in the PartitionAssignment pass. XlaPartitioningAlgorithm DebugOptions_PartitioningAlgorithm `` /* 182-byte string literal not displayed */ // Maximum number of buffers to print when debugging buffer assignment. XlaDebugBufferAssignmentShowMax int64 `` /* 165-byte string literal not displayed */ // If true, large constants will be printed out when dumping HLOs. XlaDumpLargeConstants bool `` /* 131-byte string literal not displayed */ // Base length to rewrite the reduce window to, no rewrite if set to 0. XlaReduceWindowRewriteBaseLength int64 `` /* 168-byte string literal not displayed */ // The command buffer trace cache size, increasing the cache size may // sometimes reduce the chances of doing command buffer tracing for // updating command buffer instance. XlaCmdBufferTraceCacheSize int64 `` /* 150-byte string literal not displayed */ // Custom call targets with legacy registry API (non FFI API), // that support recording to command buffer custom command, // i.e., custom call target supports cuda-graph capturing for CUDA devices. // This flag is read if CUSTOM_CALL command type is recorded into // command buffer. LegacyCommandBufferCustomCallTargets []string `` /* 179-byte string literal not displayed */ // This flag is used for controlling HLO dumping and NVTX marker. 
If turned // on, both HLO dumping and NVTX marker will use syntactic sugar wrappers // as op names, while the actual op names will be shown if turned off. // // Here is an example HLO excerpt with the flag off: // // async_computation { // param_0 = f32[1,4,8]{1,0,2} parameter(0) // ROOT all-to-all.3.1 = f32[1,4,8]{1,0,2} all-to-all(param_0), // replica_groups={{0,1,2,3,4,5,6,7}}, dimensions={2} // } // ... // // all-to-all-start = // ((f32[1,4,8]{1,0,2}), f32[1,4,8]{1,0,2}) async-start(bitcast.24.0), // calls=async_computation, backend_config={...} // all-to-all-done = f32[1,4,8]{1,0,2} async-done(all-to-all-start) // // and with the flag on: // // all-to-all-start = ((f32[1,4,8]{1,0,2}), f32[1,4,8]{1,0,2}) // all-to-all-start(bitcast.24.0), // replica_groups={{0,1,2,3,4,5,6,7}}, dimensions={2}, // backend_config={...} // all-to-all-done = f32[1,4,8]{1,0,2} all-to-all-done(all-to-all-start) XlaSyntaxSugarAsyncOps bool `` /* 136-byte string literal not displayed */ // Allow launching command buffers while profiling active. // When disabled, execute in op-by-op mode. // TODO(b/355487968): Remove this option when validation complete. XlaEnableCommandBuffersDuringProfiling bool `` /* 186-byte string literal not displayed */ // Whether to ignore channel ids(including verifier channel id checks) // for collectives in the given HLO. XlaExperimentalIgnoreChannelId bool `` /* 160-byte string literal not displayed */ XlaPjrtAllowAutoLayoutInHlo bool `` /* 155-byte string literal not displayed */ // Extra options to pass to the compilation backend (e.g. LLVM); specific // interpretation of these values is left to the backend. XlaBackendExtraOptions map[string]string `` /* 207-byte string literal not displayed */ // contains filtered or unexported fields }
Debugging options for XLA. These options may change at any time - there are no guarantees about backward or forward compatibility for these fields.
Debug options naming and organization:
Backend-agnostic options: `xla_$flag_name` - go first, and sorted alphabetically by the flag name.
Backend-specific options: `xla_$backend_$flag_name` - must be in the corresponding backend section, and sorted alphabetically by the flag name.
func (*DebugOptions) Descriptor
func (*DebugOptions) Descriptor() ([]byte, []int)
Deprecated: Use DebugOptions.ProtoReflect.Descriptor instead.
func (*DebugOptions) GetLegacyCommandBufferCustomCallTargets ¶
func (x *DebugOptions) GetLegacyCommandBufferCustomCallTargets() []string
func (*DebugOptions) GetXlaAllowExcessPrecision ¶
func (x *DebugOptions) GetXlaAllowExcessPrecision() bool
func (*DebugOptions) GetXlaAllowScalarIndexDynamicOps ¶
func (x *DebugOptions) GetXlaAllowScalarIndexDynamicOps() bool
func (*DebugOptions) GetXlaBackendExtraOptions ¶
func (x *DebugOptions) GetXlaBackendExtraOptions() map[string]string
func (*DebugOptions) GetXlaBackendOptimizationLevel ¶
func (x *DebugOptions) GetXlaBackendOptimizationLevel() int32
func (*DebugOptions) GetXlaCmdBufferTraceCacheSize ¶
func (x *DebugOptions) GetXlaCmdBufferTraceCacheSize() int64
func (*DebugOptions) GetXlaCpuCopyInsertionUseRegionAnalysis ¶ added in v0.4.7
func (x *DebugOptions) GetXlaCpuCopyInsertionUseRegionAnalysis() bool
func (*DebugOptions) GetXlaCpuEnableConcurrencyOptimizedScheduler ¶
func (x *DebugOptions) GetXlaCpuEnableConcurrencyOptimizedScheduler() bool
func (*DebugOptions) GetXlaCpuEnableFastMath ¶
func (x *DebugOptions) GetXlaCpuEnableFastMath() bool
func (*DebugOptions) GetXlaCpuEnableFastMinMax ¶
func (x *DebugOptions) GetXlaCpuEnableFastMinMax() bool
func (*DebugOptions) GetXlaCpuEnableXprofTraceme ¶
func (x *DebugOptions) GetXlaCpuEnableXprofTraceme() bool
func (*DebugOptions) GetXlaCpuFastMathHonorDivision ¶
func (x *DebugOptions) GetXlaCpuFastMathHonorDivision() bool
func (*DebugOptions) GetXlaCpuFastMathHonorFunctions ¶
func (x *DebugOptions) GetXlaCpuFastMathHonorFunctions() bool
func (*DebugOptions) GetXlaCpuFastMathHonorInfs ¶
func (x *DebugOptions) GetXlaCpuFastMathHonorInfs() bool
func (*DebugOptions) GetXlaCpuFastMathHonorNans ¶
func (x *DebugOptions) GetXlaCpuFastMathHonorNans() bool
func (*DebugOptions) GetXlaCpuMaxIsa ¶ added in v0.4.7
func (x *DebugOptions) GetXlaCpuMaxIsa() string
func (*DebugOptions) GetXlaCpuMultiThreadEigen ¶
func (x *DebugOptions) GetXlaCpuMultiThreadEigen() bool
func (*DebugOptions) GetXlaCpuParallelCodegenSplitCount ¶
func (x *DebugOptions) GetXlaCpuParallelCodegenSplitCount() int32
func (*DebugOptions) GetXlaCpuPreferVectorWidth ¶
func (x *DebugOptions) GetXlaCpuPreferVectorWidth() int32
func (*DebugOptions) GetXlaCpuStrictDotConvMath ¶
func (x *DebugOptions) GetXlaCpuStrictDotConvMath() bool
func (*DebugOptions) GetXlaCpuUseAcl ¶
func (x *DebugOptions) GetXlaCpuUseAcl() bool
func (*DebugOptions) GetXlaCpuUseMklDnn ¶
func (x *DebugOptions) GetXlaCpuUseMklDnn() bool
func (*DebugOptions) GetXlaCpuUseThunkRuntime ¶
func (x *DebugOptions) GetXlaCpuUseThunkRuntime() bool
func (*DebugOptions) GetXlaCpuUseXnnpack ¶ added in v0.5.1
func (x *DebugOptions) GetXlaCpuUseXnnpack() bool
func (*DebugOptions) GetXlaDebugBufferAssignmentShowMax ¶
func (x *DebugOptions) GetXlaDebugBufferAssignmentShowMax() int64
func (*DebugOptions) GetXlaDetailedLogging ¶
func (x *DebugOptions) GetXlaDetailedLogging() bool
func (*DebugOptions) GetXlaDisableAllHloPasses ¶
func (x *DebugOptions) GetXlaDisableAllHloPasses() bool
func (*DebugOptions) GetXlaDisableHloPasses ¶
func (x *DebugOptions) GetXlaDisableHloPasses() []string
func (*DebugOptions) GetXlaDumpCompressProtos ¶
func (x *DebugOptions) GetXlaDumpCompressProtos() bool
func (*DebugOptions) GetXlaDumpDisableMetadata ¶
func (x *DebugOptions) GetXlaDumpDisableMetadata() bool
func (*DebugOptions) GetXlaDumpEnableMlirPrettyForm ¶
func (x *DebugOptions) GetXlaDumpEnableMlirPrettyForm() bool
func (*DebugOptions) GetXlaDumpFusionVisualization ¶
func (x *DebugOptions) GetXlaDumpFusionVisualization() bool
func (*DebugOptions) GetXlaDumpHloAsDot ¶
func (x *DebugOptions) GetXlaDumpHloAsDot() bool
func (*DebugOptions) GetXlaDumpHloAsHtml ¶
func (x *DebugOptions) GetXlaDumpHloAsHtml() bool
func (*DebugOptions) GetXlaDumpHloAsLongText ¶
func (x *DebugOptions) GetXlaDumpHloAsLongText() bool
func (*DebugOptions) GetXlaDumpHloAsProto ¶
func (x *DebugOptions) GetXlaDumpHloAsProto() bool
func (*DebugOptions) GetXlaDumpHloAsText ¶
func (x *DebugOptions) GetXlaDumpHloAsText() bool
func (*DebugOptions) GetXlaDumpHloAsUrl ¶
func (x *DebugOptions) GetXlaDumpHloAsUrl() bool
func (*DebugOptions) GetXlaDumpHloModuleRe ¶
func (x *DebugOptions) GetXlaDumpHloModuleRe() string
func (*DebugOptions) GetXlaDumpHloPassRe ¶
func (x *DebugOptions) GetXlaDumpHloPassRe() string
func (*DebugOptions) GetXlaDumpHloPipelineRe ¶
func (x *DebugOptions) GetXlaDumpHloPipelineRe() string
func (*DebugOptions) GetXlaDumpHloSnapshots ¶
func (x *DebugOptions) GetXlaDumpHloSnapshots() bool
func (*DebugOptions) GetXlaDumpIncludeTimestamp ¶
func (x *DebugOptions) GetXlaDumpIncludeTimestamp() bool
func (*DebugOptions) GetXlaDumpLargeConstants ¶
func (x *DebugOptions) GetXlaDumpLargeConstants() bool
func (*DebugOptions) GetXlaDumpLatencyHidingSchedule ¶
func (x *DebugOptions) GetXlaDumpLatencyHidingSchedule() bool
func (*DebugOptions) GetXlaDumpMaxHloModules ¶
func (x *DebugOptions) GetXlaDumpMaxHloModules() int32
func (*DebugOptions) GetXlaDumpModuleMetadata ¶
func (x *DebugOptions) GetXlaDumpModuleMetadata() bool
func (*DebugOptions) GetXlaDumpTo ¶
func (x *DebugOptions) GetXlaDumpTo() string
func (*DebugOptions) GetXlaEliminateHloImplicitBroadcast ¶
func (x *DebugOptions) GetXlaEliminateHloImplicitBroadcast() bool
func (*DebugOptions) GetXlaEmbedIrInExecutable ¶
func (x *DebugOptions) GetXlaEmbedIrInExecutable() bool
func (*DebugOptions) GetXlaEnableCommandBuffersDuringProfiling ¶
func (x *DebugOptions) GetXlaEnableCommandBuffersDuringProfiling() bool
func (*DebugOptions) GetXlaEnableDumping ¶
func (x *DebugOptions) GetXlaEnableDumping() bool
func (*DebugOptions) GetXlaEnableFastMath ¶ added in v0.4.7
func (x *DebugOptions) GetXlaEnableFastMath() bool
func (*DebugOptions) GetXlaEnableHloPassesOnly ¶
func (x *DebugOptions) GetXlaEnableHloPassesOnly() []string
func (*DebugOptions) GetXlaExperimentalIgnoreChannelId ¶ added in v0.4.2
func (x *DebugOptions) GetXlaExperimentalIgnoreChannelId() bool
func (*DebugOptions) GetXlaForceHostPlatformDeviceCount ¶
func (x *DebugOptions) GetXlaForceHostPlatformDeviceCount() int32
func (*DebugOptions) GetXlaGpuAlgorithmDenylistPath ¶
func (x *DebugOptions) GetXlaGpuAlgorithmDenylistPath() string
func (*DebugOptions) GetXlaGpuAllGatherCombineThresholdBytes ¶
func (x *DebugOptions) GetXlaGpuAllGatherCombineThresholdBytes() int64
func (*DebugOptions) GetXlaGpuAllReduceBlueconnectNumDevicesPerHost ¶
func (x *DebugOptions) GetXlaGpuAllReduceBlueconnectNumDevicesPerHost() int32
func (*DebugOptions) GetXlaGpuAllReduceCombineThresholdBytes ¶
func (x *DebugOptions) GetXlaGpuAllReduceCombineThresholdBytes() int64
func (*DebugOptions) GetXlaGpuAnalyticalLatencyEstimatorOptions ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuAnalyticalLatencyEstimatorOptions() map[string]string
func (*DebugOptions) GetXlaGpuAsyncDot ¶
func (x *DebugOptions) GetXlaGpuAsyncDot() bool
func (*DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetGb ¶
func (x *DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetGb() int32
func (*DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetRatio ¶
func (x *DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetRatio() float32
func (*DebugOptions) GetXlaGpuAutotuneGemmRtol ¶
func (x *DebugOptions) GetXlaGpuAutotuneGemmRtol() float32
func (*DebugOptions) GetXlaGpuAutotuneLevel ¶
func (x *DebugOptions) GetXlaGpuAutotuneLevel() int32
func (*DebugOptions) GetXlaGpuAutotuneMaxSolutions ¶
func (x *DebugOptions) GetXlaGpuAutotuneMaxSolutions() int64
func (*DebugOptions) GetXlaGpuCollectCostModelStats ¶
func (x *DebugOptions) GetXlaGpuCollectCostModelStats() bool
func (*DebugOptions) GetXlaGpuCollectiveInflationFactor ¶
func (x *DebugOptions) GetXlaGpuCollectiveInflationFactor() int32
func (*DebugOptions) GetXlaGpuCollectivePermuteDecomposerThreshold ¶
func (x *DebugOptions) GetXlaGpuCollectivePermuteDecomposerThreshold() int64
func (*DebugOptions) GetXlaGpuCollectivesUsePersistentCliques ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuCollectivesUsePersistentCliques() bool
func (*DebugOptions) GetXlaGpuCopyInsertionUseRegionAnalysis ¶
func (x *DebugOptions) GetXlaGpuCopyInsertionUseRegionAnalysis() bool
func (*DebugOptions) GetXlaGpuCrashOnVerificationFailures ¶
func (x *DebugOptions) GetXlaGpuCrashOnVerificationFailures() bool
func (*DebugOptions) GetXlaGpuCublasFallback ¶
func (x *DebugOptions) GetXlaGpuCublasFallback() bool
func (*DebugOptions) GetXlaGpuCudaDataDir ¶
func (x *DebugOptions) GetXlaGpuCudaDataDir() string
func (*DebugOptions) GetXlaGpuCudnnGemmFusionLevel ¶
func (x *DebugOptions) GetXlaGpuCudnnGemmFusionLevel() int32
func (*DebugOptions) GetXlaGpuCudnnGemmMaxPlans ¶
func (x *DebugOptions) GetXlaGpuCudnnGemmMaxPlans() int32
func (*DebugOptions) GetXlaGpuDeterministicOps ¶
func (x *DebugOptions) GetXlaGpuDeterministicOps() bool
func (*DebugOptions) GetXlaGpuDisableAsyncCollectives ¶
func (x *DebugOptions) GetXlaGpuDisableAsyncCollectives() []DebugOptions_CollectiveOpType
func (*DebugOptions) GetXlaGpuDisableGpuasmOptimizations ¶
func (x *DebugOptions) GetXlaGpuDisableGpuasmOptimizations() bool
func (*DebugOptions) GetXlaGpuDotMergerThresholdMb ¶ added in v0.4.2
func (x *DebugOptions) GetXlaGpuDotMergerThresholdMb() int32
func (*DebugOptions) GetXlaGpuDumpAutotuneLogsTo ¶
func (x *DebugOptions) GetXlaGpuDumpAutotuneLogsTo() string
func (*DebugOptions) GetXlaGpuDumpAutotuneResultsTo ¶
func (x *DebugOptions) GetXlaGpuDumpAutotuneResultsTo() string
func (*DebugOptions) GetXlaGpuDumpAutotunedGemmFusions ¶
func (x *DebugOptions) GetXlaGpuDumpAutotunedGemmFusions() bool
func (*DebugOptions) GetXlaGpuDumpHloUnoptimizedSnapshots ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuDumpHloUnoptimizedSnapshots() bool
func (*DebugOptions) GetXlaGpuDumpLlvmir ¶
func (x *DebugOptions) GetXlaGpuDumpLlvmir() bool
func (*DebugOptions) GetXlaGpuEnableAllGatherCombineByDim ¶
func (x *DebugOptions) GetXlaGpuEnableAllGatherCombineByDim() bool
func (*DebugOptions) GetXlaGpuEnableAnalyticalLatencyEstimator ¶
func (x *DebugOptions) GetXlaGpuEnableAnalyticalLatencyEstimator() bool
func (*DebugOptions) GetXlaGpuEnableAnalyticalSolLatencyEstimator ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuEnableAnalyticalSolLatencyEstimator() bool
func (*DebugOptions) GetXlaGpuEnableApproxCostlyCollectives ¶
func (x *DebugOptions) GetXlaGpuEnableApproxCostlyCollectives() bool
func (*DebugOptions) GetXlaGpuEnableCommandBuffer ¶
func (x *DebugOptions) GetXlaGpuEnableCommandBuffer() []DebugOptions_CommandBufferCmdType
func (*DebugOptions) GetXlaGpuEnableCubRadixSort ¶
func (x *DebugOptions) GetXlaGpuEnableCubRadixSort() bool
func (*DebugOptions) GetXlaGpuEnableCublaslt ¶
func (x *DebugOptions) GetXlaGpuEnableCublaslt() bool
func (*DebugOptions) GetXlaGpuEnableCudnnFmha ¶
func (x *DebugOptions) GetXlaGpuEnableCudnnFmha() bool
func (*DebugOptions) GetXlaGpuEnableCudnnFrontend ¶
func (x *DebugOptions) GetXlaGpuEnableCudnnFrontend() bool
func (*DebugOptions) GetXlaGpuEnableCudnnInt8X32ConvolutionReordering ¶
func (x *DebugOptions) GetXlaGpuEnableCudnnInt8X32ConvolutionReordering() bool
func (*DebugOptions) GetXlaGpuEnableCudnnLayerNorm ¶
func (x *DebugOptions) GetXlaGpuEnableCudnnLayerNorm() bool
func (*DebugOptions) GetXlaGpuEnableCustomFusions ¶
func (x *DebugOptions) GetXlaGpuEnableCustomFusions() bool
func (*DebugOptions) GetXlaGpuEnableCustomFusionsRe ¶
func (x *DebugOptions) GetXlaGpuEnableCustomFusionsRe() string
func (*DebugOptions) GetXlaGpuEnableDynamicSliceFusion ¶
func (x *DebugOptions) GetXlaGpuEnableDynamicSliceFusion() bool
func (*DebugOptions) GetXlaGpuEnableFastMinMax ¶
func (x *DebugOptions) GetXlaGpuEnableFastMinMax() bool
func (*DebugOptions) GetXlaGpuEnableHighestPriorityAsyncStream ¶
func (x *DebugOptions) GetXlaGpuEnableHighestPriorityAsyncStream() bool
func (*DebugOptions) GetXlaGpuEnableHostMemoryOffloading ¶
func (x *DebugOptions) GetXlaGpuEnableHostMemoryOffloading() bool
func (*DebugOptions) GetXlaGpuEnableLatencyHidingScheduler ¶
func (x *DebugOptions) GetXlaGpuEnableLatencyHidingScheduler() bool
func (*DebugOptions) GetXlaGpuEnableLibnvptxcompiler ¶
func (x *DebugOptions) GetXlaGpuEnableLibnvptxcompiler() bool
func (*DebugOptions) GetXlaGpuEnableLlvmModuleCompilationParallelism ¶
func (x *DebugOptions) GetXlaGpuEnableLlvmModuleCompilationParallelism() bool
func (*DebugOptions) GetXlaGpuEnableNcclCliqueOptimization ¶
func (x *DebugOptions) GetXlaGpuEnableNcclCliqueOptimization() bool
func (*DebugOptions) GetXlaGpuEnableNcclCommSplitting ¶
func (x *DebugOptions) GetXlaGpuEnableNcclCommSplitting() bool
func (*DebugOptions) GetXlaGpuEnableNcclPerStreamComms ¶
func (x *DebugOptions) GetXlaGpuEnableNcclPerStreamComms() bool
func (*DebugOptions) GetXlaGpuEnableNcclUserBuffers ¶
func (x *DebugOptions) GetXlaGpuEnableNcclUserBuffers() bool
func (*DebugOptions) GetXlaGpuEnablePipelinedAllGather ¶
func (x *DebugOptions) GetXlaGpuEnablePipelinedAllGather() bool
func (*DebugOptions) GetXlaGpuEnablePipelinedAllReduce ¶
func (x *DebugOptions) GetXlaGpuEnablePipelinedAllReduce() bool
func (*DebugOptions) GetXlaGpuEnablePipelinedCollectives ¶
func (x *DebugOptions) GetXlaGpuEnablePipelinedCollectives() bool
func (*DebugOptions) GetXlaGpuEnablePipelinedP2P ¶
func (x *DebugOptions) GetXlaGpuEnablePipelinedP2P() bool
func (*DebugOptions) GetXlaGpuEnablePipelinedReduceScatter ¶
func (x *DebugOptions) GetXlaGpuEnablePipelinedReduceScatter() bool
func (*DebugOptions) GetXlaGpuEnableReassociationForConvertedAr ¶
func (x *DebugOptions) GetXlaGpuEnableReassociationForConvertedAr() bool
func (*DebugOptions) GetXlaGpuEnableReduceScatterCombineByDim ¶
func (x *DebugOptions) GetXlaGpuEnableReduceScatterCombineByDim() bool
func (*DebugOptions) GetXlaGpuEnableReductionEpilogueFusion ¶
func (x *DebugOptions) GetXlaGpuEnableReductionEpilogueFusion() bool
func (*DebugOptions) GetXlaGpuEnableScatterDeterminismExpander ¶ added in v0.4.9
func (x *DebugOptions) GetXlaGpuEnableScatterDeterminismExpander() bool
func (*DebugOptions) GetXlaGpuEnableSharedConstants ¶
func (x *DebugOptions) GetXlaGpuEnableSharedConstants() bool
func (*DebugOptions) GetXlaGpuEnableSplitKAutotuning ¶
func (x *DebugOptions) GetXlaGpuEnableSplitKAutotuning() bool
func (*DebugOptions) GetXlaGpuEnableTritonGemm ¶
func (x *DebugOptions) GetXlaGpuEnableTritonGemm() bool
func (*DebugOptions) GetXlaGpuEnableTritonHopper ¶
func (x *DebugOptions) GetXlaGpuEnableTritonHopper() bool
func (*DebugOptions) GetXlaGpuEnableWhileLoopDoubleBuffering ¶
func (x *DebugOptions) GetXlaGpuEnableWhileLoopDoubleBuffering() bool
func (*DebugOptions) GetXlaGpuEnableWhileLoopReduceScatterCodeMotion ¶
func (x *DebugOptions) GetXlaGpuEnableWhileLoopReduceScatterCodeMotion() bool
func (*DebugOptions) GetXlaGpuEnableWhileLoopUnrolling ¶
func (x *DebugOptions) GetXlaGpuEnableWhileLoopUnrolling() DebugOptions_WhileLoopUnrolling
func (*DebugOptions) GetXlaGpuEnsureMinorDotContractionDims ¶
func (x *DebugOptions) GetXlaGpuEnsureMinorDotContractionDims() bool
func (*DebugOptions) GetXlaGpuExcludeNondeterministicOps ¶
func (x *DebugOptions) GetXlaGpuExcludeNondeterministicOps() bool
func (*DebugOptions) GetXlaGpuExecutableTerminateTimeoutSeconds ¶ added in v0.4.2
func (x *DebugOptions) GetXlaGpuExecutableTerminateTimeoutSeconds() int32
func (*DebugOptions) GetXlaGpuExecutableWarnStuckTimeoutSeconds ¶ added in v0.4.2
func (x *DebugOptions) GetXlaGpuExecutableWarnStuckTimeoutSeconds() int32
func (*DebugOptions) GetXlaGpuExhaustiveTilingSearch ¶
func (x *DebugOptions) GetXlaGpuExhaustiveTilingSearch() bool
func (*DebugOptions) GetXlaGpuExperimentalAutotuneCacheMode ¶ added in v0.4.0
func (x *DebugOptions) GetXlaGpuExperimentalAutotuneCacheMode() DebugOptions_AutotuneCacheMode
func (*DebugOptions) GetXlaGpuExperimentalDisableBinaryLibraries ¶ added in v0.4.2
func (x *DebugOptions) GetXlaGpuExperimentalDisableBinaryLibraries() bool
func (*DebugOptions) GetXlaGpuExperimentalDumpFdoProfiles ¶ added in v0.4.7
func (x *DebugOptions) GetXlaGpuExperimentalDumpFdoProfiles() bool
func (*DebugOptions) GetXlaGpuExperimentalEnableAlltoallWindowedEinsum ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuExperimentalEnableAlltoallWindowedEinsum() bool
func (*DebugOptions) GetXlaGpuExperimentalEnableFusionBlockLevelRewriter ¶ added in v0.4.7
func (x *DebugOptions) GetXlaGpuExperimentalEnableFusionBlockLevelRewriter() bool
func (*DebugOptions) GetXlaGpuExperimentalEnablePipelineParallelismOpt ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuExperimentalEnablePipelineParallelismOpt() bool
func (*DebugOptions) GetXlaGpuExperimentalEnableTritonHerolessPriorityFusion ¶ added in v0.4.7
func (x *DebugOptions) GetXlaGpuExperimentalEnableTritonHerolessPriorityFusion() bool
func (*DebugOptions) GetXlaGpuExperimentalEnableTritonTma ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuExperimentalEnableTritonTma() bool
func (*DebugOptions) GetXlaGpuExperimentalPackDotOperandsAlongKDimension ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuExperimentalPackDotOperandsAlongKDimension() bool
func (*DebugOptions) GetXlaGpuExperimentalParallelCollectiveOverlapLimit ¶ added in v0.4.7
func (x *DebugOptions) GetXlaGpuExperimentalParallelCollectiveOverlapLimit() int32
func (*DebugOptions) GetXlaGpuExperimentalStreamAnnotation ¶ added in v0.4.7
func (x *DebugOptions) GetXlaGpuExperimentalStreamAnnotation() bool
func (*DebugOptions) GetXlaGpuFailPtxCompilationOnRegisterSpilling ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuFailPtxCompilationOnRegisterSpilling() bool
func (*DebugOptions) GetXlaGpuFilterKernelsSpillingRegistersOnAutotuning ¶
func (x *DebugOptions) GetXlaGpuFilterKernelsSpillingRegistersOnAutotuning() bool
func (*DebugOptions) GetXlaGpuForceCompilationParallelism ¶
func (x *DebugOptions) GetXlaGpuForceCompilationParallelism() int32
func (*DebugOptions) GetXlaGpuForceConvNchw ¶
func (x *DebugOptions) GetXlaGpuForceConvNchw() bool
func (*DebugOptions) GetXlaGpuForceConvNhwc ¶
func (x *DebugOptions) GetXlaGpuForceConvNhwc() bool
func (*DebugOptions) GetXlaGpuFtz ¶
func (x *DebugOptions) GetXlaGpuFtz() bool
func (*DebugOptions) GetXlaGpuFusedAttentionUseCudnnRng ¶
func (x *DebugOptions) GetXlaGpuFusedAttentionUseCudnnRng() bool
func (*DebugOptions) GetXlaGpuGemmRewriteSizeThreshold ¶
func (x *DebugOptions) GetXlaGpuGemmRewriteSizeThreshold() int64
func (*DebugOptions) GetXlaGpuGenerateDebugInfo ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuGenerateDebugInfo() bool
func (*DebugOptions) GetXlaGpuGenerateLineInfo ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuGenerateLineInfo() bool
func (*DebugOptions) GetXlaGpuGraphEnableConcurrentRegion ¶
func (x *DebugOptions) GetXlaGpuGraphEnableConcurrentRegion() bool
func (*DebugOptions) GetXlaGpuGraphMinGraphSize ¶
func (x *DebugOptions) GetXlaGpuGraphMinGraphSize() int32
func (*DebugOptions) GetXlaGpuKernelCacheFile ¶
func (x *DebugOptions) GetXlaGpuKernelCacheFile() string
func (*DebugOptions) GetXlaGpuLibnvjitlinkMode ¶ added in v0.4.9
func (x *DebugOptions) GetXlaGpuLibnvjitlinkMode() DebugOptions_LibNvJitLinkMode
func (*DebugOptions) GetXlaGpuLlvmIrFile ¶
func (x *DebugOptions) GetXlaGpuLlvmIrFile() []string
func (*DebugOptions) GetXlaGpuLlvmVerificationLevel ¶
func (x *DebugOptions) GetXlaGpuLlvmVerificationLevel() int32
func (*DebugOptions) GetXlaGpuLoadAutotuneResultsFrom ¶
func (x *DebugOptions) GetXlaGpuLoadAutotuneResultsFrom() string
func (*DebugOptions) GetXlaGpuMemoryLimitSlopFactor ¶
func (x *DebugOptions) GetXlaGpuMemoryLimitSlopFactor() int32
func (*DebugOptions) GetXlaGpuMockCustomCalls ¶
func (x *DebugOptions) GetXlaGpuMockCustomCalls() bool
func (*DebugOptions) GetXlaGpuMultiStreamedWindowedEinsum ¶
func (x *DebugOptions) GetXlaGpuMultiStreamedWindowedEinsum() bool
func (*DebugOptions) GetXlaGpuNcclCollectiveMaxNchannels ¶
func (x *DebugOptions) GetXlaGpuNcclCollectiveMaxNchannels() int64
func (*DebugOptions) GetXlaGpuNcclInitMaxRankPerRootRatio ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuNcclInitMaxRankPerRootRatio() int64
func (*DebugOptions) GetXlaGpuNcclP2PMaxNchannels ¶
func (x *DebugOptions) GetXlaGpuNcclP2PMaxNchannels() int64
func (*DebugOptions) GetXlaGpuNcclTerminateOnError ¶
func (x *DebugOptions) GetXlaGpuNcclTerminateOnError() bool
func (*DebugOptions) GetXlaGpuNcclTerminationTimeoutSeconds ¶
func (x *DebugOptions) GetXlaGpuNcclTerminationTimeoutSeconds() int64
func (*DebugOptions) GetXlaGpuOperandBytesThresholdForWindowedEinsum ¶ added in v0.4.7
func (x *DebugOptions) GetXlaGpuOperandBytesThresholdForWindowedEinsum() int64
func (*DebugOptions) GetXlaGpuOverrideGemmAutotuner ¶
func (x *DebugOptions) GetXlaGpuOverrideGemmAutotuner() string
func (*DebugOptions) GetXlaGpuPerFusionAutotuneCacheDir ¶
func (x *DebugOptions) GetXlaGpuPerFusionAutotuneCacheDir() string
func (*DebugOptions) GetXlaGpuPgleAccuracyChecker ¶ added in v0.4.7
func (x *DebugOptions) GetXlaGpuPgleAccuracyChecker() DebugOptions_PGLEStrictnessLevel
func (*DebugOptions) GetXlaGpuPgleProfileFileOrDirectoryPath ¶
func (x *DebugOptions) GetXlaGpuPgleProfileFileOrDirectoryPath() string
func (*DebugOptions) GetXlaGpuPtxFile ¶
func (x *DebugOptions) GetXlaGpuPtxFile() []string
func (*DebugOptions) GetXlaGpuReduceScatterCombineThresholdBytes ¶
func (x *DebugOptions) GetXlaGpuReduceScatterCombineThresholdBytes() int64
func (*DebugOptions) GetXlaGpuRedzonePaddingBytes ¶
func (x *DebugOptions) GetXlaGpuRedzonePaddingBytes() int64
func (*DebugOptions) GetXlaGpuRedzoneScratchMaxMegabytes ¶
func (x *DebugOptions) GetXlaGpuRedzoneScratchMaxMegabytes() int64
func (*DebugOptions) GetXlaGpuRequireCompleteAotAutotuneResults ¶
func (x *DebugOptions) GetXlaGpuRequireCompleteAotAutotuneResults() bool
func (*DebugOptions) GetXlaGpuRequireExclusiveLock ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuRequireExclusiveLock() bool
func (*DebugOptions) GetXlaGpuShapeChecks ¶
func (x *DebugOptions) GetXlaGpuShapeChecks() DebugOptions_ShapeChecks
func (*DebugOptions) GetXlaGpuShardAutotuning ¶
func (x *DebugOptions) GetXlaGpuShardAutotuning() bool
func (*DebugOptions) GetXlaGpuStrictConvAlgorithmPicker ¶
func (x *DebugOptions) GetXlaGpuStrictConvAlgorithmPicker() bool
func (*DebugOptions) GetXlaGpuTargetConfigFilename ¶
func (x *DebugOptions) GetXlaGpuTargetConfigFilename() string
func (*DebugOptions) GetXlaGpuTempBufferUseSeparateColor ¶
func (x *DebugOptions) GetXlaGpuTempBufferUseSeparateColor() bool
func (*DebugOptions) GetXlaGpuThresholdForWindowedEinsumMib ¶
func (x *DebugOptions) GetXlaGpuThresholdForWindowedEinsumMib() int64
func (*DebugOptions) GetXlaGpuTritonGemmAny ¶
func (x *DebugOptions) GetXlaGpuTritonGemmAny() bool
func (*DebugOptions) GetXlaGpuTritonGemmDisableReducedPrecisionReduction ¶
func (x *DebugOptions) GetXlaGpuTritonGemmDisableReducedPrecisionReduction() bool
func (*DebugOptions) GetXlaGpuUnsafeFallbackToDriverOnPtxasNotFound ¶
func (x *DebugOptions) GetXlaGpuUnsafeFallbackToDriverOnPtxasNotFound() bool
func (*DebugOptions) GetXlaGpuUnsafePipelinedLoopAnnotator ¶
func (x *DebugOptions) GetXlaGpuUnsafePipelinedLoopAnnotator() bool
func (*DebugOptions) GetXlaGpuUnsupportedAnnotateWithEmitterLoc ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuUnsupportedAnnotateWithEmitterLoc() bool
func (*DebugOptions) GetXlaGpuUnsupportedEnableRaggedAllToAllDecomposer ¶ added in v0.5.1
func (x *DebugOptions) GetXlaGpuUnsupportedEnableRaggedAllToAllDecomposer() bool
func (*DebugOptions) GetXlaGpuUnsupportedEnableTritonGemm ¶
func (x *DebugOptions) GetXlaGpuUnsupportedEnableTritonGemm() bool
func (*DebugOptions) GetXlaGpuUseMemcpyLocalP2P ¶
func (x *DebugOptions) GetXlaGpuUseMemcpyLocalP2P() bool
func (*DebugOptions) GetXlaGpuUseRuntimeFusion ¶
func (x *DebugOptions) GetXlaGpuUseRuntimeFusion() bool
func (*DebugOptions) GetXlaGpuVerifyTritonFusionNumerics ¶
func (x *DebugOptions) GetXlaGpuVerifyTritonFusionNumerics() bool
func (*DebugOptions) GetXlaHloEvaluatorUseFastPath ¶
func (x *DebugOptions) GetXlaHloEvaluatorUseFastPath() bool
func (*DebugOptions) GetXlaHloGraphAddresses ¶
func (x *DebugOptions) GetXlaHloGraphAddresses() bool
func (*DebugOptions) GetXlaHloGraphShardingColor ¶
func (x *DebugOptions) GetXlaHloGraphShardingColor() bool
func (*DebugOptions) GetXlaHloProfile ¶
func (x *DebugOptions) GetXlaHloProfile() bool
func (*DebugOptions) GetXlaLlvmDisableExpensivePasses ¶
func (x *DebugOptions) GetXlaLlvmDisableExpensivePasses() bool
func (*DebugOptions) GetXlaLlvmEnableAliasScopeMetadata ¶
func (x *DebugOptions) GetXlaLlvmEnableAliasScopeMetadata() bool
func (*DebugOptions) GetXlaLlvmEnableInvariantLoadMetadata ¶
func (x *DebugOptions) GetXlaLlvmEnableInvariantLoadMetadata() bool
func (*DebugOptions) GetXlaLlvmEnableNoaliasMetadata ¶
func (x *DebugOptions) GetXlaLlvmEnableNoaliasMetadata() bool
func (*DebugOptions) GetXlaLlvmForceInlineBeforeSplit ¶
func (x *DebugOptions) GetXlaLlvmForceInlineBeforeSplit() bool
func (*DebugOptions) GetXlaMultiheapSizeConstraintPerHeap ¶
func (x *DebugOptions) GetXlaMultiheapSizeConstraintPerHeap() int32
func (*DebugOptions) GetXlaPartitioningAlgorithm ¶
func (x *DebugOptions) GetXlaPartitioningAlgorithm() DebugOptions_PartitioningAlgorithm
func (*DebugOptions) GetXlaPjrtAllowAutoLayoutInHlo ¶ added in v0.4.9
func (x *DebugOptions) GetXlaPjrtAllowAutoLayoutInHlo() bool
func (*DebugOptions) GetXlaReduceWindowRewriteBaseLength ¶
func (x *DebugOptions) GetXlaReduceWindowRewriteBaseLength() int64
func (*DebugOptions) GetXlaStepMarkerLocation ¶
func (x *DebugOptions) GetXlaStepMarkerLocation() DebugOptions_StepMarkerLocation
func (*DebugOptions) GetXlaSyntaxSugarAsyncOps ¶
func (x *DebugOptions) GetXlaSyntaxSugarAsyncOps() bool
func (*DebugOptions) GetXlaTestAllInputLayouts ¶
func (x *DebugOptions) GetXlaTestAllInputLayouts() bool
func (*DebugOptions) GetXlaTestAllOutputLayouts ¶
func (x *DebugOptions) GetXlaTestAllOutputLayouts() bool
func (*DebugOptions) GetXlaTpuDetectInf ¶
func (x *DebugOptions) GetXlaTpuDetectInf() bool
func (*DebugOptions) GetXlaTpuDetectNan ¶
func (x *DebugOptions) GetXlaTpuDetectNan() bool
func (*DebugOptions) GetXlaUnsupportedCrashOnHloPassFixMaxIterations ¶ added in v0.5.1
func (x *DebugOptions) GetXlaUnsupportedCrashOnHloPassFixMaxIterations() bool
func (*DebugOptions) ProtoMessage ¶
func (*DebugOptions) ProtoMessage()
func (*DebugOptions) ProtoReflect ¶
func (x *DebugOptions) ProtoReflect() protoreflect.Message
func (*DebugOptions) Reset ¶
func (x *DebugOptions) Reset()
func (*DebugOptions) String ¶
func (x *DebugOptions) String() string
type DebugOptions_AutotuneCacheMode ¶ added in v0.4.0
type DebugOptions_AutotuneCacheMode int32
const (
	DebugOptions_AUTOTUNE_CACHE_MODE_UNSPECIFIED DebugOptions_AutotuneCacheMode = 0
	// If the cache exists per fusion autotuner loads it and terminates,
	// otherwise runs autotuner and dumps the result.
	DebugOptions_AUTOTUNE_CACHE_MODE_UPDATE DebugOptions_AutotuneCacheMode = 1
	// Sets readonly access to the cache for the per fusion autotuner. Same as
	// above, but doesn't dump anything.
	DebugOptions_AUTOTUNE_CACHE_MODE_READ DebugOptions_AutotuneCacheMode = 2
)
func (DebugOptions_AutotuneCacheMode) Descriptor ¶ added in v0.4.0
func (DebugOptions_AutotuneCacheMode) Descriptor() protoreflect.EnumDescriptor
func (DebugOptions_AutotuneCacheMode) Enum ¶ added in v0.4.0
func (x DebugOptions_AutotuneCacheMode) Enum() *DebugOptions_AutotuneCacheMode
func (DebugOptions_AutotuneCacheMode) EnumDescriptor ¶ added in v0.4.0
func (DebugOptions_AutotuneCacheMode) EnumDescriptor() ([]byte, []int)
Deprecated: Use DebugOptions_AutotuneCacheMode.Descriptor instead.
func (DebugOptions_AutotuneCacheMode) Number ¶ added in v0.4.0
func (x DebugOptions_AutotuneCacheMode) Number() protoreflect.EnumNumber
func (DebugOptions_AutotuneCacheMode) String ¶ added in v0.4.0
func (x DebugOptions_AutotuneCacheMode) String() string
func (DebugOptions_AutotuneCacheMode) Type ¶ added in v0.4.0
func (DebugOptions_AutotuneCacheMode) Type() protoreflect.EnumType
type DebugOptions_CollectiveOpType ¶
type DebugOptions_CollectiveOpType int32
Enum to define all collective ops that xla supports.
const ( DebugOptions_NOOP DebugOptions_CollectiveOpType = 0 DebugOptions_ALLREDUCE DebugOptions_CollectiveOpType = 1 DebugOptions_ALLGATHER DebugOptions_CollectiveOpType = 2 DebugOptions_REDUCESCATTER DebugOptions_CollectiveOpType = 3 DebugOptions_COLLECTIVEBROADCAST DebugOptions_CollectiveOpType = 4 DebugOptions_ALLTOALL DebugOptions_CollectiveOpType = 5 DebugOptions_COLLECTIVEPERMUTE DebugOptions_CollectiveOpType = 6 DebugOptions_RAGGEDALLTOALL DebugOptions_CollectiveOpType = 7 )
func (DebugOptions_CollectiveOpType) Descriptor ¶
func (DebugOptions_CollectiveOpType) Descriptor() protoreflect.EnumDescriptor
func (DebugOptions_CollectiveOpType) Enum ¶
func (x DebugOptions_CollectiveOpType) Enum() *DebugOptions_CollectiveOpType
func (DebugOptions_CollectiveOpType) EnumDescriptor ¶
func (DebugOptions_CollectiveOpType) EnumDescriptor() ([]byte, []int)
Deprecated: Use DebugOptions_CollectiveOpType.Descriptor instead.
func (DebugOptions_CollectiveOpType) Number ¶
func (x DebugOptions_CollectiveOpType) Number() protoreflect.EnumNumber
func (DebugOptions_CollectiveOpType) String ¶
func (x DebugOptions_CollectiveOpType) String() string
func (DebugOptions_CollectiveOpType) Type ¶
func (DebugOptions_CollectiveOpType) Type() protoreflect.EnumType
type DebugOptions_CommandBufferCmdType ¶
type DebugOptions_CommandBufferCmdType int32
Commands are categorized into 5 types: FUSION represents regular fusion kernels. CUBLAS/CUBLASLT, CUDNN, and COLLECTIVES represent library calls. CONDITIONAL represents control flow.
const ( DebugOptions_INVALID DebugOptions_CommandBufferCmdType = 0 DebugOptions_FUSION DebugOptions_CommandBufferCmdType = 1 DebugOptions_CUBLAS DebugOptions_CommandBufferCmdType = 2 DebugOptions_CUDNN DebugOptions_CommandBufferCmdType = 3 DebugOptions_COLLECTIVES DebugOptions_CommandBufferCmdType = 4 DebugOptions_CONDITIONAL DebugOptions_CommandBufferCmdType = 5 DebugOptions_WHILE DebugOptions_CommandBufferCmdType = 6 DebugOptions_CUSTOM_CALL DebugOptions_CommandBufferCmdType = 7 DebugOptions_CUBLASLT DebugOptions_CommandBufferCmdType = 8 DebugOptions_DYNAMIC_SLICE_FUSION DebugOptions_CommandBufferCmdType = 9 )
func (DebugOptions_CommandBufferCmdType) Descriptor ¶
func (DebugOptions_CommandBufferCmdType) Descriptor() protoreflect.EnumDescriptor
func (DebugOptions_CommandBufferCmdType) Enum ¶
func (x DebugOptions_CommandBufferCmdType) Enum() *DebugOptions_CommandBufferCmdType
func (DebugOptions_CommandBufferCmdType) EnumDescriptor ¶
func (DebugOptions_CommandBufferCmdType) EnumDescriptor() ([]byte, []int)
Deprecated: Use DebugOptions_CommandBufferCmdType.Descriptor instead.
func (DebugOptions_CommandBufferCmdType) Number ¶
func (x DebugOptions_CommandBufferCmdType) Number() protoreflect.EnumNumber
func (DebugOptions_CommandBufferCmdType) String ¶
func (x DebugOptions_CommandBufferCmdType) String() string
func (DebugOptions_CommandBufferCmdType) Type ¶
func (DebugOptions_CommandBufferCmdType) Type() protoreflect.EnumType
type DebugOptions_LibNvJitLinkMode ¶ added in v0.4.9
type DebugOptions_LibNvJitLinkMode int32
const (
	// LibNvJitLink is used if it is available and no buggy version has been
	// detected.
	DebugOptions_LIB_NV_JIT_LINK_MODE_AUTO DebugOptions_LibNvJitLinkMode = 0
	// LibNvJitLink is never used.
	DebugOptions_LIB_NV_JIT_LINK_MODE_DISABLED DebugOptions_LibNvJitLinkMode = 1
	// LibNvJitLink is used always. If it is not available, compilation will
	// fail.
	DebugOptions_LIB_NV_JIT_LINK_MODE_ENABLED DebugOptions_LibNvJitLinkMode = 2
)
func (DebugOptions_LibNvJitLinkMode) Descriptor ¶ added in v0.4.9
func (DebugOptions_LibNvJitLinkMode) Descriptor() protoreflect.EnumDescriptor
func (DebugOptions_LibNvJitLinkMode) Enum ¶ added in v0.4.9
func (x DebugOptions_LibNvJitLinkMode) Enum() *DebugOptions_LibNvJitLinkMode
func (DebugOptions_LibNvJitLinkMode) EnumDescriptor ¶ added in v0.4.9
func (DebugOptions_LibNvJitLinkMode) EnumDescriptor() ([]byte, []int)
Deprecated: Use DebugOptions_LibNvJitLinkMode.Descriptor instead.
func (DebugOptions_LibNvJitLinkMode) Number ¶ added in v0.4.9
func (x DebugOptions_LibNvJitLinkMode) Number() protoreflect.EnumNumber
func (DebugOptions_LibNvJitLinkMode) String ¶ added in v0.4.9
func (x DebugOptions_LibNvJitLinkMode) String() string
func (DebugOptions_LibNvJitLinkMode) Type ¶ added in v0.4.9
func (DebugOptions_LibNvJitLinkMode) Type() protoreflect.EnumType
type DebugOptions_PGLEStrictnessLevel ¶ added in v0.4.7
type DebugOptions_PGLEStrictnessLevel int32
Enables strict PGLE checking. If an FDO profile is specified and latency hiding scheduler encounters missing instructions in the profile compilation will halt or warn depending on the value of this option.
const ( DebugOptions_PGLE_STRICTNESS_LEVEL_OFF DebugOptions_PGLEStrictnessLevel = 0 DebugOptions_PGLE_STRICTNESS_LEVEL_WARN DebugOptions_PGLEStrictnessLevel = 1 DebugOptions_PGLE_STRICTNESS_LEVEL_ERROR DebugOptions_PGLEStrictnessLevel = 2 )
func (DebugOptions_PGLEStrictnessLevel) Descriptor ¶ added in v0.4.7
func (DebugOptions_PGLEStrictnessLevel) Descriptor() protoreflect.EnumDescriptor
func (DebugOptions_PGLEStrictnessLevel) Enum ¶ added in v0.4.7
func (x DebugOptions_PGLEStrictnessLevel) Enum() *DebugOptions_PGLEStrictnessLevel
func (DebugOptions_PGLEStrictnessLevel) EnumDescriptor ¶ added in v0.4.7
func (DebugOptions_PGLEStrictnessLevel) EnumDescriptor() ([]byte, []int)
Deprecated: Use DebugOptions_PGLEStrictnessLevel.Descriptor instead.
func (DebugOptions_PGLEStrictnessLevel) Number ¶ added in v0.4.7
func (x DebugOptions_PGLEStrictnessLevel) Number() protoreflect.EnumNumber
func (DebugOptions_PGLEStrictnessLevel) String ¶ added in v0.4.7
func (x DebugOptions_PGLEStrictnessLevel) String() string
func (DebugOptions_PGLEStrictnessLevel) Type ¶ added in v0.4.7
func (DebugOptions_PGLEStrictnessLevel) Type() protoreflect.EnumType
type DebugOptions_PartitioningAlgorithm ¶
type DebugOptions_PartitioningAlgorithm int32
const ( DebugOptions_PARTITIONING_ALGORITHM_NOOP DebugOptions_PartitioningAlgorithm = 0 DebugOptions_PARTITIONING_ALGORITHM_EXP0 DebugOptions_PartitioningAlgorithm = 1 DebugOptions_PARTITIONING_ALGORITHM_EXP1 DebugOptions_PartitioningAlgorithm = 2 DebugOptions_PARTITIONING_ALGORITHM_EXP2 DebugOptions_PartitioningAlgorithm = 3 )
func (DebugOptions_PartitioningAlgorithm) Descriptor ¶
func (DebugOptions_PartitioningAlgorithm) Descriptor() protoreflect.EnumDescriptor
func (DebugOptions_PartitioningAlgorithm) Enum ¶
func (x DebugOptions_PartitioningAlgorithm) Enum() *DebugOptions_PartitioningAlgorithm
func (DebugOptions_PartitioningAlgorithm) EnumDescriptor ¶
func (DebugOptions_PartitioningAlgorithm) EnumDescriptor() ([]byte, []int)
Deprecated: Use DebugOptions_PartitioningAlgorithm.Descriptor instead.
func (DebugOptions_PartitioningAlgorithm) Number ¶
func (x DebugOptions_PartitioningAlgorithm) Number() protoreflect.EnumNumber
func (DebugOptions_PartitioningAlgorithm) String ¶
func (x DebugOptions_PartitioningAlgorithm) String() string
func (DebugOptions_PartitioningAlgorithm) Type ¶
func (DebugOptions_PartitioningAlgorithm) Type() protoreflect.EnumType
type DebugOptions_ShapeChecks ¶
type DebugOptions_ShapeChecks int32
const (
	// Do not insert any shape checks for dynamically shaped operations; output
	// buffers might contain garbage data if shapes don't match.
	DebugOptions_IGNORE DebugOptions_ShapeChecks = 0
	// Check shapes at runtime, will insert an extra synchronization if shapes
	// cannot be proven correct at compile time.
	DebugOptions_RUNTIME DebugOptions_ShapeChecks = 1
	// Will refuse to compile any program where shape correctness can not be
	// established at compile time.
	DebugOptions_COMPILE_TIME DebugOptions_ShapeChecks = 2
)
func (DebugOptions_ShapeChecks) Descriptor ¶
func (DebugOptions_ShapeChecks) Descriptor() protoreflect.EnumDescriptor
func (DebugOptions_ShapeChecks) Enum ¶
func (x DebugOptions_ShapeChecks) Enum() *DebugOptions_ShapeChecks
func (DebugOptions_ShapeChecks) EnumDescriptor
func (DebugOptions_ShapeChecks) EnumDescriptor() ([]byte, []int)
Deprecated: Use DebugOptions_ShapeChecks.Descriptor instead.
func (DebugOptions_ShapeChecks) Number ¶
func (x DebugOptions_ShapeChecks) Number() protoreflect.EnumNumber
func (DebugOptions_ShapeChecks) String ¶
func (x DebugOptions_ShapeChecks) String() string
func (DebugOptions_ShapeChecks) Type ¶
func (DebugOptions_ShapeChecks) Type() protoreflect.EnumType
type DebugOptions_StepMarkerLocation ¶
type DebugOptions_StepMarkerLocation int32
const ( // Generate a step marker at the program entry. This handles the case where // each step is done by one or multiple program execution(s). Only the first // program will be tagged for generating a step marker at the program entry. // This is the default. DebugOptions_STEP_MARK_AT_ENTRY DebugOptions_StepMarkerLocation = 0 // Generate a step marker at each iteration of the top level while loop, // which is assumed to be a training loop. DebugOptions_STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP DebugOptions_StepMarkerLocation = 1 // Generate a step marker at each iteration of the second level while loops, // which is assumed to be a training or eval loop. DebugOptions_STEP_MARK_AT_SECOND_LEVEL_WHILE_LOOP DebugOptions_StepMarkerLocation = 3 // No step marker generated. DebugOptions_STEP_MARK_NONE DebugOptions_StepMarkerLocation = 2 )
func (DebugOptions_StepMarkerLocation) Descriptor ¶
func (DebugOptions_StepMarkerLocation) Descriptor() protoreflect.EnumDescriptor
func (DebugOptions_StepMarkerLocation) Enum ¶
func (x DebugOptions_StepMarkerLocation) Enum() *DebugOptions_StepMarkerLocation
func (DebugOptions_StepMarkerLocation) EnumDescriptor
func (DebugOptions_StepMarkerLocation) EnumDescriptor() ([]byte, []int)
Deprecated: Use DebugOptions_StepMarkerLocation.Descriptor instead.
func (DebugOptions_StepMarkerLocation) Number ¶
func (x DebugOptions_StepMarkerLocation) Number() protoreflect.EnumNumber
func (DebugOptions_StepMarkerLocation) String ¶
func (x DebugOptions_StepMarkerLocation) String() string
func (DebugOptions_StepMarkerLocation) Type ¶
func (DebugOptions_StepMarkerLocation) Type() protoreflect.EnumType
type DebugOptions_WhileLoopUnrolling ¶
type DebugOptions_WhileLoopUnrolling int32
const ( DebugOptions_WHILE_LOOP_UNROLLING_NO_UNROLL DebugOptions_WhileLoopUnrolling = 0 // Has the same effect as setting // `xla_gpu_enable_while_loop_double_buffering`. DebugOptions_WHILE_LOOP_UNROLLING_DOUBLE_BUFFER DebugOptions_WhileLoopUnrolling = 1 // Enables full loop unrolling using the same strategy as `DOUBLE_BUFFER`. DebugOptions_WHILE_LOOP_UNROLLING_FULL_UNROLL DebugOptions_WhileLoopUnrolling = 2 // Enables loop unrolling when we have at least one collective within a // while loop. DebugOptions_WHILE_LOOP_UNROLLING_AUTO_UNROLL DebugOptions_WhileLoopUnrolling = 3 )
func (DebugOptions_WhileLoopUnrolling) Descriptor ¶
func (DebugOptions_WhileLoopUnrolling) Descriptor() protoreflect.EnumDescriptor
func (DebugOptions_WhileLoopUnrolling) Enum ¶
func (x DebugOptions_WhileLoopUnrolling) Enum() *DebugOptions_WhileLoopUnrolling
func (DebugOptions_WhileLoopUnrolling) EnumDescriptor
func (DebugOptions_WhileLoopUnrolling) EnumDescriptor() ([]byte, []int)
Deprecated: Use DebugOptions_WhileLoopUnrolling.Descriptor instead.
func (DebugOptions_WhileLoopUnrolling) Number ¶
func (x DebugOptions_WhileLoopUnrolling) Number() protoreflect.EnumNumber
func (DebugOptions_WhileLoopUnrolling) String ¶
func (x DebugOptions_WhileLoopUnrolling) String() string
func (DebugOptions_WhileLoopUnrolling) Type ¶
func (DebugOptions_WhileLoopUnrolling) Type() protoreflect.EnumType
type ExecutionOptions ¶
type ExecutionOptions struct { // This optional field's layout is used as a hint when storing the output of // this computation. Subsequent transfers of this output array to the client // may be faster when using this layout. // // We use a Shape here to accommodate computations that return a tuple. ShapeWithOutputLayout *xla_data.ShapeProto `` /* 128-byte string literal not displayed */ // Used to seed random-number generators used in this computation. If this is // 0, we generate a seed ourselves. // // Changing the seed unnecessarily forces a recompilation. Seed uint64 `protobuf:"varint,3,opt,name=seed,proto3" json:"seed,omitempty"` DebugOptions *DebugOptions `protobuf:"bytes,4,opt,name=debug_options,json=debugOptions,proto3" json:"debug_options,omitempty"` // This optional field specifies a particular set of devices to run the // computation on. The computation will be partitioned across these devices. // If not provided, the default device will be chosen. DeviceHandles []*xla_data.DeviceHandle `protobuf:"bytes,5,rep,name=device_handles,json=deviceHandles,proto3" json:"device_handles,omitempty"` // Number of replicas of the computation to run. If zero, uses the default // number of replicas for the XLA service. NumReplicas int32 `protobuf:"varint,6,opt,name=num_replicas,json=numReplicas,proto3" json:"num_replicas,omitempty"` // This optional field specifies the device assignment if known at compile // time. DeviceAssignment *xla_data.DeviceAssignmentProto `protobuf:"bytes,7,opt,name=device_assignment,json=deviceAssignment,proto3" json:"device_assignment,omitempty"` // Alias input and output buffers for parameters that are passed-through XLA // modules without being changed. AliasPassthroughParams bool `` /* 130-byte string literal not displayed */ // Number of partitions of the computation to run (model parallelism). // If zero, uses the default number of partitions for the XLA service. 
NumPartitions int32 `protobuf:"varint,9,opt,name=num_partitions,json=numPartitions,proto3" json:"num_partitions,omitempty"` // Used to identify a set of programs that should be launch together. LaunchId int32 `protobuf:"varint,10,opt,name=launch_id,json=launchId,proto3" json:"launch_id,omitempty"` // Indicates whether to use SPMD (true) or MPMD (false) partitioning when // num_partitions > 1 and XLA is requested to partition the input program. UseSpmdPartitioning bool `protobuf:"varint,11,opt,name=use_spmd_partitioning,json=useSpmdPartitioning,proto3" json:"use_spmd_partitioning,omitempty"` // Whether to automatically generate XLA shardings for SPMD partitioner. UseAutoSpmdPartitioning bool `` /* 136-byte string literal not displayed */ // Device mesh shape used to create the sharding search space when // use_auto_spmd_partitioning=true. AutoSpmdPartitioningMeshShape []int64 `` /* 163-byte string literal not displayed */ // Device mesh ids compatible with the above mesh_shape used when // use_auto_spmd_partitioning=true. AutoSpmdPartitioningMeshIds []int64 `` /* 157-byte string literal not displayed */ // The amount of effort to spend on optimizing for minimizing program // execution time, as a value in [-1.0, +1.0]. The baseline is 0.0, which // strongly prioritizes execution time at the cost of longer compile times, // suitable for production workloads. A value of -0.5 would be appropriate for // research use cases that prefer faster compilations to iterate more quickly. // Positive values, on the other hand, might enable costly optimizations that // are off by default. ExecTimeOptimizationEffort float32 `` /* 146-byte string literal not displayed */ // The amount of effort to spend on making the program fit in memory (where // "fit in memory" here has a backend-dependent meaning), as a value in // [-1.0,+1.0]. The baseline is 0.0, which expends significant effort on // attempting to make the program fit. 
A value of -1.0 would be appropriate // for use cases that wish to spend minimal effort here and fail as quickly as // possible instead. Positive values, on the other hand, might enable costly // algorithms to reduce memory usage that are off by default. MemoryFittingEffort float32 `protobuf:"fixed32,26,opt,name=memory_fitting_effort,json=memoryFittingEffort,proto3" json:"memory_fitting_effort,omitempty"` // If set, deduplicate hlo into function calls to reduce binary size. Only // works on TPU. DeduplicateHlo bool `protobuf:"varint,12,opt,name=deduplicate_hlo,json=deduplicateHlo,proto3" json:"deduplicate_hlo,omitempty"` // Allows sharding propagation to propagate to the parameters. This changes // the input shape of the computation (which is undesirable), but it can be // used to allow to run partial compilation to determine what would be the // input sharding of a computation if XLA would be allowed to propagate the // sharding which can be used by higher level framework as a way to query // intermediate sharding of operations when multiple computation would be // chained and merged together. // This is a vector of bool, because the user can control which parameters can // have the sharding substituted. If only one boolean value is passed in the // vector that is interpreted as the value to be applied for every parameter. AllowSpmdShardingPropagationToParameters []bool `` /* 198-byte string literal not displayed */ // Allows sharding propagation to propagate to the outputs. This changes the // output shape of the computation (which is undesirable), but it can be used // to allow to run partial compilation to determine what would be the output // sharding of a computation if XLA would be allowed to propagate the sharding // which can be used by higher level framework as a way to query intermediate // sharding of operations when multiple computation would be chained and // merged together. 
// This is a vector of bool, because the user can control (if the output of // the computation is a tuple) which elements of the tuple can have the // sharding substituted and which don't. If only one boolean value is passed // in the vector that's interpreted as the value to be applied for every // single element of the output tuple. One value per element of the tuple // means that each value is attached to one of the output elements. AllowSpmdShardingPropagationToOutput []bool `` /* 186-byte string literal not displayed */ // Whether to broadcast args across all replicas. One entry per arg. ParamRequiresBroadcastViaCollectives []bool `` /* 184-byte string literal not displayed */ // If enabled, the compiler may generate sharding and unsharding programs as // separate HLO modules, and modify the main program's input and output to // be sharded. AllowSeparateShardingPrograms bool `` /* 154-byte string literal not displayed */ // The list of input/output pairs in the main program that could be sharded. ShardableValueUpdatePairs []*ShardableValueUpdatePairProto `` /* 141-byte string literal not displayed */ // Profiling data for feedback directed optimizations. Note that this is not // the only way to feed FDO data into the compiler and individual backends // may choose to get FDO data by other means. FdoProfile []byte `protobuf:"bytes,21,opt,name=fdo_profile,json=fdoProfile,proto3" json:"fdo_profile,omitempty"` // Amount of device memory available for the executable to use. DeviceMemorySize int64 `protobuf:"varint,22,opt,name=device_memory_size,json=deviceMemorySize,proto3" json:"device_memory_size,omitempty"` // Use Shardy, a new partitioner, to replace the existing // ShardingPropagation and SpmdPartitioner. See go/xla-sdy-pipeline for // details. UseShardyPartitioner bool `protobuf:"varint,24,opt,name=use_shardy_partitioner,json=useShardyPartitioner,proto3" json:"use_shardy_partitioner,omitempty"` // contains filtered or unexported fields }
These settings control how XLA compiles and/or runs code. Not all settings will have an effect on every platform.
When adding new fields, keep in mind that boolean fields default to false. Next id: 27.
func (*ExecutionOptions) Descriptor
func (*ExecutionOptions) Descriptor() ([]byte, []int)
Deprecated: Use ExecutionOptions.ProtoReflect.Descriptor instead.
func (*ExecutionOptions) GetAliasPassthroughParams ¶
func (x *ExecutionOptions) GetAliasPassthroughParams() bool
func (*ExecutionOptions) GetAllowSeparateShardingPrograms ¶
func (x *ExecutionOptions) GetAllowSeparateShardingPrograms() bool
func (*ExecutionOptions) GetAllowSpmdShardingPropagationToOutput ¶
func (x *ExecutionOptions) GetAllowSpmdShardingPropagationToOutput() []bool
func (*ExecutionOptions) GetAllowSpmdShardingPropagationToParameters ¶
func (x *ExecutionOptions) GetAllowSpmdShardingPropagationToParameters() []bool
func (*ExecutionOptions) GetAutoSpmdPartitioningMeshIds ¶
func (x *ExecutionOptions) GetAutoSpmdPartitioningMeshIds() []int64
func (*ExecutionOptions) GetAutoSpmdPartitioningMeshShape ¶
func (x *ExecutionOptions) GetAutoSpmdPartitioningMeshShape() []int64
func (*ExecutionOptions) GetDebugOptions ¶
func (x *ExecutionOptions) GetDebugOptions() *DebugOptions
func (*ExecutionOptions) GetDeduplicateHlo ¶
func (x *ExecutionOptions) GetDeduplicateHlo() bool
func (*ExecutionOptions) GetDeviceAssignment ¶
func (x *ExecutionOptions) GetDeviceAssignment() *xla_data.DeviceAssignmentProto
func (*ExecutionOptions) GetDeviceHandles ¶
func (x *ExecutionOptions) GetDeviceHandles() []*xla_data.DeviceHandle
func (*ExecutionOptions) GetDeviceMemorySize ¶
func (x *ExecutionOptions) GetDeviceMemorySize() int64
func (*ExecutionOptions) GetExecTimeOptimizationEffort ¶ added in v0.4.7
func (x *ExecutionOptions) GetExecTimeOptimizationEffort() float32
func (*ExecutionOptions) GetFdoProfile ¶
func (x *ExecutionOptions) GetFdoProfile() []byte
func (*ExecutionOptions) GetLaunchId ¶
func (x *ExecutionOptions) GetLaunchId() int32
func (*ExecutionOptions) GetMemoryFittingEffort ¶ added in v0.4.7
func (x *ExecutionOptions) GetMemoryFittingEffort() float32
func (*ExecutionOptions) GetNumPartitions ¶
func (x *ExecutionOptions) GetNumPartitions() int32
func (*ExecutionOptions) GetNumReplicas ¶
func (x *ExecutionOptions) GetNumReplicas() int32
func (*ExecutionOptions) GetParamRequiresBroadcastViaCollectives ¶
func (x *ExecutionOptions) GetParamRequiresBroadcastViaCollectives() []bool
func (*ExecutionOptions) GetSeed ¶
func (x *ExecutionOptions) GetSeed() uint64
func (*ExecutionOptions) GetShapeWithOutputLayout ¶
func (x *ExecutionOptions) GetShapeWithOutputLayout() *xla_data.ShapeProto
func (*ExecutionOptions) GetShardableValueUpdatePairs ¶
func (x *ExecutionOptions) GetShardableValueUpdatePairs() []*ShardableValueUpdatePairProto
func (*ExecutionOptions) GetUseAutoSpmdPartitioning ¶
func (x *ExecutionOptions) GetUseAutoSpmdPartitioning() bool
func (*ExecutionOptions) GetUseShardyPartitioner ¶
func (x *ExecutionOptions) GetUseShardyPartitioner() bool
func (*ExecutionOptions) GetUseSpmdPartitioning ¶
func (x *ExecutionOptions) GetUseSpmdPartitioning() bool
func (*ExecutionOptions) ProtoMessage ¶
func (*ExecutionOptions) ProtoMessage()
func (*ExecutionOptions) ProtoReflect ¶
func (x *ExecutionOptions) ProtoReflect() protoreflect.Message
func (*ExecutionOptions) Reset ¶
func (x *ExecutionOptions) Reset()
func (*ExecutionOptions) String ¶
func (x *ExecutionOptions) String() string
type GpuCompilationEnvironment ¶
type GpuCompilationEnvironment struct { // Temporary dummy flag is added to test the flow. // To be removed when we add flags here. DummyFlag int64 `protobuf:"varint,1,opt,name=dummy_flag,json=dummyFlag,proto3" json:"dummy_flag,omitempty"` // contains filtered or unexported fields }
Contains flags which affect the GPU compilation result. These flags are currently part of DebugOptions and will be migrated to this proto.
func (*GpuCompilationEnvironment) Descriptor
func (*GpuCompilationEnvironment) Descriptor() ([]byte, []int)
Deprecated: Use GpuCompilationEnvironment.ProtoReflect.Descriptor instead.
func (*GpuCompilationEnvironment) GetDummyFlag ¶
func (x *GpuCompilationEnvironment) GetDummyFlag() int64
func (*GpuCompilationEnvironment) ProtoMessage ¶
func (*GpuCompilationEnvironment) ProtoMessage()
func (*GpuCompilationEnvironment) ProtoReflect ¶
func (x *GpuCompilationEnvironment) ProtoReflect() protoreflect.Message
func (*GpuCompilationEnvironment) Reset ¶
func (x *GpuCompilationEnvironment) Reset()
func (*GpuCompilationEnvironment) String ¶
func (x *GpuCompilationEnvironment) String() string
type HloModuleConfigProto ¶
type HloModuleConfigProto struct { EntryComputationLayout *xla_data.ProgramShapeProto `` /* 129-byte string literal not displayed */ Seed uint64 `protobuf:"varint,2,opt,name=seed,proto3" json:"seed,omitempty"` LaunchId int32 `protobuf:"varint,3,opt,name=launch_id,json=launchId,proto3" json:"launch_id,omitempty"` ReplicaCount int64 `protobuf:"varint,4,opt,name=replica_count,json=replicaCount,proto3" json:"replica_count,omitempty"` NumPartitions int64 `protobuf:"varint,5,opt,name=num_partitions,json=numPartitions,proto3" json:"num_partitions,omitempty"` ParamRequiresBroadcastViaCollectives []bool `` /* 183-byte string literal not displayed */ UseSpmdPartitioning bool `protobuf:"varint,7,opt,name=use_spmd_partitioning,json=useSpmdPartitioning,proto3" json:"use_spmd_partitioning,omitempty"` UseAutoSpmdPartitioning bool `` /* 135-byte string literal not displayed */ AutoSpmdPartitioningMeshShape []int64 `` /* 162-byte string literal not displayed */ AutoSpmdPartitioningMeshIds []int64 `` /* 157-byte string literal not displayed */ ExecTimeOptimizationEffort float32 `` /* 146-byte string literal not displayed */ MemoryFittingEffort float32 `protobuf:"fixed32,37,opt,name=memory_fitting_effort,json=memoryFittingEffort,proto3" json:"memory_fitting_effort,omitempty"` DeduplicateHlo bool `protobuf:"varint,11,opt,name=deduplicate_hlo,json=deduplicateHlo,proto3" json:"deduplicate_hlo,omitempty"` IntraOpParallelismThreads int64 `` /* 142-byte string literal not displayed */ DeviceType string `protobuf:"bytes,13,opt,name=device_type,json=deviceType,proto3" json:"device_type,omitempty"` DebugOptions *DebugOptions `protobuf:"bytes,14,opt,name=debug_options,json=debugOptions,proto3" json:"debug_options,omitempty"` StaticDeviceAssignment *xla_data.DeviceAssignmentProto `` /* 130-byte string literal not displayed */ // The original device assignment before being changed by a simulator. 
// Simulators, like HybridSim, may change the device assignment to a smaller // topology, to make simulation easier. PreSimulationDeviceAssignment *xla_data.DeviceAssignmentProto `` /* 153-byte string literal not displayed */ AllowSeparateShardingPrograms bool `` /* 154-byte string literal not displayed */ ShardableValueUpdatePairs []*ShardableValueUpdatePairProto `` /* 141-byte string literal not displayed */ AliasPassthroughParams bool `` /* 131-byte string literal not displayed */ ContentAwareComputationSorting bool `` /* 157-byte string literal not displayed */ FusionConfigCollection HloModuleConfigProto_FusionConfigCollection `` /* 184-byte string literal not displayed */ FusionConfig []*HloModuleConfigProto_BoolList `protobuf:"bytes,20,rep,name=fusion_config,json=fusionConfig,proto3" json:"fusion_config,omitempty"` DotConfig map[string]*HloModuleConfigProto_Int64List `` /* 163-byte string literal not displayed */ LayoutConfig []*HloModuleConfigProto_Int64ListList `protobuf:"bytes,22,rep,name=layout_config,json=layoutConfig,proto3" json:"layout_config,omitempty"` MemorySpaceAssignmentConfig []uint64 `` /* 155-byte string literal not displayed */ PhaseOrderingConfig []*HloModuleConfigProto_BoolList `protobuf:"bytes,24,rep,name=phase_ordering_config,json=phaseOrderingConfig,proto3" json:"phase_ordering_config,omitempty"` PhaseIndex int32 `protobuf:"varint,25,opt,name=phase_index,json=phaseIndex,proto3" json:"phase_index,omitempty"` AllowSpmdShardingPropagationToParameters []bool `` /* 198-byte string literal not displayed */ AllowSpmdShardingPropagationToOutput []bool `` /* 186-byte string literal not displayed */ AnalysisAllowanceMap map[string]int64 `` /* 199-byte string literal not displayed */ MatrixUnitOperandPrecision xla_data.PrecisionConfig_Precision `` /* 180-byte string literal not displayed */ FdoProfile []byte `protobuf:"bytes,31,opt,name=fdo_profile,json=fdoProfile,proto3" json:"fdo_profile,omitempty"` DeviceMemorySize int64 
`protobuf:"varint,32,opt,name=device_memory_size,json=deviceMemorySize,proto3" json:"device_memory_size,omitempty"` UseShardyPartitioner bool `protobuf:"varint,34,opt,name=use_shardy_partitioner,json=useShardyPartitioner,proto3" json:"use_shardy_partitioner,omitempty"` ShardingConfig *ShardingConfigProto `protobuf:"bytes,38,opt,name=sharding_config,json=shardingConfig,proto3" json:"sharding_config,omitempty"` // contains filtered or unexported fields }
Serialization of HloModuleConfig. See the C++ class definition for descriptions of each field. There are no guarantees of backwards or forwards compatibility. Next id: 39.
func (*HloModuleConfigProto) Descriptor
func (*HloModuleConfigProto) Descriptor() ([]byte, []int)
Deprecated: Use HloModuleConfigProto.ProtoReflect.Descriptor instead.
func (*HloModuleConfigProto) GetAliasPassthroughParams ¶
func (x *HloModuleConfigProto) GetAliasPassthroughParams() bool
func (*HloModuleConfigProto) GetAllowSeparateShardingPrograms ¶
func (x *HloModuleConfigProto) GetAllowSeparateShardingPrograms() bool
func (*HloModuleConfigProto) GetAllowSpmdShardingPropagationToOutput ¶
func (x *HloModuleConfigProto) GetAllowSpmdShardingPropagationToOutput() []bool
func (*HloModuleConfigProto) GetAllowSpmdShardingPropagationToParameters ¶
func (x *HloModuleConfigProto) GetAllowSpmdShardingPropagationToParameters() []bool
func (*HloModuleConfigProto) GetAnalysisAllowanceMap ¶
func (x *HloModuleConfigProto) GetAnalysisAllowanceMap() map[string]int64
func (*HloModuleConfigProto) GetAutoSpmdPartitioningMeshIds ¶
func (x *HloModuleConfigProto) GetAutoSpmdPartitioningMeshIds() []int64
func (*HloModuleConfigProto) GetAutoSpmdPartitioningMeshShape ¶
func (x *HloModuleConfigProto) GetAutoSpmdPartitioningMeshShape() []int64
func (*HloModuleConfigProto) GetContentAwareComputationSorting ¶
func (x *HloModuleConfigProto) GetContentAwareComputationSorting() bool
func (*HloModuleConfigProto) GetDebugOptions ¶
func (x *HloModuleConfigProto) GetDebugOptions() *DebugOptions
func (*HloModuleConfigProto) GetDeduplicateHlo ¶
func (x *HloModuleConfigProto) GetDeduplicateHlo() bool
func (*HloModuleConfigProto) GetDeviceMemorySize ¶
func (x *HloModuleConfigProto) GetDeviceMemorySize() int64
func (*HloModuleConfigProto) GetDeviceType ¶
func (x *HloModuleConfigProto) GetDeviceType() string
func (*HloModuleConfigProto) GetDotConfig ¶
func (x *HloModuleConfigProto) GetDotConfig() map[string]*HloModuleConfigProto_Int64List
func (*HloModuleConfigProto) GetEntryComputationLayout ¶
func (x *HloModuleConfigProto) GetEntryComputationLayout() *xla_data.ProgramShapeProto
func (*HloModuleConfigProto) GetExecTimeOptimizationEffort ¶ added in v0.4.7
func (x *HloModuleConfigProto) GetExecTimeOptimizationEffort() float32
func (*HloModuleConfigProto) GetFdoProfile ¶
func (x *HloModuleConfigProto) GetFdoProfile() []byte
func (*HloModuleConfigProto) GetFusionConfig ¶
func (x *HloModuleConfigProto) GetFusionConfig() []*HloModuleConfigProto_BoolList
func (*HloModuleConfigProto) GetFusionConfigCollection ¶
func (x *HloModuleConfigProto) GetFusionConfigCollection() HloModuleConfigProto_FusionConfigCollection
func (*HloModuleConfigProto) GetIntraOpParallelismThreads ¶
func (x *HloModuleConfigProto) GetIntraOpParallelismThreads() int64
func (*HloModuleConfigProto) GetLaunchId ¶
func (x *HloModuleConfigProto) GetLaunchId() int32
func (*HloModuleConfigProto) GetLayoutConfig ¶
func (x *HloModuleConfigProto) GetLayoutConfig() []*HloModuleConfigProto_Int64ListList
func (*HloModuleConfigProto) GetMatrixUnitOperandPrecision ¶
func (x *HloModuleConfigProto) GetMatrixUnitOperandPrecision() xla_data.PrecisionConfig_Precision
func (*HloModuleConfigProto) GetMemoryFittingEffort ¶ added in v0.4.7
func (x *HloModuleConfigProto) GetMemoryFittingEffort() float32
func (*HloModuleConfigProto) GetMemorySpaceAssignmentConfig ¶
func (x *HloModuleConfigProto) GetMemorySpaceAssignmentConfig() []uint64
func (*HloModuleConfigProto) GetNumPartitions ¶
func (x *HloModuleConfigProto) GetNumPartitions() int64
func (*HloModuleConfigProto) GetParamRequiresBroadcastViaCollectives ¶
func (x *HloModuleConfigProto) GetParamRequiresBroadcastViaCollectives() []bool
func (*HloModuleConfigProto) GetPhaseIndex ¶
func (x *HloModuleConfigProto) GetPhaseIndex() int32
func (*HloModuleConfigProto) GetPhaseOrderingConfig ¶
func (x *HloModuleConfigProto) GetPhaseOrderingConfig() []*HloModuleConfigProto_BoolList
func (*HloModuleConfigProto) GetPreSimulationDeviceAssignment ¶ added in v0.4.2
func (x *HloModuleConfigProto) GetPreSimulationDeviceAssignment() *xla_data.DeviceAssignmentProto
func (*HloModuleConfigProto) GetReplicaCount ¶
func (x *HloModuleConfigProto) GetReplicaCount() int64
func (*HloModuleConfigProto) GetSeed ¶
func (x *HloModuleConfigProto) GetSeed() uint64
func (*HloModuleConfigProto) GetShardableValueUpdatePairs ¶
func (x *HloModuleConfigProto) GetShardableValueUpdatePairs() []*ShardableValueUpdatePairProto
func (*HloModuleConfigProto) GetShardingConfig ¶ added in v0.5.1
func (x *HloModuleConfigProto) GetShardingConfig() *ShardingConfigProto
func (*HloModuleConfigProto) GetStaticDeviceAssignment ¶
func (x *HloModuleConfigProto) GetStaticDeviceAssignment() *xla_data.DeviceAssignmentProto
func (*HloModuleConfigProto) GetUseAutoSpmdPartitioning ¶
func (x *HloModuleConfigProto) GetUseAutoSpmdPartitioning() bool
func (*HloModuleConfigProto) GetUseShardyPartitioner ¶
func (x *HloModuleConfigProto) GetUseShardyPartitioner() bool
func (*HloModuleConfigProto) GetUseSpmdPartitioning ¶
func (x *HloModuleConfigProto) GetUseSpmdPartitioning() bool
func (*HloModuleConfigProto) ProtoMessage ¶
func (*HloModuleConfigProto) ProtoMessage()
func (*HloModuleConfigProto) ProtoReflect ¶
func (x *HloModuleConfigProto) ProtoReflect() protoreflect.Message
func (*HloModuleConfigProto) Reset ¶
func (x *HloModuleConfigProto) Reset()
func (*HloModuleConfigProto) String ¶
func (x *HloModuleConfigProto) String() string
type HloModuleConfigProto_BoolList ¶
type HloModuleConfigProto_BoolList struct { Vals []bool `protobuf:"varint,1,rep,packed,name=vals,proto3" json:"vals,omitempty"` // contains filtered or unexported fields }
func (*HloModuleConfigProto_BoolList) Descriptor
func (*HloModuleConfigProto_BoolList) Descriptor() ([]byte, []int)
Deprecated: Use HloModuleConfigProto_BoolList.ProtoReflect.Descriptor instead.
func (*HloModuleConfigProto_BoolList) GetVals ¶
func (x *HloModuleConfigProto_BoolList) GetVals() []bool
func (*HloModuleConfigProto_BoolList) ProtoMessage ¶
func (*HloModuleConfigProto_BoolList) ProtoMessage()
func (*HloModuleConfigProto_BoolList) ProtoReflect ¶
func (x *HloModuleConfigProto_BoolList) ProtoReflect() protoreflect.Message
func (*HloModuleConfigProto_BoolList) Reset ¶
func (x *HloModuleConfigProto_BoolList) Reset()
func (*HloModuleConfigProto_BoolList) String ¶
func (x *HloModuleConfigProto_BoolList) String() string
type HloModuleConfigProto_FusionConfigCollection ¶
type HloModuleConfigProto_FusionConfigCollection int32
const ( HloModuleConfigProto_OFF HloModuleConfigProto_FusionConfigCollection = 0 // Do not collect configuration. HloModuleConfigProto_PER_EDGE HloModuleConfigProto_FusionConfigCollection = 1 // Collect per-edge configuration. HloModuleConfigProto_PER_NODE HloModuleConfigProto_FusionConfigCollection = 2 // Collect per-node configuration. )
func (HloModuleConfigProto_FusionConfigCollection) Descriptor ¶
func (HloModuleConfigProto_FusionConfigCollection) Descriptor() protoreflect.EnumDescriptor
func (HloModuleConfigProto_FusionConfigCollection) EnumDescriptor
func (HloModuleConfigProto_FusionConfigCollection) EnumDescriptor() ([]byte, []int)
Deprecated: Use HloModuleConfigProto_FusionConfigCollection.Descriptor instead.
func (HloModuleConfigProto_FusionConfigCollection) Number ¶
func (x HloModuleConfigProto_FusionConfigCollection) Number() protoreflect.EnumNumber
func (HloModuleConfigProto_FusionConfigCollection) String ¶
func (x HloModuleConfigProto_FusionConfigCollection) String() string
func (HloModuleConfigProto_FusionConfigCollection) Type ¶
func (HloModuleConfigProto_FusionConfigCollection) Type() protoreflect.EnumType
type HloModuleConfigProto_Int64List ¶
type HloModuleConfigProto_Int64List struct { Vals []int64 `protobuf:"varint,1,rep,packed,name=vals,proto3" json:"vals,omitempty"` // contains filtered or unexported fields }
func (*HloModuleConfigProto_Int64List) Descriptor
func (*HloModuleConfigProto_Int64List) Descriptor() ([]byte, []int)
Deprecated: Use HloModuleConfigProto_Int64List.ProtoReflect.Descriptor instead.
func (*HloModuleConfigProto_Int64List) GetVals ¶
func (x *HloModuleConfigProto_Int64List) GetVals() []int64
func (*HloModuleConfigProto_Int64List) ProtoMessage ¶
func (*HloModuleConfigProto_Int64List) ProtoMessage()
func (*HloModuleConfigProto_Int64List) ProtoReflect ¶
func (x *HloModuleConfigProto_Int64List) ProtoReflect() protoreflect.Message
func (*HloModuleConfigProto_Int64List) Reset ¶
func (x *HloModuleConfigProto_Int64List) Reset()
func (*HloModuleConfigProto_Int64List) String ¶
func (x *HloModuleConfigProto_Int64List) String() string
type HloModuleConfigProto_Int64ListList ¶
type HloModuleConfigProto_Int64ListList struct { Lists []*HloModuleConfigProto_Int64List `protobuf:"bytes,1,rep,name=lists,proto3" json:"lists,omitempty"` // contains filtered or unexported fields }
func (*HloModuleConfigProto_Int64ListList) Descriptor
func (*HloModuleConfigProto_Int64ListList) Descriptor() ([]byte, []int)
Deprecated: Use HloModuleConfigProto_Int64ListList.ProtoReflect.Descriptor instead.
func (*HloModuleConfigProto_Int64ListList) GetLists ¶
func (x *HloModuleConfigProto_Int64ListList) GetLists() []*HloModuleConfigProto_Int64List
func (*HloModuleConfigProto_Int64ListList) ProtoMessage ¶
func (*HloModuleConfigProto_Int64ListList) ProtoMessage()
func (*HloModuleConfigProto_Int64ListList) ProtoReflect ¶
func (x *HloModuleConfigProto_Int64ListList) ProtoReflect() protoreflect.Message
func (*HloModuleConfigProto_Int64ListList) Reset ¶
func (x *HloModuleConfigProto_Int64ListList) Reset()
func (*HloModuleConfigProto_Int64ListList) String ¶
func (x *HloModuleConfigProto_Int64ListList) String() string
type HloModuleProtoWithConfig ¶
type HloModuleProtoWithConfig struct { HloModule *hlo.HloModuleProto `protobuf:"bytes,1,opt,name=hlo_module,json=hloModule,proto3" json:"hlo_module,omitempty"` Config *HloModuleConfigProto `protobuf:"bytes,2,opt,name=config,proto3" json:"config,omitempty"` // contains filtered or unexported fields }
func (*HloModuleProtoWithConfig) Descriptor
func (*HloModuleProtoWithConfig) Descriptor() ([]byte, []int)
Deprecated: Use HloModuleProtoWithConfig.ProtoReflect.Descriptor instead.
func (*HloModuleProtoWithConfig) GetConfig ¶
func (x *HloModuleProtoWithConfig) GetConfig() *HloModuleConfigProto
func (*HloModuleProtoWithConfig) GetHloModule ¶
func (x *HloModuleProtoWithConfig) GetHloModule() *hlo.HloModuleProto
func (*HloModuleProtoWithConfig) ProtoMessage ¶
func (*HloModuleProtoWithConfig) ProtoMessage()
func (*HloModuleProtoWithConfig) ProtoReflect ¶
func (x *HloModuleProtoWithConfig) ProtoReflect() protoreflect.Message
func (*HloModuleProtoWithConfig) Reset ¶
func (x *HloModuleProtoWithConfig) Reset()
func (*HloModuleProtoWithConfig) String ¶
func (x *HloModuleProtoWithConfig) String() string
type NodeShardingConfigProto ¶ added in v0.5.1
type NodeShardingConfigProto struct { Sharding *xla_data.OpSharding `protobuf:"bytes,1,opt,name=sharding,proto3" json:"sharding,omitempty"` // For non-tuples. Nodes []*NodeShardingConfigProto `protobuf:"bytes,2,rep,name=nodes,proto3" json:"nodes,omitempty"` // For tuples. // contains filtered or unexported fields }
Message that captures sharding configuration of an HLO op.
func (*NodeShardingConfigProto) Descriptor added in v0.5.1
func (*NodeShardingConfigProto) Descriptor() ([]byte, []int)
Deprecated: Use NodeShardingConfigProto.ProtoReflect.Descriptor instead.
func (*NodeShardingConfigProto) GetNodes ¶ added in v0.5.1
func (x *NodeShardingConfigProto) GetNodes() []*NodeShardingConfigProto
func (*NodeShardingConfigProto) GetSharding ¶ added in v0.5.1
func (x *NodeShardingConfigProto) GetSharding() *xla_data.OpSharding
func (*NodeShardingConfigProto) ProtoMessage ¶ added in v0.5.1
func (*NodeShardingConfigProto) ProtoMessage()
func (*NodeShardingConfigProto) ProtoReflect ¶ added in v0.5.1
func (x *NodeShardingConfigProto) ProtoReflect() protoreflect.Message
func (*NodeShardingConfigProto) Reset ¶ added in v0.5.1
func (x *NodeShardingConfigProto) Reset()
func (*NodeShardingConfigProto) String ¶ added in v0.5.1
func (x *NodeShardingConfigProto) String() string
type ScheduleProto ¶
type ScheduleProto struct { Instructions []*ScheduleProto_Instruction `protobuf:"bytes,1,rep,name=instructions,proto3" json:"instructions,omitempty"` // Computation id (matches the id in HloComputationProto). ComputationId int64 `protobuf:"varint,2,opt,name=computation_id,json=computationId,proto3" json:"computation_id,omitempty"` HloModule *hlo.HloModuleProto `protobuf:"bytes,3,opt,name=hlo_module,json=hloModule,proto3" json:"hlo_module,omitempty"` CyclesPerMicrosecond int64 `protobuf:"varint,4,opt,name=cycles_per_microsecond,json=cyclesPerMicrosecond,proto3" json:"cycles_per_microsecond,omitempty"` // contains filtered or unexported fields }
A trace estimated by the Latency Hiding Scheduler.
func (*ScheduleProto) Descriptor
func (*ScheduleProto) Descriptor() ([]byte, []int)
Deprecated: Use ScheduleProto.ProtoReflect.Descriptor instead.
func (*ScheduleProto) GetComputationId ¶
func (x *ScheduleProto) GetComputationId() int64
func (*ScheduleProto) GetCyclesPerMicrosecond ¶
func (x *ScheduleProto) GetCyclesPerMicrosecond() int64
func (*ScheduleProto) GetHloModule ¶
func (x *ScheduleProto) GetHloModule() *hlo.HloModuleProto
func (*ScheduleProto) GetInstructions ¶
func (x *ScheduleProto) GetInstructions() []*ScheduleProto_Instruction
func (*ScheduleProto) ProtoMessage ¶
func (*ScheduleProto) ProtoMessage()
func (*ScheduleProto) ProtoReflect ¶
func (x *ScheduleProto) ProtoReflect() protoreflect.Message
func (*ScheduleProto) Reset ¶
func (x *ScheduleProto) Reset()
func (*ScheduleProto) String ¶
func (x *ScheduleProto) String() string
type ScheduleProto_Instruction ¶
type ScheduleProto_Instruction struct { // Instruction id (matches the id in HloInstructionProto). Id int64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` // Start and end timestamps in cycles. StartTimestampCycles float64 `protobuf:"fixed64,2,opt,name=start_timestamp_cycles,json=startTimestampCycles,proto3" json:"start_timestamp_cycles,omitempty"` EndTimestampCycles float64 `protobuf:"fixed64,3,opt,name=end_timestamp_cycles,json=endTimestampCycles,proto3" json:"end_timestamp_cycles,omitempty"` // contains filtered or unexported fields }
func (*ScheduleProto_Instruction) Descriptor
func (*ScheduleProto_Instruction) Descriptor() ([]byte, []int)
Deprecated: Use ScheduleProto_Instruction.ProtoReflect.Descriptor instead.
func (*ScheduleProto_Instruction) GetEndTimestampCycles ¶
func (x *ScheduleProto_Instruction) GetEndTimestampCycles() float64
func (*ScheduleProto_Instruction) GetId ¶
func (x *ScheduleProto_Instruction) GetId() int64
func (*ScheduleProto_Instruction) GetStartTimestampCycles ¶
func (x *ScheduleProto_Instruction) GetStartTimestampCycles() float64
func (*ScheduleProto_Instruction) ProtoMessage ¶
func (*ScheduleProto_Instruction) ProtoMessage()
func (*ScheduleProto_Instruction) ProtoReflect ¶
func (x *ScheduleProto_Instruction) ProtoReflect() protoreflect.Message
func (*ScheduleProto_Instruction) Reset ¶
func (x *ScheduleProto_Instruction) Reset()
func (*ScheduleProto_Instruction) String ¶
func (x *ScheduleProto_Instruction) String() string
type ShardableValueUpdatePairProto ¶
type ShardableValueUpdatePairProto struct { InputParameterNumber int64 `protobuf:"varint,1,opt,name=input_parameter_number,json=inputParameterNumber,proto3" json:"input_parameter_number,omitempty"` ParameterShapeIndex []int64 `` /* 128-byte string literal not displayed */ OutputShapeIndex []int64 `protobuf:"varint,3,rep,packed,name=output_shape_index,json=outputShapeIndex,proto3" json:"output_shape_index,omitempty"` // contains filtered or unexported fields }
func (*ShardableValueUpdatePairProto) Descriptor
func (*ShardableValueUpdatePairProto) Descriptor() ([]byte, []int)
Deprecated: Use ShardableValueUpdatePairProto.ProtoReflect.Descriptor instead.
func (*ShardableValueUpdatePairProto) GetInputParameterNumber ¶
func (x *ShardableValueUpdatePairProto) GetInputParameterNumber() int64
func (*ShardableValueUpdatePairProto) GetOutputShapeIndex ¶
func (x *ShardableValueUpdatePairProto) GetOutputShapeIndex() []int64
func (*ShardableValueUpdatePairProto) GetParameterShapeIndex ¶
func (x *ShardableValueUpdatePairProto) GetParameterShapeIndex() []int64
func (*ShardableValueUpdatePairProto) ProtoMessage ¶
func (*ShardableValueUpdatePairProto) ProtoMessage()
func (*ShardableValueUpdatePairProto) ProtoReflect ¶
func (x *ShardableValueUpdatePairProto) ProtoReflect() protoreflect.Message
func (*ShardableValueUpdatePairProto) Reset ¶
func (x *ShardableValueUpdatePairProto) Reset()
func (*ShardableValueUpdatePairProto) String ¶
func (x *ShardableValueUpdatePairProto) String() string
type ShardingConfigProto ¶ added in v0.5.1
type ShardingConfigProto struct { // Configuration for each HLO instruction. Nodes []*NodeShardingConfigProto `protobuf:"bytes,1,rep,name=nodes,proto3" json:"nodes,omitempty"` // contains filtered or unexported fields }
Message that captures sharding configuration of an HLO module.
func (*ShardingConfigProto) Descriptor
added in v0.5.1
func (*ShardingConfigProto) Descriptor() ([]byte, []int)
Deprecated: Use ShardingConfigProto.ProtoReflect.Descriptor instead.
func (*ShardingConfigProto) GetNodes ¶ added in v0.5.1
func (x *ShardingConfigProto) GetNodes() []*NodeShardingConfigProto
func (*ShardingConfigProto) ProtoMessage ¶ added in v0.5.1
func (*ShardingConfigProto) ProtoMessage()
func (*ShardingConfigProto) ProtoReflect ¶ added in v0.5.1
func (x *ShardingConfigProto) ProtoReflect() protoreflect.Message
func (*ShardingConfigProto) Reset ¶ added in v0.5.1
func (x *ShardingConfigProto) Reset()
func (*ShardingConfigProto) String ¶ added in v0.5.1
func (x *ShardingConfigProto) String() string