xla

package
v0.3.0 Latest
Warning

This package is not in the latest version of its module.

Published: Aug 23, 2024 License: Apache-2.0 Imports: 7 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

var (
	DebugOptions_ShapeChecks_name = map[int32]string{
		0: "IGNORE",
		1: "RUNTIME",
		2: "COMPILE_TIME",
	}
	DebugOptions_ShapeChecks_value = map[string]int32{
		"IGNORE":       0,
		"RUNTIME":      1,
		"COMPILE_TIME": 2,
	}
)

Enum value maps for DebugOptions_ShapeChecks.
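
A minimal usage sketch for these generated maps, converting between enum names and numeric values. The same pattern applies to the other enum value maps in this package; the import path below is hypothetical:

package main

import (
	"fmt"

	xla "example.com/xla" // hypothetical import path for this package
)

func main() {
	// Look up the numeric value for an enum name, then map it back.
	if v, ok := xla.DebugOptions_ShapeChecks_value["COMPILE_TIME"]; ok {
		fmt.Println(v, xla.DebugOptions_ShapeChecks_name[v]) // 2 COMPILE_TIME
	}
}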

var (
	DebugOptions_StepMarkerLocation_name = map[int32]string{
		0: "STEP_MARK_AT_ENTRY",
		1: "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP",
		3: "STEP_MARK_AT_SECOND_LEVEL_WHILE_LOOP",
		2: "STEP_MARK_NONE",
	}
	DebugOptions_StepMarkerLocation_value = map[string]int32{
		"STEP_MARK_AT_ENTRY":                   0,
		"STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP":    1,
		"STEP_MARK_AT_SECOND_LEVEL_WHILE_LOOP": 3,
		"STEP_MARK_NONE":                       2,
	}
)

Enum value maps for DebugOptions_StepMarkerLocation.

var (
	DebugOptions_CollectiveOpType_name = map[int32]string{
		0: "NOOP",
		1: "ALLREDUCE",
		2: "ALLGATHER",
		3: "REDUCESCATTER",
		4: "COLLECTIVEBROADCAST",
		5: "ALLTOALL",
		6: "COLLECTIVEPERMUTE",
	}
	DebugOptions_CollectiveOpType_value = map[string]int32{
		"NOOP":                0,
		"ALLREDUCE":           1,
		"ALLGATHER":           2,
		"REDUCESCATTER":       3,
		"COLLECTIVEBROADCAST": 4,
		"ALLTOALL":            5,
		"COLLECTIVEPERMUTE":   6,
	}
)

Enum value maps for DebugOptions_CollectiveOpType.

var (
	DebugOptions_CommandBufferCmdType_name = map[int32]string{
		0: "INVALID",
		1: "FUSION",
		2: "CUBLAS",
		3: "CUDNN",
		4: "COLLECTIVES",
		5: "CONDITIONALS",
		6: "CUSTOM_CALL",
		7: "CUBLASLT",
	}
	DebugOptions_CommandBufferCmdType_value = map[string]int32{
		"INVALID":      0,
		"FUSION":       1,
		"CUBLAS":       2,
		"CUDNN":        3,
		"COLLECTIVES":  4,
		"CONDITIONALS": 5,
		"CUSTOM_CALL":  6,
		"CUBLASLT":     7,
	}
)

Enum value maps for DebugOptions_CommandBufferCmdType.
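
A sketch of one place these maps are useful: building the XlaGpuEnableCommandBuffer field (declared in DebugOptions below) from command type names. The enum type DebugOptions_CommandBufferCmdType is the standard protoc-gen-go name for this nested enum; the import path is hypothetical:

package main

import (
	"fmt"

	xla "example.com/xla" // hypothetical import path for this package
)

func main() {
	// Select command types by name via the generated value map.
	var cmds []xla.DebugOptions_CommandBufferCmdType
	for _, n := range []string{"FUSION", "CUBLAS", "CUDNN"} {
		if v, ok := xla.DebugOptions_CommandBufferCmdType_value[n]; ok {
			cmds = append(cmds, xla.DebugOptions_CommandBufferCmdType(v))
		}
	}
	opts := &xla.DebugOptions{XlaGpuEnableCommandBuffer: cmds}
	fmt.Println(len(opts.GetXlaGpuEnableCommandBuffer())) // 3
}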

var (
	DebugOptions_PartitioningAlgorithm_name = map[int32]string{
		0: "PARTITIONING_ALGORITHM_NOOP",
		1: "PARTITIONING_ALGORITHM_EXP0",
		2: "PARTITIONING_ALGORITHM_EXP1",
		3: "PARTITIONING_ALGORITHM_EXP2",
	}
	DebugOptions_PartitioningAlgorithm_value = map[string]int32{
		"PARTITIONING_ALGORITHM_NOOP": 0,
		"PARTITIONING_ALGORITHM_EXP0": 1,
		"PARTITIONING_ALGORITHM_EXP1": 2,
		"PARTITIONING_ALGORITHM_EXP2": 3,
	}
)

Enum value maps for DebugOptions_PartitioningAlgorithm.

var (
	DebugOptions_WhileLoopUnrolling_name = map[int32]string{
		0: "WHILE_LOOP_UNROLLING_NO_UNROLL",
		1: "WHILE_LOOP_UNROLLING_DOUBLE_BUFFER",
		2: "WHILE_LOOP_UNROLLING_FULL_UNROLL",
	}
	DebugOptions_WhileLoopUnrolling_value = map[string]int32{
		"WHILE_LOOP_UNROLLING_NO_UNROLL":     0,
		"WHILE_LOOP_UNROLLING_DOUBLE_BUFFER": 1,
		"WHILE_LOOP_UNROLLING_FULL_UNROLL":   2,
	}
)

Enum value maps for DebugOptions_WhileLoopUnrolling.

var (
	HloModuleConfigProto_FusionConfigCollection_name = map[int32]string{
		0: "OFF",
		1: "PER_EDGE",
		2: "PER_NODE",
	}
	HloModuleConfigProto_FusionConfigCollection_value = map[string]int32{
		"OFF":      0,
		"PER_EDGE": 1,
		"PER_NODE": 2,
	}
)

Enum value maps for HloModuleConfigProto_FusionConfigCollection.

var File_xla_xla_proto protoreflect.FileDescriptor
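
A small sketch of inspecting this descriptor through the protoreflect API, here listing the messages declared in xla/xla.proto (import path hypothetical):

package main

import (
	"fmt"

	xla "example.com/xla" // hypothetical import path for this package
)

func main() {
	fd := xla.File_xla_xla_proto
	// Walk the top-level messages declared in the file.
	msgs := fd.Messages()
	for i := 0; i < msgs.Len(); i++ {
		fmt.Println(msgs.Get(i).FullName())
	}
}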

Functions

This section is empty.

Types

type CompilationEnvironmentsProto

type CompilationEnvironmentsProto struct {
	Environments []*anypb.Any `protobuf:"bytes,1,rep,name=environments,proto3" json:"environments,omitempty"`
	// contains filtered or unexported fields
}

Proto version of `xla::CompilationEnvironments`.
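
A minimal sketch of building this message by packing an environment proto into an anypb.Any. DebugOptions is used as the payload purely for illustration, and the import path is hypothetical:

package main

import (
	"fmt"

	"google.golang.org/protobuf/types/known/anypb"

	xla "example.com/xla" // hypothetical import path for this package
)

func main() {
	// Pack any proto message as an environment payload.
	env, err := anypb.New(&xla.DebugOptions{XlaHloProfile: true})
	if err != nil {
		panic(err)
	}
	ce := &xla.CompilationEnvironmentsProto{
		Environments: []*anypb.Any{env},
	}
	fmt.Println(len(ce.GetEnvironments())) // 1
}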

func (*CompilationEnvironmentsProto) Descriptor deprecated

func (*CompilationEnvironmentsProto) Descriptor() ([]byte, []int)

Deprecated: Use CompilationEnvironmentsProto.ProtoReflect.Descriptor instead.
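
A short sketch of the replacement the deprecation note points at (import path hypothetical):

package main

import (
	"fmt"

	xla "example.com/xla" // hypothetical import path for this package
)

func main() {
	// Obtain the message descriptor via protoreflect instead of the
	// deprecated Descriptor() method.
	md := (&xla.CompilationEnvironmentsProto{}).ProtoReflect().Descriptor()
	fmt.Println(md.FullName())
}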

func (*CompilationEnvironmentsProto) GetEnvironments

func (x *CompilationEnvironmentsProto) GetEnvironments() []*anypb.Any

func (*CompilationEnvironmentsProto) ProtoMessage

func (*CompilationEnvironmentsProto) ProtoMessage()

func (*CompilationEnvironmentsProto) ProtoReflect

func (*CompilationEnvironmentsProto) Reset

func (x *CompilationEnvironmentsProto) Reset()

func (*CompilationEnvironmentsProto) String

type DebugOptions

type DebugOptions struct {

	// go/keep-sorted start newline_separated=yes
	//
	// When true, XLA:CPU uses an HLO module scheduler that is optimized for
	// extracting concurrency at the cost of extra memory: we extend the live
	// ranges of temporaries to allow XLA runtime to schedule independent
	// operations in parallel on separate threads.
	XlaCpuEnableConcurrencyOptimizedScheduler bool `` /* 195-byte string literal not displayed */
	// When true, "unsafe" mathematical optimizations are enabled. These
	// transformations include but are not limited to:
	//
	//   - Reducing the precision of operations (e.g. using an approximate sin
	//     function, or transforming x/y into x * (1/y)).
	//   - Assuming that operations never produce or consume NaN or +/- Inf (this
	//     behavior can be adjusted using xla_cpu_fast_math_allow_{nans|infs}).
	//   - Assuming that +0 and -0 are indistinguishable.
	XlaCpuEnableFastMath bool `` /* 129-byte string literal not displayed */
	// When false we lower the Minimum and Maximum hlos in the CPU backend such
	// that Min(NotNaN, NaN) = Min(NaN, NotNaN) = NaN.  In other words, if this
	// flag is false we always propagate NaNs through Min and Max.
	//
	// Note, this does not correspond to the exact same behavior as the gpu flag
	// below!
	XlaCpuEnableFastMinMax bool `` /* 138-byte string literal not displayed */
	// When xla_cpu_enable_fast_math is true then this controls whether we forbid
	// using the reciprocal of an argument instead of division. Ignored when
	// xla_cpu_enable_fast_math is false.
	XlaCpuFastMathHonorDivision bool `` /* 153-byte string literal not displayed */
	// When xla_cpu_enable_fast_math is true then this controls whether we forbid
	// approximating calculations for functions. Ignored when
	// xla_cpu_enable_fast_math is false.
	XlaCpuFastMathHonorFunctions bool `` /* 156-byte string literal not displayed */
	// When xla_cpu_enable_fast_math is true then this controls whether we allow
	// operations to produce infinities. Ignored when xla_cpu_enable_fast_math is
	// false.
	XlaCpuFastMathHonorInfs bool `` /* 141-byte string literal not displayed */
	// When xla_cpu_enable_fast_math is true then this controls whether we allow
	// operations to produce NaNs.  Ignored when xla_cpu_enable_fast_math is
	// false.
	XlaCpuFastMathHonorNans bool `` /* 141-byte string literal not displayed */
	// When true, XLA:CPU uses the thunk runtime to execute the compiled program.
	XlaCpuUseThunkRuntime bool `` /* 133-byte string literal not displayed */
	// The number of parts to split the LLVM module into before codegen. This
	// allows XLA to compile all parts in parallel, and resolve kernel symbols
	// from different dynamic libraries.
	XlaCpuParallelCodegenSplitCount int32 `` /* 165-byte string literal not displayed */
	// A `prefer-vector-width` value that is passed to the LLVM backend. Default
	// value is `256` (AVX2 on x86 platforms).
	XlaCpuPreferVectorWidth int32 `` /* 139-byte string literal not displayed */
	// Internal debug/testing flag to switch Triton GEMM fusions on or off.
	XlaGpuUnsupportedEnableTritonGemm bool `` /* 171-byte string literal not displayed */
	// Show addresses of HLO ops in graph dump.
	XlaHloGraphAddresses bool `` /* 126-byte string literal not displayed */
	// Instrument the computation to collect per-HLO cycle counts.
	XlaHloProfile bool `protobuf:"varint,9,opt,name=xla_hlo_profile,json=xlaHloProfile,proto3" json:"xla_hlo_profile,omitempty"`
	// List of HLO passes to disable/enable. These names must exactly match the
	// pass names as specified by the HloPassInterface::name() method.
	//
	// At least one of xla_disable_hlo_passes and xla_enable_hlo_passes_only must
	// be empty.
	XlaDisableHloPasses    []string `protobuf:"bytes,30,rep,name=xla_disable_hlo_passes,json=xlaDisableHloPasses,proto3" json:"xla_disable_hlo_passes,omitempty"`
	XlaEnableHloPassesOnly []string `` /* 135-byte string literal not displayed */
	// Disables all HLO passes.  Note that some passes are necessary for
	// correctness and the invariants that must be satisfied by "fully optimized"
	// HLO are different for different devices and may change over time.  The only
	// "guarantee", such as it is, is that if you compile XLA and dump the
	// optimized HLO for some graph, you should be able to run it again on the
	// same device with the same build of XLA.
	XlaDisableAllHloPasses bool `` /* 136-byte string literal not displayed */
	// Numerical optimization level for the XLA compiler backend; the specific
	// interpretation of this value is left to the backends.
	XlaBackendOptimizationLevel int32 `` /* 148-byte string literal not displayed */
	// Embed the compiler IR as a string in the executable.
	XlaEmbedIrInExecutable bool `` /* 135-byte string literal not displayed */
	// Eliminate implicit broadcasts when lowering user computations to HLO
	// instructions; use explicit broadcast instead.
	XlaEliminateHloImplicitBroadcast bool `` /* 165-byte string literal not displayed */
	// When generating calls to Eigen in the CPU backend, use multi-threaded Eigen
	// mode.
	XlaCpuMultiThreadEigen bool `` /* 135-byte string literal not displayed */
	// Path to directory with cuda/ptx tools and libraries.
	XlaGpuCudaDataDir string `protobuf:"bytes,61,opt,name=xla_gpu_cuda_data_dir,json=xlaGpuCudaDataDir,proto3" json:"xla_gpu_cuda_data_dir,omitempty"`
	// Enable flush-to-zero semantics in the GPU backend.
	XlaGpuFtz bool `protobuf:"varint,62,opt,name=xla_gpu_ftz,json=xlaGpuFtz,proto3" json:"xla_gpu_ftz,omitempty"`
	// If true, in LLVM-based backends, emit !alias.scope metadata in
	// the generated IR.
	XlaLlvmEnableAliasScopeMetadata bool `` /* 164-byte string literal not displayed */
	// If true, in LLVM-based backends, emit !noalias metadata in the
	// generated IR.
	XlaLlvmEnableNoaliasMetadata bool `` /* 153-byte string literal not displayed */
	// If true, in LLVM-based backends, emit !invariant.load metadata in
	// the generated IR.
	XlaLlvmEnableInvariantLoadMetadata bool `` /* 173-byte string literal not displayed */
	// If true, a set of expensive LLVM optimization passes will not be run.
	XlaLlvmDisableExpensivePasses bool `` /* 156-byte string literal not displayed */
	// This is used by ClientLibraryTestBase::ComputeAndCompare*. If true, the
	// computation will run n! times with all permutations of layouts for the
	// output shape in rank n. For example, with a 3D shape, all permutations of
	// the set {0, 1, 2} are tried.
	XlaTestAllOutputLayouts bool `` /* 138-byte string literal not displayed */
	// This is used by ClientLibraryTestBase::ComputeAndCompare*. If true, the
	// computation will run for all permutations of layouts of all input
	// arguments. For example, with 2 input arguments in 2D and 4D shapes, the
	// computation will run 2! * 4! times.
	XlaTestAllInputLayouts bool `` /* 135-byte string literal not displayed */
	// Assign colors based on sharding information when generating the Graphviz
	// HLO graph.
	XlaHloGraphShardingColor bool `` /* 141-byte string literal not displayed */
	// Generate calls to MKL-DNN in the CPU backend.
	XlaCpuUseMklDnn bool `protobuf:"varint,97,opt,name=xla_cpu_use_mkl_dnn,json=xlaCpuUseMklDnn,proto3" json:"xla_cpu_use_mkl_dnn,omitempty"`
	// When true we lower the Minimum and Maximum hlos in the GPU backend such
	// that Min(NotNaN, NaN) = Min(NaN, NotNaN) = NotNaN.  In other words, if
	// this flag is true we don't propagate NaNs through Min and Max.
	//
	// Note, this does not correspond to the exact same behavior as the cpu flag
	// above!
	XlaGpuEnableFastMinMax bool `` /* 138-byte string literal not displayed */
	// Allows XLA to increase the output precision of floating point operations
	// and all floating-point conversions to be simplified, including those
	// that affect the numerics. The `FloatNormalization` pass inserts many
	// `f32 -> bf16 -> f32` conversion pairs. These are not removed by the
	// `AlgebraicSimplifier`, as that will only simplify conversions that are
	// no-ops, e.g. `bf16 -> f32 -> bf16`. Removing these improves accuracy.
	XlaAllowExcessPrecision bool `` /* 137-byte string literal not displayed */
	// Crashes the program when any kind of verification fails, instead of just
	// logging the failures. One example is cross checking of convolution results
	// among different algorithms.
	XlaGpuCrashOnVerificationFailures bool `` /* 171-byte string literal not displayed */
	// 0:   Disable gemm and convolution autotuning.
	// 1:   Enable autotuning, but disable correctness checking.
	// 2:   Also set output buffers to random numbers during autotuning.
	// 3:   Also reset output buffers to random numbers after autotuning each
	//      algorithm.
	// 4+:  Also check for correct outputs and for out-of-bounds reads/writes.
	//
	// Default: 4.
	XlaGpuAutotuneLevel int32 `protobuf:"varint,123,opt,name=xla_gpu_autotune_level,json=xlaGpuAutotuneLevel,proto3" json:"xla_gpu_autotune_level,omitempty"`
	// Force the host platform to pretend that there are this many host
	// "devices".  All these devices are backed by the same threadpool.  Defaults
	// to 1.
	//
	// Setting this to anything other than 1 can increase overhead from context
	// switching but we let the user override this behavior to help run tests on
	// the host that run models in parallel across multiple devices.
	XlaForceHostPlatformDeviceCount int32 `` /* 165-byte string literal not displayed */
	// If set to true XLA:GPU invokes `ptxas` with -O0 (default is -O3).
	XlaGpuDisableGpuasmOptimizations bool                     `` /* 166-byte string literal not displayed */
	XlaGpuShapeChecks                DebugOptions_ShapeChecks `` /* 153-byte string literal not displayed */
	// Enable fast math with eigen in the HLO evaluator.
	XlaHloEvaluatorUseFastPath bool `` /* 150-byte string literal not displayed */
	// Temporary option to allow support for both the R1 and the scalar index
	// versions of DynamicSlice and DynamicUpdateSlice. Only used for testing.
	XlaAllowScalarIndexDynamicOps bool `` /* 159-byte string literal not displayed */
	// Option to emit a target-specific marker to indicate the start of a training
	// step. The location of the marker (if any) is determined by the option
	// value.
	XlaStepMarkerLocation DebugOptions_StepMarkerLocation `` /* 172-byte string literal not displayed */
	// Directory to dump into.
	XlaDumpTo string `protobuf:"bytes,109,opt,name=xla_dump_to,json=xlaDumpTo,proto3" json:"xla_dump_to,omitempty"`
	// If specified, will only dump modules which match this regexp.
	XlaDumpHloModuleRe string `protobuf:"bytes,110,opt,name=xla_dump_hlo_module_re,json=xlaDumpHloModuleRe,proto3" json:"xla_dump_hlo_module_re,omitempty"`
	// If this flag is specified, will also dump HLO before and after passes that
	// match this regular expression.  Set to .* to dump before/after all passes.
	XlaDumpHloPassRe string `protobuf:"bytes,111,opt,name=xla_dump_hlo_pass_re,json=xlaDumpHloPassRe,proto3" json:"xla_dump_hlo_pass_re,omitempty"`
	// Specifies the format that HLO is dumped in.  Multiple of these may be
	// specified.
	XlaDumpHloAsText  bool `protobuf:"varint,112,opt,name=xla_dump_hlo_as_text,json=xlaDumpHloAsText,proto3" json:"xla_dump_hlo_as_text,omitempty"`
	XlaDumpHloAsProto bool `protobuf:"varint,113,opt,name=xla_dump_hlo_as_proto,json=xlaDumpHloAsProto,proto3" json:"xla_dump_hlo_as_proto,omitempty"`
	XlaDumpHloAsDot   bool `protobuf:"varint,114,opt,name=xla_dump_hlo_as_dot,json=xlaDumpHloAsDot,proto3" json:"xla_dump_hlo_as_dot,omitempty"`
	XlaDumpHloAsUrl   bool `protobuf:"varint,115,opt,name=xla_dump_hlo_as_url,json=xlaDumpHloAsUrl,proto3" json:"xla_dump_hlo_as_url,omitempty"`
	// Dump HLO graphs as an HTML (DOT -> SVG inlined in HTML)
	XlaDumpHloAsHtml bool `protobuf:"varint,116,opt,name=xla_dump_hlo_as_html,json=xlaDumpHloAsHtml,proto3" json:"xla_dump_hlo_as_html,omitempty"`
	// Dump the visualization of the fusion progress.
	XlaDumpFusionVisualization bool `` /* 146-byte string literal not displayed */
	// If true, every time an HLO module is run, we will dump an HloSnapshot
	// (essentially, a serialized module plus its inputs) to the --xla_dump_to
	// directory.
	XlaDumpHloSnapshots bool `protobuf:"varint,118,opt,name=xla_dump_hlo_snapshots,json=xlaDumpHloSnapshots,proto3" json:"xla_dump_hlo_snapshots,omitempty"`
	// Include a timestamp in the dumped filenames.
	XlaDumpIncludeTimestamp bool `` /* 137-byte string literal not displayed */
	// Max number of hlo module dumps in a directory. Set to < 0 for unbounded.
	XlaDumpMaxHloModules int32 `` /* 130-byte string literal not displayed */
	// Dump HloModuleMetadata as a text proto for each HLO module.
	XlaDumpModuleMetadata bool `` /* 131-byte string literal not displayed */
	// GZip-compress protos dumped via --xla_dump_hlo_as_proto.
	XlaDumpCompressProtos bool `` /* 131-byte string literal not displayed */
	// Dump HLO in long text format. Ignored unless xla_dump_hlo_as_text is true.
	XlaDumpHloAsLongText bool `` /* 132-byte string literal not displayed */
	// Overrides for XLA GPU's convolution layout heuristic.
	XlaGpuForceConvNchw bool `` /* 127-byte string literal not displayed */
	XlaGpuForceConvNhwc bool `` /* 127-byte string literal not displayed */
	// Paths to files with ptx code.
	XlaGpuPtxFile []string `protobuf:"bytes,127,rep,name=xla_gpu_ptx_file,json=xlaGpuPtxFile,proto3" json:"xla_gpu_ptx_file,omitempty"`
	// Whether to dump llvm ir when compiling to ptx.
	XlaGpuDumpLlvmir bool `protobuf:"varint,155,opt,name=xla_gpu_dump_llvmir,json=xlaGpuDumpLlvmir,proto3" json:"xla_gpu_dump_llvmir,omitempty"`
	// Whether to dump mlir using pretty print form.
	XlaDumpEnableMlirPrettyForm bool `` /* 153-byte string literal not displayed */
	// Denylist for cuDNN convolutions.
	XlaGpuAlgorithmDenylistPath string `` /* 150-byte string literal not displayed */
	// Debug options that trigger execution errors when NaN or Inf are detected.
	XlaTpuDetectNan bool `protobuf:"varint,135,opt,name=xla_tpu_detect_nan,json=xlaTpuDetectNan,proto3" json:"xla_tpu_detect_nan,omitempty"`
	XlaTpuDetectInf bool `protobuf:"varint,136,opt,name=xla_tpu_detect_inf,json=xlaTpuDetectInf,proto3" json:"xla_tpu_detect_inf,omitempty"`
	// True if TraceMe annotations are enabled for XLA:CPU.
	XlaCpuEnableXprofTraceme bool `` /* 142-byte string literal not displayed */
	// It is usually preferable not to fall back to the driver; it can consume more
	// memory, or have bugs.
	XlaGpuUnsafeFallbackToDriverOnPtxasNotFound bool `` /* 209-byte string literal not displayed */
	// Extra parameters to pass to the GPU assembler.
	XlaGpuAsmExtraFlags string `` /* 126-byte string literal not displayed */
	// Per-heap size constraint. New heaps will be created if per-heap max size is
	// reached.
	XlaMultiheapSizeConstraintPerHeap int32 `` /* 171-byte string literal not displayed */
	// Enable detailed logging into vlog. If this is disabled, no
	// compilation summary will be printed at the end of the computation.
	XlaDetailedLogging bool `protobuf:"varint,252,opt,name=xla_detailed_logging,json=xlaDetailedLogging,proto3" json:"xla_detailed_logging,omitempty"`
	// Enable HLO dumping. If this is disabled, no HLO modules will be dumped.
	XlaEnableDumping bool `protobuf:"varint,253,opt,name=xla_enable_dumping,json=xlaEnableDumping,proto3" json:"xla_enable_dumping,omitempty"`
	// Overrides normal multi-threaded compilation setting to use this many
	// threads. Setting to 0 (the default value) means no enforcement.
	XlaGpuForceCompilationParallelism            int32 `` /* 169-byte string literal not displayed */
	XlaGpuEnableLlvmModuleCompilationParallelism bool  `` /* 206-byte string literal not displayed */
	// Guarantees run-to-run determinism.
	// This flag implies --xla_gpu_exclude_nondeterministic_ops and in addition
	// disables autotuning.
	XlaGpuDeterministicOps bool `` /* 134-byte string literal not displayed */
	// Paths to files with LLVM code.
	XlaGpuLlvmIrFile              []string                        `protobuf:"bytes,150,rep,name=xla_gpu_llvm_ir_file,json=xlaGpuLlvmIrFile,proto3" json:"xla_gpu_llvm_ir_file,omitempty"`
	XlaGpuDisableAsyncCollectives []DebugOptions_CollectiveOpType `` /* 203-byte string literal not displayed */
	// Size threshold (in bytes) for the GPU collective combiners.
	XlaGpuAllReduceCombineThresholdBytes     int64 `` /* 182-byte string literal not displayed */
	XlaGpuAllGatherCombineThresholdBytes     int64 `` /* 182-byte string literal not displayed */
	XlaGpuReduceScatterCombineThresholdBytes int64 `` /* 194-byte string literal not displayed */
	// Combine all-gather/scatter-reduce ops with the same dimension or
	// irrespective of their dimension.
	XlaGpuEnableAllGatherCombineByDim     bool `` /* 175-byte string literal not displayed */
	XlaGpuEnableReduceScatterCombineByDim bool `` /* 187-byte string literal not displayed */
	// Enable allreduce reassociation on allreduces that are converted to a wider
	// type. The resulting allreduce will be promoted to a wider-typed allreduce.
	XlaGpuEnableReassociationForConvertedAr bool `` /* 191-byte string literal not displayed */
	// Number of devices per host for first stage of BlueConnect decomposition
	// pass. The pass will attempt to decompose all-reduce ops into a
	// ReduceScatter-AllReduce-AllGather sequence, with the initial ReduceScatter
	// being performed over all of the devices in the same host. Set to < 1 to
	// disable all-reduce decomposition.
	XlaGpuAllReduceBlueconnectNumDevicesPerHost int32 `` /* 207-byte string literal not displayed */
	// Enable hoisting of reduce-scatter out of while loops.
	XlaGpuEnableWhileLoopReduceScatterCodeMotion bool `` /* 210-byte string literal not displayed */
	// Inflate collective cost by running each collective multiple times.
	XlaGpuCollectiveInflationFactor int32 `` /* 163-byte string literal not displayed */
	// Whether to force inline before llvm module split to get more balanced
	// splits for parallel compilation.
	XlaLlvmForceInlineBeforeSplit bool `` /* 159-byte string literal not displayed */
	// Whether to use the cuDNN frontend API for convolutions when possible.
	XlaGpuEnableCudnnFrontend       bool `` /* 145-byte string literal not displayed */
	XlaGpuEnableCudnnFmha           bool `` /* 133-byte string literal not displayed */
	XlaGpuFusedAttentionUseCudnnRng bool `` /* 167-byte string literal not displayed */
	// Rewrite layer norm patterns into cuDNN library calls.
	XlaGpuEnableCudnnLayerNorm bool `` /* 150-byte string literal not displayed */
	// Disable dumping metadata in HLO dumps.
	XlaDumpDisableMetadata bool `` /* 134-byte string literal not displayed */
	// If this flag is specified, will only dump HLO before and after passes in
	// the pass pipeline that matches this regular expression. Default empty value
	// enables dumping in all pipelines.
	XlaDumpHloPipelineRe string `` /* 129-byte string literal not displayed */
	// If true, abort immediately when conv algorithm picker fails, rather than
	// logging a warning and proceeding with fallback.
	XlaGpuStrictConvAlgorithmPicker bool `` /* 165-byte string literal not displayed */
	// If true, XLA will try to pattern match subgraphs of HLO operations into
	// custom fusions registered in the current process (pre-compiled hand written
	// kernels, e.g. various GEMM fusions written in CUTLASS).
	XlaGpuEnableCustomFusions bool `` /* 145-byte string literal not displayed */
	// A regular expression enabling only a subset of custom fusions. Enabled only
	// if `xla_gpu_enable_custom_fusion` is set to true.
	XlaGpuEnableCustomFusionsRe string `` /* 152-byte string literal not displayed */
	// Enables address computation fusion to optimize dynamic-slice and
	// dynamic-update-slice operations around library calls.
	XlaGpuEnableDynamicSliceFusion bool `` /* 162-byte string literal not displayed */
	// Timeout in seconds before terminating jobs that are stuck in an NCCL
	// Rendezvous. Negative value disables the timeout and will not terminate.
	XlaGpuNcclTerminationTimeoutSeconds int64 `` /* 177-byte string literal not displayed */
	// Enables shared constants for XLA/GPU. This allows large constants to be
	// shared among multiple GPU executables.
	XlaGpuEnableSharedConstants bool `` /* 151-byte string literal not displayed */
	// Whether to use cuBLASLt for GEMMs on GPUs.
	XlaGpuEnableCublaslt bool `` /* 128-byte string literal not displayed */
	// Determine the types of commands that are recorded into command buffers.
	XlaGpuEnableCommandBuffer []DebugOptions_CommandBufferCmdType `` /* 195-byte string literal not displayed */
	// This number determines how many moved instructions like fusion kernels are
	// required for a region to be captured as a function to be launched as a GPU
	// graph.
	XlaGpuGraphMinGraphSize int32 `` /* 141-byte string literal not displayed */
	// Identify concurrent regions in GPU graphs and execute them concurrently.
	XlaGpuGraphEnableConcurrentRegion bool `` /* 171-byte string literal not displayed */
	// Size threshold (in megabytes) for the GPU redzone scratch allocator.
	XlaGpuRedzoneScratchMaxMegabytes int64 `` /* 168-byte string literal not displayed */
	// Amount of padding the redzone allocator will put on one side of each buffer
	// it allocates.  (So the buffer's total size will be increased by 2x this
	// value.)
	//
	// Higher values make it more likely that we'll catch an out-of-bounds read or
	// write.  Smaller values consume less memory during autotuning.  Note that a
	// fused cudnn conv has up to 6 total buffers (4 inputs, 1 output, and 1
	// scratch), so this can be multiplied by quite a lot.
	XlaGpuRedzonePaddingBytes int64 `` /* 145-byte string literal not displayed */
	// Generate calls to Arm Compute Library in the CPU backend.
	XlaCpuUseAcl bool `protobuf:"varint,174,opt,name=xla_cpu_use_acl,json=xlaCpuUseAcl,proto3" json:"xla_cpu_use_acl,omitempty"`
	// By default, XLA:CPU will run fp16 dot/conv as fp32, as this is generally
	// (much) faster on our hardware.  Set this flag to disable this behavior.
	XlaCpuStrictDotConvMath bool `` /* 141-byte string literal not displayed */
	// An option to enable using cuDNN runtime-compiled fusion kernels, which
	// are available and recommended for Ampere+ GPUs.
	XlaGpuUseRuntimeFusion       bool `` /* 136-byte string literal not displayed */
	XlaDumpLatencyHidingSchedule bool `` /* 154-byte string literal not displayed */
	// By default, MLIR lowering will use Linalg elementwise fusion. If this flag
	// is enabled, the pipeline will use tiling, fusion, peeling, vectorization
	// instead.
	XlaCpuEnableMlirTilingAndFusion bool `` /* 167-byte string literal not displayed */
	// XLA:CPU-Next tiling parameters for matmul.
	XlaCpuEnableCustomMatmulTiling  bool  `` /* 162-byte string literal not displayed */
	XlaCpuMatmulTilingMDim          int64 `` /* 138-byte string literal not displayed */
	XlaCpuMatmulTilingNDim          int64 `` /* 138-byte string literal not displayed */
	XlaCpuMatmulTilingKDim          int64 `` /* 138-byte string literal not displayed */
	XlaCpuEnableMlirFusionOutlining bool  `` /* 165-byte string literal not displayed */
	// If set, use the experimental deallocation pass from mlir-hlo.
	XlaCpuEnableExperimentalDeallocation   bool   `` /* 178-byte string literal not displayed */
	XlaGpuEnableLatencyHidingScheduler     bool   `` /* 174-byte string literal not displayed */
	XlaGpuEnableHighestPriorityAsyncStream bool   `` /* 188-byte string literal not displayed */
	XlaGpuEnableAnalyticalLatencyEstimator bool   `` /* 186-byte string literal not displayed */
	XlaGpuLhsEnableGpuAsyncTracker         bool   `` /* 164-byte string literal not displayed */
	XlaGpuPgleProfileFileOrDirectoryPath   string `` /* 183-byte string literal not displayed */
	XlaGpuMemoryLimitSlopFactor            int32  `` /* 153-byte string literal not displayed */
	XlaGpuEnablePipelinedCollectives       bool   `` /* 166-byte string literal not displayed */
	XlaGpuEnablePipelinedAllReduce         bool   `` /* 162-byte string literal not displayed */
	XlaGpuEnablePipelinedAllGather         bool   `` /* 162-byte string literal not displayed */
	XlaGpuEnablePipelinedReduceScatter     bool   `` /* 174-byte string literal not displayed */
	XlaGpuEnablePipelinedP2P               bool   `` /* 142-byte string literal not displayed */
	XlaGpuRunPostLayoutCollectivePipeliner bool   `` /* 188-byte string literal not displayed */
	// The minimum data size in bytes to trigger collective-permute-decomposer
	// transformation.
	XlaGpuCollectivePermuteDecomposerThreshold int64 `` /* 198-byte string literal not displayed */
	// The partitioning algorithm to be used in the PartitionAssignment pass.
	XlaPartitioningAlgorithm                      DebugOptions_PartitioningAlgorithm `` /* 182-byte string literal not displayed */
	XlaGpuEnableTritonGemm                        bool                               `` /* 136-byte string literal not displayed */
	XlaGpuEnableCudnnInt8X32ConvolutionReordering bool                               `` /* 209-byte string literal not displayed */
	// Creates triton fusion for all supported gemms.
	// To make sure only triton gemm is chosen by the autotuner run with
	// `xla_gpu_cublas_fallback` set to false.
	XlaGpuTritonGemmAny                     bool `` /* 127-byte string literal not displayed */
	XlaGpuExhaustiveTilingSearch            bool `` /* 154-byte string literal not displayed */
	XlaGpuEnableTritonSoftmaxFusion         bool `` /* 165-byte string literal not displayed */
	XlaGpuEnablePriorityFusion              bool `` /* 148-byte string literal not displayed */
	XlaGpuEnableTritonSoftmaxPriorityFusion bool `` /* 191-byte string literal not displayed */
	// File to write autotune results to. It will be a binary file unless the name
	// ends with .txt or .textproto. Warning: The results are written at every
	// compilation, possibly multiple times per process. This only works on CUDA.
	XlaGpuDumpAutotuneResultsTo string `` /* 152-byte string literal not displayed */
	// File to load autotune results from. It will be considered a binary file
	// unless the name ends with .txt or .textproto. At most one loading will
	// happen during the lifetime of one process, even if the first one is
	// unsuccessful or different file paths are passed here. This only works on
	// CUDA.
	XlaGpuLoadAutotuneResultsFrom string `` /* 158-byte string literal not displayed */
	// Description of the target platform in GpuTargetConfigProto format; if
	// provided, deviceless compilation is assumed, and the current device is
	// ignored.
	XlaGpuTargetConfigFilename string `` /* 147-byte string literal not displayed */
	// Memory budget in GB per device for AutoSharding.
	XlaGpuAutoSpmdPartitioningMemoryBudgetGb int32 `` /* 196-byte string literal not displayed */
	// See the definition of the
	// xla_gpu_auto_spmd_partitioning_memory_budget_ratio flag for the meaning of
	// this field.
	XlaGpuAutoSpmdPartitioningMemoryBudgetRatio      float32 `` /* 206-byte string literal not displayed */
	XlaGpuTritonGemmDisableReducedPrecisionReduction bool    `` /* 220-byte string literal not displayed */
	XlaGpuTritonFusionLevel                          int32   `` /* 139-byte string literal not displayed */
	XlaGpuDumpAutotunedGemmFusions                   bool    `` /* 162-byte string literal not displayed */
	XlaGpuOverrideGemmAutotuner                      string  `` /* 150-byte string literal not displayed */
	XlaGpuCopyInsertionUseRegionAnalysis             bool    `` /* 182-byte string literal not displayed */
	// If true, each fusion instruction will have a cost model runtime estimate in
	// backend config after compilation.
	XlaGpuCollectCostModelStats  bool `` /* 153-byte string literal not displayed */
	XlaGpuEnableSplitKAutotuning bool `` /* 156-byte string literal not displayed */
	// Whether reduction epilogue fusion is enabled in fusion passes.
	XlaGpuEnableReductionEpilogueFusion bool `` /* 177-byte string literal not displayed */
	// Allow early return when acquiring NCCL cliques.
	XlaGpuEnableNcclCliqueOptimization bool `` /* 174-byte string literal not displayed */
	// Replace custom calls with noop operations.
	XlaGpuMockCustomCalls bool `` /* 133-byte string literal not displayed */
	// Allow Triton GEMM autotuning to fall back to cuBLAS when that is
	// faster.
	XlaGpuCublasFallback bool `` /* 128-byte string literal not displayed */
	// Enable double buffering for loops.
	XlaGpuEnableWhileLoopDoubleBuffering bool `` /* 182-byte string literal not displayed */
	// Determine the while loop unrolling scheme.
	XlaGpuEnableWhileLoopUnrolling DebugOptions_WhileLoopUnrolling `` /* 203-byte string literal not displayed */
	// Change the layout of the second triton dot operand to be column major.
	// Only works for (bf16 x bf16) -> bf16.
	XlaGpuEnsureMinorDotContractionDims bool `` /* 179-byte string literal not displayed */
	// Filter out kernels that spill registers during autotuning.
	XlaGpuFilterKernelsSpillingRegistersOnAutotuning bool `` /* 220-byte string literal not displayed */
	// Maximum number of buffers to print when debugging buffer assignment.
	XlaDebugBufferAssignmentShowMax int64 `` /* 165-byte string literal not displayed */
	XlaGpuLlvmVerificationLevel     int32 `` /* 151-byte string literal not displayed */
	// Enable radix sort using CUB.
	XlaGpuEnableCubRadixSort bool `` /* 144-byte string literal not displayed */
	// Threshold to enable windowed einsum (collective matmul) in MB.
	XlaGpuThresholdForWindowedEinsumMib int64 `` /* 179-byte string literal not displayed */
	// Enables currently disabled features within Triton for Hopper.
	XlaGpuEnableTritonHopper bool `` /* 142-byte string literal not displayed */
	// Enable NCCL user buffers.
	XlaGpuEnableNcclUserBuffers bool `` /* 153-byte string literal not displayed */
	// Enable NCCL communicator splitting.
	XlaGpuEnableNcclCommSplitting bool `` /* 159-byte string literal not displayed */
	// Enable NCCL per stream communicators.
	XlaGpuEnableNcclPerStreamComms bool `` /* 164-byte string literal not displayed */
	// If enabled, uses the libnvptxcompiler library to compile PTX to cuBIN.
	XlaGpuEnableLibnvptxcompiler     bool `` /* 152-byte string literal not displayed */
	XlaGpuEnableDotStrengthReduction bool `` /* 168-byte string literal not displayed */
	// Whether to use multiple compute streams to run windowed einsum.
	XlaGpuMultiStreamedWindowedEinsum bool `` /* 171-byte string literal not displayed */
	// If enabled, uses bf16_6way gemm to compute F32 gemm.
	XlaGpuEnableBf16_6WayGemm bool `` /* 144-byte string literal not displayed */
	// If enabled, uses bf16_3way gemm to compute F32 gemm.
	XlaGpuEnableBf16_3WayGemm bool `` /* 144-byte string literal not displayed */
	// Specify the maximum number of channels (SMs) NCCL
	// will use for collective operations.
	XlaGpuNcclCollectiveMaxNchannels int64 `` /* 168-byte string literal not displayed */
	// Specify the maximum number of channels (SMs) NCCL
	// will use for p2p operations.
	XlaGpuNcclP2PMaxNchannels int64 `` /* 147-byte string literal not displayed */
	// Choose the level of mlir emitters that are enabled.
	// Current levels:
	// 0: Disabled.
	// 1: Loop emitter
	// 2: + Loop-like emitters
	// 3: + Transpose
	// 4: + Reduce
	XlaGpuMlirEmitterLevel int64 `` /* 136-byte string literal not displayed */
	// Threshold to rewrite matmul to cuBLAS or Triton (minimum combined number of
	// elements of both matrices in non-batch dimensions to be considered for a
	// rewrite).
	XlaGpuGemmRewriteSizeThreshold int64 `` /* 162-byte string literal not displayed */
	// If true, will require complete AOT autotuning results; in the case of
	// missing AOT result, the model will not be compiled or executed, and a
	// `NotFound` error will be returned.
	XlaGpuRequireCompleteAotAutotuneResults bool `` /* 191-byte string literal not displayed */
	// Let GEMM fusion autotuning probe cuDNN as a backend.
	// Current levels:
	// 0: Disabled.
	// 1: Fusions of GEMM, elementwise, transpose/reshape operations.
	// 2: + Broadcasts.
	// 3: + Nontrivial noncontracting dimension reshapes/transposes.
	XlaGpuCudnnGemmFusionLevel int32 `` /* 150-byte string literal not displayed */
	// This instructs the runtime whether to use
	// memcpy for p2p communication when source and
	// target are located within a node (NVLink).
	XlaGpuUseMemcpyLocalP2P bool `` /* 141-byte string literal not displayed */
	// If non-zero, limits the number of solutions to be used by GEMM autotuner.
	// This might be useful if underlying math library returns too many GEMM
	// solutions.
	XlaGpuAutotuneMaxSolutions int64 `` /* 148-byte string literal not displayed */
	// If true, large constants will be printed out when dumping HLOs.
	XlaDumpLargeConstants bool `` /* 131-byte string literal not displayed */
	// If true, will verify that the numerical results of Triton fusions match
	// the results of regular emitters.
	XlaGpuVerifyTritonFusionNumerics bool `` /* 168-byte string literal not displayed */
	// File to write autotune logs to. It will be stored in txt format.
	XlaGpuDumpAutotuneLogsTo string `` /* 143-byte string literal not displayed */
	// Base length to rewrite the reduce window to; no rewrite if set to 0.
	XlaReduceWindowRewriteBaseLength int64 `` /* 168-byte string literal not displayed */
	// If true, will enable host memory offloading on a device.
	XlaGpuEnableHostMemoryOffloading bool `` /* 168-byte string literal not displayed */
	// Excludes non-deterministic ops from compiled executables.
	// Unlike --xla_gpu_deterministic_ops does not disable autotuning - the
	// compilation itself can be non-deterministic.
	// At present, the HLO op SelectAndScatter does not have a
	// deterministic XLA:GPU implementation.
	// Compilation errors out if SelectAndScatter is encountered.
	// Scatter ops can be non-deterministic by default; these get converted to
	// a deterministic implementation.
	XlaGpuExcludeNondeterministicOps bool `` /* 166-byte string literal not displayed */
	// If true, NCCL errors will terminate the process.
	XlaGpuNcclTerminateOnError          bool   `` /* 150-byte string literal not displayed */
	XlaGpuShardAutotuning               bool   `` /* 131-byte string literal not displayed */
	XlaGpuEnableApproxCostlyCollectives bool   `` /* 177-byte string literal not displayed */
	XlaGpuKernelCacheFile               string `` /* 132-byte string literal not displayed */
	// Recognises rotate-right patterns (slice, slice, concat) within a while
	// loop and labels the while loop as a pipelined while loop. This is an
	// unsafe flag.
	XlaGpuUnsafePipelinedLoopAnnotator bool   `` /* 174-byte string literal not displayed */
	XlaGpuPerFusionAutotuneCacheDir    string `` /* 166-byte string literal not displayed */
	// The command buffer trace cache size; increasing the cache size may
	// sometimes reduce the chances of doing command buffer tracing for
	// updating a command buffer instance.
	XlaCmdBufferTraceCacheSize int64 `` /* 150-byte string literal not displayed */
	// Enabling this flag uses a separate memory space color for the temp
	// buffer, which is then allocated by a separate memory allocator. Since
	// there is no interference from other memory allocations, the temp buffer
	// is allocated at a fixed address on every iteration, which is good for
	// cuda-graph performance.
	XlaGpuTempBufferUseSeparateColor bool `` /* 170-byte string literal not displayed */
	// Custom call targets with the legacy registry API (non-FFI API) that
	// support being recorded as a command buffer custom command, i.e., custom
	// call targets that support cuda-graph capturing for CUDA devices.
	// This flag is read if the CUSTOM_CALL command type is recorded into the
	// command buffer.
	LegacyCommandBufferCustomCallTargets []string `` /* 179-byte string literal not displayed */
	// This flag is used for controlling HLO dumping and NVTX marker. If turned
	// on, both HLO dumping and NVTX marker will use syntactic sugar wrappers
	// as op names, while the actual op names will be shown if turned off.
	//
	// Here is an example HLO excerpt with the flag off:
	//
	//	 async_computation {
	//	  param_0 = f32[1,4,8]{1,0,2} parameter(0)
	//	  ROOT all-to-all.3.1 = f32[1,4,8]{1,0,2} all-to-all(param_0),
	//	                        replica_groups={{0,1,2,3,4,5,6,7}}, dimensions={2}
	//	 }
	//	...
	//
	//	all-to-all-start =
	//	  ((f32[1,4,8]{1,0,2}), f32[1,4,8]{1,0,2}) async-start(bitcast.24.0),
	//	  calls=async_computation, backend_config={...}
	//	all-to-all-done = f32[1,4,8]{1,0,2} async-done(all-to-all-start)
	//
	// and with the flag on:
	//
	//	all-to-all-start = ((f32[1,4,8]{1,0,2}), f32[1,4,8]{1,0,2})
	//	                   all-to-all-start(bitcast.24.0),
	//	                   replica_groups={{0,1,2,3,4,5,6,7}}, dimensions={2},
	//	                   backend_config={...}
	//	all-to-all-done = f32[1,4,8]{1,0,2} all-to-all-done(all-to-all-start)
	XlaSyntaxSugarAsyncOps bool `` /* 136-byte string literal not displayed */
	// Relative precision for comparing different GEMM solutions.
	XlaGpuAutotuneGemmRtol float32 `` /* 137-byte string literal not displayed */
	// Allow launching command buffers while profiling is active.
	// When disabled, execute in op-by-op mode.
	// TODO(b/355487968): Remove this option when validation complete.
	XlaEnableCommandBuffersDuringProfiling bool `` /* 186-byte string literal not displayed */
	// Limit for the number of kernel configurations (plans) to use during
	// autotuning of cuDNN GEMM fusions. More plans make the autotuning slower
	// but potentially improve performance.
	XlaGpuCudnnGemmMaxPlans int32 `` /* 141-byte string literal not displayed */
	// If enabled, uses the libnvjitlink library for PTX compilation and linking.
	XlaGpuEnableLibnvjitlink bool `` /* 140-byte string literal not displayed */
	// If enabled, generates triton gemm kernels for int4 inputs.
	XlaGpuEnableTritonGemmInt4 bool `` /* 150-byte string literal not displayed */
	// If true, XLA will wrap `dot` operations into async computations in an
	// effort to parallelize matrix operations.
	XlaGpuAsyncDot bool `protobuf:"varint,321,opt,name=xla_gpu_async_dot,json=xlaGpuAsyncDot,proto3" json:"xla_gpu_async_dot,omitempty"`
	// Extra options to pass to the compilation backend (e.g. LLVM); specific
	// interpretation of these values is left to the backend.
	XlaBackendExtraOptions map[string]string `` /* 221-byte string literal not displayed */
	// contains filtered or unexported fields
}

Debugging options for XLA. These options may change at any time - there are no guarantees about backward or forward compatibility for these fields.

Debug options naming and organization:

  1. Backend-agnostic options: `xla_$flag_name` - go first, and sorted alphabetically by the flag name.

  2. Backend-specific options: `xla_$backend_$flag_name` - must be in the corresponding backend section, and sorted alphabetically by the flag name.
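
A minimal sketch of constructing and serializing a DebugOptions message, setting a few of the fields above. The field choices and dump path are illustrative only, and the import path is hypothetical:

package main

import (
	"fmt"

	"google.golang.org/protobuf/proto"

	xla "example.com/xla" // hypothetical import path for this package
)

func main() {
	opts := &xla.DebugOptions{
		XlaHloProfile:       true,
		XlaGpuAutotuneLevel: 4,              // the documented default
		XlaDumpTo:           "/tmp/xla-dump", // hypothetical directory
	}
	b, err := proto.Marshal(opts)
	if err != nil {
		panic(err)
	}
	fmt.Printf("serialized DebugOptions: %d bytes\n", len(b))
}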

func (*DebugOptions) Descriptor deprecated

func (*DebugOptions) Descriptor() ([]byte, []int)

Deprecated: Use DebugOptions.ProtoReflect.Descriptor instead.

func (*DebugOptions) GetLegacyCommandBufferCustomCallTargets

func (x *DebugOptions) GetLegacyCommandBufferCustomCallTargets() []string

func (*DebugOptions) GetXlaAllowExcessPrecision

func (x *DebugOptions) GetXlaAllowExcessPrecision() bool

func (*DebugOptions) GetXlaAllowScalarIndexDynamicOps

func (x *DebugOptions) GetXlaAllowScalarIndexDynamicOps() bool

func (*DebugOptions) GetXlaBackendExtraOptions

func (x *DebugOptions) GetXlaBackendExtraOptions() map[string]string

func (*DebugOptions) GetXlaBackendOptimizationLevel

func (x *DebugOptions) GetXlaBackendOptimizationLevel() int32

func (*DebugOptions) GetXlaCmdBufferTraceCacheSize

func (x *DebugOptions) GetXlaCmdBufferTraceCacheSize() int64

func (*DebugOptions) GetXlaCpuEnableConcurrencyOptimizedScheduler

func (x *DebugOptions) GetXlaCpuEnableConcurrencyOptimizedScheduler() bool

func (*DebugOptions) GetXlaCpuEnableCustomMatmulTiling

func (x *DebugOptions) GetXlaCpuEnableCustomMatmulTiling() bool

func (*DebugOptions) GetXlaCpuEnableExperimentalDeallocation

func (x *DebugOptions) GetXlaCpuEnableExperimentalDeallocation() bool

func (*DebugOptions) GetXlaCpuEnableFastMath

func (x *DebugOptions) GetXlaCpuEnableFastMath() bool

func (*DebugOptions) GetXlaCpuEnableFastMinMax

func (x *DebugOptions) GetXlaCpuEnableFastMinMax() bool

func (*DebugOptions) GetXlaCpuEnableMlirFusionOutlining

func (x *DebugOptions) GetXlaCpuEnableMlirFusionOutlining() bool

func (*DebugOptions) GetXlaCpuEnableMlirTilingAndFusion

func (x *DebugOptions) GetXlaCpuEnableMlirTilingAndFusion() bool

func (*DebugOptions) GetXlaCpuEnableXprofTraceme

func (x *DebugOptions) GetXlaCpuEnableXprofTraceme() bool

func (*DebugOptions) GetXlaCpuFastMathHonorDivision

func (x *DebugOptions) GetXlaCpuFastMathHonorDivision() bool

func (*DebugOptions) GetXlaCpuFastMathHonorFunctions

func (x *DebugOptions) GetXlaCpuFastMathHonorFunctions() bool

func (*DebugOptions) GetXlaCpuFastMathHonorInfs

func (x *DebugOptions) GetXlaCpuFastMathHonorInfs() bool

func (*DebugOptions) GetXlaCpuFastMathHonorNans

func (x *DebugOptions) GetXlaCpuFastMathHonorNans() bool

func (*DebugOptions) GetXlaCpuMatmulTilingKDim

func (x *DebugOptions) GetXlaCpuMatmulTilingKDim() int64

func (*DebugOptions) GetXlaCpuMatmulTilingMDim

func (x *DebugOptions) GetXlaCpuMatmulTilingMDim() int64

func (*DebugOptions) GetXlaCpuMatmulTilingNDim

func (x *DebugOptions) GetXlaCpuMatmulTilingNDim() int64

func (*DebugOptions) GetXlaCpuMultiThreadEigen

func (x *DebugOptions) GetXlaCpuMultiThreadEigen() bool

func (*DebugOptions) GetXlaCpuParallelCodegenSplitCount

func (x *DebugOptions) GetXlaCpuParallelCodegenSplitCount() int32

func (*DebugOptions) GetXlaCpuPreferVectorWidth

func (x *DebugOptions) GetXlaCpuPreferVectorWidth() int32

func (*DebugOptions) GetXlaCpuStrictDotConvMath

func (x *DebugOptions) GetXlaCpuStrictDotConvMath() bool

func (*DebugOptions) GetXlaCpuUseAcl

func (x *DebugOptions) GetXlaCpuUseAcl() bool

func (*DebugOptions) GetXlaCpuUseMklDnn

func (x *DebugOptions) GetXlaCpuUseMklDnn() bool

func (*DebugOptions) GetXlaCpuUseThunkRuntime

func (x *DebugOptions) GetXlaCpuUseThunkRuntime() bool

func (*DebugOptions) GetXlaDebugBufferAssignmentShowMax

func (x *DebugOptions) GetXlaDebugBufferAssignmentShowMax() int64

func (*DebugOptions) GetXlaDetailedLogging

func (x *DebugOptions) GetXlaDetailedLogging() bool

func (*DebugOptions) GetXlaDisableAllHloPasses

func (x *DebugOptions) GetXlaDisableAllHloPasses() bool

func (*DebugOptions) GetXlaDisableHloPasses

func (x *DebugOptions) GetXlaDisableHloPasses() []string

func (*DebugOptions) GetXlaDumpCompressProtos

func (x *DebugOptions) GetXlaDumpCompressProtos() bool

func (*DebugOptions) GetXlaDumpDisableMetadata

func (x *DebugOptions) GetXlaDumpDisableMetadata() bool

func (*DebugOptions) GetXlaDumpEnableMlirPrettyForm

func (x *DebugOptions) GetXlaDumpEnableMlirPrettyForm() bool

func (*DebugOptions) GetXlaDumpFusionVisualization

func (x *DebugOptions) GetXlaDumpFusionVisualization() bool

func (*DebugOptions) GetXlaDumpHloAsDot

func (x *DebugOptions) GetXlaDumpHloAsDot() bool

func (*DebugOptions) GetXlaDumpHloAsHtml

func (x *DebugOptions) GetXlaDumpHloAsHtml() bool

func (*DebugOptions) GetXlaDumpHloAsLongText

func (x *DebugOptions) GetXlaDumpHloAsLongText() bool

func (*DebugOptions) GetXlaDumpHloAsProto

func (x *DebugOptions) GetXlaDumpHloAsProto() bool

func (*DebugOptions) GetXlaDumpHloAsText

func (x *DebugOptions) GetXlaDumpHloAsText() bool

func (*DebugOptions) GetXlaDumpHloAsUrl

func (x *DebugOptions) GetXlaDumpHloAsUrl() bool

func (*DebugOptions) GetXlaDumpHloModuleRe

func (x *DebugOptions) GetXlaDumpHloModuleRe() string

func (*DebugOptions) GetXlaDumpHloPassRe

func (x *DebugOptions) GetXlaDumpHloPassRe() string

func (*DebugOptions) GetXlaDumpHloPipelineRe

func (x *DebugOptions) GetXlaDumpHloPipelineRe() string

func (*DebugOptions) GetXlaDumpHloSnapshots

func (x *DebugOptions) GetXlaDumpHloSnapshots() bool

func (*DebugOptions) GetXlaDumpIncludeTimestamp

func (x *DebugOptions) GetXlaDumpIncludeTimestamp() bool

func (*DebugOptions) GetXlaDumpLargeConstants

func (x *DebugOptions) GetXlaDumpLargeConstants() bool

func (*DebugOptions) GetXlaDumpLatencyHidingSchedule

func (x *DebugOptions) GetXlaDumpLatencyHidingSchedule() bool

func (*DebugOptions) GetXlaDumpMaxHloModules

func (x *DebugOptions) GetXlaDumpMaxHloModules() int32

func (*DebugOptions) GetXlaDumpModuleMetadata

func (x *DebugOptions) GetXlaDumpModuleMetadata() bool

func (*DebugOptions) GetXlaDumpTo

func (x *DebugOptions) GetXlaDumpTo() string

func (*DebugOptions) GetXlaEliminateHloImplicitBroadcast

func (x *DebugOptions) GetXlaEliminateHloImplicitBroadcast() bool

func (*DebugOptions) GetXlaEmbedIrInExecutable

func (x *DebugOptions) GetXlaEmbedIrInExecutable() bool

func (*DebugOptions) GetXlaEnableCommandBuffersDuringProfiling

func (x *DebugOptions) GetXlaEnableCommandBuffersDuringProfiling() bool

func (*DebugOptions) GetXlaEnableDumping

func (x *DebugOptions) GetXlaEnableDumping() bool

func (*DebugOptions) GetXlaEnableHloPassesOnly

func (x *DebugOptions) GetXlaEnableHloPassesOnly() []string

func (*DebugOptions) GetXlaForceHostPlatformDeviceCount

func (x *DebugOptions) GetXlaForceHostPlatformDeviceCount() int32

func (*DebugOptions) GetXlaGpuAlgorithmDenylistPath

func (x *DebugOptions) GetXlaGpuAlgorithmDenylistPath() string

func (*DebugOptions) GetXlaGpuAllGatherCombineThresholdBytes

func (x *DebugOptions) GetXlaGpuAllGatherCombineThresholdBytes() int64

func (*DebugOptions) GetXlaGpuAllReduceBlueconnectNumDevicesPerHost

func (x *DebugOptions) GetXlaGpuAllReduceBlueconnectNumDevicesPerHost() int32

func (*DebugOptions) GetXlaGpuAllReduceCombineThresholdBytes

func (x *DebugOptions) GetXlaGpuAllReduceCombineThresholdBytes() int64

func (*DebugOptions) GetXlaGpuAsmExtraFlags

func (x *DebugOptions) GetXlaGpuAsmExtraFlags() string

func (*DebugOptions) GetXlaGpuAsyncDot

func (x *DebugOptions) GetXlaGpuAsyncDot() bool

func (*DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetGb

func (x *DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetGb() int32

func (*DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetRatio

func (x *DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetRatio() float32

func (*DebugOptions) GetXlaGpuAutotuneGemmRtol

func (x *DebugOptions) GetXlaGpuAutotuneGemmRtol() float32

func (*DebugOptions) GetXlaGpuAutotuneLevel

func (x *DebugOptions) GetXlaGpuAutotuneLevel() int32

func (*DebugOptions) GetXlaGpuAutotuneMaxSolutions

func (x *DebugOptions) GetXlaGpuAutotuneMaxSolutions() int64

func (*DebugOptions) GetXlaGpuCollectCostModelStats

func (x *DebugOptions) GetXlaGpuCollectCostModelStats() bool

func (*DebugOptions) GetXlaGpuCollectiveInflationFactor

func (x *DebugOptions) GetXlaGpuCollectiveInflationFactor() int32

func (*DebugOptions) GetXlaGpuCollectivePermuteDecomposerThreshold

func (x *DebugOptions) GetXlaGpuCollectivePermuteDecomposerThreshold() int64

func (*DebugOptions) GetXlaGpuCopyInsertionUseRegionAnalysis

func (x *DebugOptions) GetXlaGpuCopyInsertionUseRegionAnalysis() bool

func (*DebugOptions) GetXlaGpuCrashOnVerificationFailures

func (x *DebugOptions) GetXlaGpuCrashOnVerificationFailures() bool

func (*DebugOptions) GetXlaGpuCublasFallback

func (x *DebugOptions) GetXlaGpuCublasFallback() bool

func (*DebugOptions) GetXlaGpuCudaDataDir

func (x *DebugOptions) GetXlaGpuCudaDataDir() string

func (*DebugOptions) GetXlaGpuCudnnGemmFusionLevel

func (x *DebugOptions) GetXlaGpuCudnnGemmFusionLevel() int32

func (*DebugOptions) GetXlaGpuCudnnGemmMaxPlans

func (x *DebugOptions) GetXlaGpuCudnnGemmMaxPlans() int32

func (*DebugOptions) GetXlaGpuDeterministicOps

func (x *DebugOptions) GetXlaGpuDeterministicOps() bool

func (*DebugOptions) GetXlaGpuDisableAsyncCollectives

func (x *DebugOptions) GetXlaGpuDisableAsyncCollectives() []DebugOptions_CollectiveOpType

func (*DebugOptions) GetXlaGpuDisableGpuasmOptimizations

func (x *DebugOptions) GetXlaGpuDisableGpuasmOptimizations() bool

func (*DebugOptions) GetXlaGpuDumpAutotuneLogsTo

func (x *DebugOptions) GetXlaGpuDumpAutotuneLogsTo() string

func (*DebugOptions) GetXlaGpuDumpAutotuneResultsTo

func (x *DebugOptions) GetXlaGpuDumpAutotuneResultsTo() string

func (*DebugOptions) GetXlaGpuDumpAutotunedGemmFusions

func (x *DebugOptions) GetXlaGpuDumpAutotunedGemmFusions() bool

func (*DebugOptions) GetXlaGpuDumpLlvmir

func (x *DebugOptions) GetXlaGpuDumpLlvmir() bool

func (*DebugOptions) GetXlaGpuEnableAllGatherCombineByDim

func (x *DebugOptions) GetXlaGpuEnableAllGatherCombineByDim() bool

func (*DebugOptions) GetXlaGpuEnableAnalyticalLatencyEstimator

func (x *DebugOptions) GetXlaGpuEnableAnalyticalLatencyEstimator() bool

func (*DebugOptions) GetXlaGpuEnableApproxCostlyCollectives

func (x *DebugOptions) GetXlaGpuEnableApproxCostlyCollectives() bool

func (*DebugOptions) GetXlaGpuEnableBf16_3WayGemm

func (x *DebugOptions) GetXlaGpuEnableBf16_3WayGemm() bool

func (*DebugOptions) GetXlaGpuEnableBf16_6WayGemm

func (x *DebugOptions) GetXlaGpuEnableBf16_6WayGemm() bool

func (*DebugOptions) GetXlaGpuEnableCommandBuffer

func (x *DebugOptions) GetXlaGpuEnableCommandBuffer() []DebugOptions_CommandBufferCmdType

func (*DebugOptions) GetXlaGpuEnableCubRadixSort

func (x *DebugOptions) GetXlaGpuEnableCubRadixSort() bool

func (*DebugOptions) GetXlaGpuEnableCublaslt

func (x *DebugOptions) GetXlaGpuEnableCublaslt() bool

func (*DebugOptions) GetXlaGpuEnableCudnnFmha

func (x *DebugOptions) GetXlaGpuEnableCudnnFmha() bool

func (*DebugOptions) GetXlaGpuEnableCudnnFrontend

func (x *DebugOptions) GetXlaGpuEnableCudnnFrontend() bool

func (*DebugOptions) GetXlaGpuEnableCudnnInt8X32ConvolutionReordering

func (x *DebugOptions) GetXlaGpuEnableCudnnInt8X32ConvolutionReordering() bool

func (*DebugOptions) GetXlaGpuEnableCudnnLayerNorm

func (x *DebugOptions) GetXlaGpuEnableCudnnLayerNorm() bool

func (*DebugOptions) GetXlaGpuEnableCustomFusions

func (x *DebugOptions) GetXlaGpuEnableCustomFusions() bool

func (*DebugOptions) GetXlaGpuEnableCustomFusionsRe

func (x *DebugOptions) GetXlaGpuEnableCustomFusionsRe() string

func (*DebugOptions) GetXlaGpuEnableDotStrengthReduction

func (x *DebugOptions) GetXlaGpuEnableDotStrengthReduction() bool

func (*DebugOptions) GetXlaGpuEnableDynamicSliceFusion

func (x *DebugOptions) GetXlaGpuEnableDynamicSliceFusion() bool

func (*DebugOptions) GetXlaGpuEnableFastMinMax

func (x *DebugOptions) GetXlaGpuEnableFastMinMax() bool

func (*DebugOptions) GetXlaGpuEnableHighestPriorityAsyncStream

func (x *DebugOptions) GetXlaGpuEnableHighestPriorityAsyncStream() bool

func (*DebugOptions) GetXlaGpuEnableHostMemoryOffloading

func (x *DebugOptions) GetXlaGpuEnableHostMemoryOffloading() bool

func (*DebugOptions) GetXlaGpuEnableLatencyHidingScheduler

func (x *DebugOptions) GetXlaGpuEnableLatencyHidingScheduler() bool

func (*DebugOptions) GetXlaGpuEnableLibnvjitlink

func (x *DebugOptions) GetXlaGpuEnableLibnvjitlink() bool

func (*DebugOptions) GetXlaGpuEnableLibnvptxcompiler

func (x *DebugOptions) GetXlaGpuEnableLibnvptxcompiler() bool

func (*DebugOptions) GetXlaGpuEnableLlvmModuleCompilationParallelism

func (x *DebugOptions) GetXlaGpuEnableLlvmModuleCompilationParallelism() bool

func (*DebugOptions) GetXlaGpuEnableNcclCliqueOptimization

func (x *DebugOptions) GetXlaGpuEnableNcclCliqueOptimization() bool

func (*DebugOptions) GetXlaGpuEnableNcclCommSplitting

func (x *DebugOptions) GetXlaGpuEnableNcclCommSplitting() bool

func (*DebugOptions) GetXlaGpuEnableNcclPerStreamComms

func (x *DebugOptions) GetXlaGpuEnableNcclPerStreamComms() bool

func (*DebugOptions) GetXlaGpuEnableNcclUserBuffers

func (x *DebugOptions) GetXlaGpuEnableNcclUserBuffers() bool

func (*DebugOptions) GetXlaGpuEnablePipelinedAllGather

func (x *DebugOptions) GetXlaGpuEnablePipelinedAllGather() bool

func (*DebugOptions) GetXlaGpuEnablePipelinedAllReduce

func (x *DebugOptions) GetXlaGpuEnablePipelinedAllReduce() bool

func (*DebugOptions) GetXlaGpuEnablePipelinedCollectives

func (x *DebugOptions) GetXlaGpuEnablePipelinedCollectives() bool

func (*DebugOptions) GetXlaGpuEnablePipelinedP2P

func (x *DebugOptions) GetXlaGpuEnablePipelinedP2P() bool

func (*DebugOptions) GetXlaGpuEnablePipelinedReduceScatter

func (x *DebugOptions) GetXlaGpuEnablePipelinedReduceScatter() bool

func (*DebugOptions) GetXlaGpuEnablePriorityFusion

func (x *DebugOptions) GetXlaGpuEnablePriorityFusion() bool

func (*DebugOptions) GetXlaGpuEnableReassociationForConvertedAr

func (x *DebugOptions) GetXlaGpuEnableReassociationForConvertedAr() bool

func (*DebugOptions) GetXlaGpuEnableReduceScatterCombineByDim

func (x *DebugOptions) GetXlaGpuEnableReduceScatterCombineByDim() bool

func (*DebugOptions) GetXlaGpuEnableReductionEpilogueFusion

func (x *DebugOptions) GetXlaGpuEnableReductionEpilogueFusion() bool

func (*DebugOptions) GetXlaGpuEnableSharedConstants

func (x *DebugOptions) GetXlaGpuEnableSharedConstants() bool

func (*DebugOptions) GetXlaGpuEnableSplitKAutotuning

func (x *DebugOptions) GetXlaGpuEnableSplitKAutotuning() bool

func (*DebugOptions) GetXlaGpuEnableTritonGemm

func (x *DebugOptions) GetXlaGpuEnableTritonGemm() bool

func (*DebugOptions) GetXlaGpuEnableTritonGemmInt4

func (x *DebugOptions) GetXlaGpuEnableTritonGemmInt4() bool

func (*DebugOptions) GetXlaGpuEnableTritonHopper

func (x *DebugOptions) GetXlaGpuEnableTritonHopper() bool

func (*DebugOptions) GetXlaGpuEnableTritonSoftmaxFusion

func (x *DebugOptions) GetXlaGpuEnableTritonSoftmaxFusion() bool

func (*DebugOptions) GetXlaGpuEnableTritonSoftmaxPriorityFusion

func (x *DebugOptions) GetXlaGpuEnableTritonSoftmaxPriorityFusion() bool

func (*DebugOptions) GetXlaGpuEnableWhileLoopDoubleBuffering

func (x *DebugOptions) GetXlaGpuEnableWhileLoopDoubleBuffering() bool

func (*DebugOptions) GetXlaGpuEnableWhileLoopReduceScatterCodeMotion

func (x *DebugOptions) GetXlaGpuEnableWhileLoopReduceScatterCodeMotion() bool

func (*DebugOptions) GetXlaGpuEnableWhileLoopUnrolling

func (x *DebugOptions) GetXlaGpuEnableWhileLoopUnrolling() DebugOptions_WhileLoopUnrolling

func (*DebugOptions) GetXlaGpuEnsureMinorDotContractionDims

func (x *DebugOptions) GetXlaGpuEnsureMinorDotContractionDims() bool

func (*DebugOptions) GetXlaGpuExcludeNondeterministicOps

func (x *DebugOptions) GetXlaGpuExcludeNondeterministicOps() bool

func (*DebugOptions) GetXlaGpuExhaustiveTilingSearch

func (x *DebugOptions) GetXlaGpuExhaustiveTilingSearch() bool

func (*DebugOptions) GetXlaGpuFilterKernelsSpillingRegistersOnAutotuning

func (x *DebugOptions) GetXlaGpuFilterKernelsSpillingRegistersOnAutotuning() bool

func (*DebugOptions) GetXlaGpuForceCompilationParallelism

func (x *DebugOptions) GetXlaGpuForceCompilationParallelism() int32

func (*DebugOptions) GetXlaGpuForceConvNchw

func (x *DebugOptions) GetXlaGpuForceConvNchw() bool

func (*DebugOptions) GetXlaGpuForceConvNhwc

func (x *DebugOptions) GetXlaGpuForceConvNhwc() bool

func (*DebugOptions) GetXlaGpuFtz

func (x *DebugOptions) GetXlaGpuFtz() bool

func (*DebugOptions) GetXlaGpuFusedAttentionUseCudnnRng

func (x *DebugOptions) GetXlaGpuFusedAttentionUseCudnnRng() bool

func (*DebugOptions) GetXlaGpuGemmRewriteSizeThreshold

func (x *DebugOptions) GetXlaGpuGemmRewriteSizeThreshold() int64

func (*DebugOptions) GetXlaGpuGraphEnableConcurrentRegion

func (x *DebugOptions) GetXlaGpuGraphEnableConcurrentRegion() bool

func (*DebugOptions) GetXlaGpuGraphMinGraphSize

func (x *DebugOptions) GetXlaGpuGraphMinGraphSize() int32

func (*DebugOptions) GetXlaGpuKernelCacheFile

func (x *DebugOptions) GetXlaGpuKernelCacheFile() string

func (*DebugOptions) GetXlaGpuLhsEnableGpuAsyncTracker

func (x *DebugOptions) GetXlaGpuLhsEnableGpuAsyncTracker() bool

func (*DebugOptions) GetXlaGpuLlvmIrFile

func (x *DebugOptions) GetXlaGpuLlvmIrFile() []string

func (*DebugOptions) GetXlaGpuLlvmVerificationLevel

func (x *DebugOptions) GetXlaGpuLlvmVerificationLevel() int32

func (*DebugOptions) GetXlaGpuLoadAutotuneResultsFrom

func (x *DebugOptions) GetXlaGpuLoadAutotuneResultsFrom() string

func (*DebugOptions) GetXlaGpuMemoryLimitSlopFactor

func (x *DebugOptions) GetXlaGpuMemoryLimitSlopFactor() int32

func (*DebugOptions) GetXlaGpuMlirEmitterLevel

func (x *DebugOptions) GetXlaGpuMlirEmitterLevel() int64

func (*DebugOptions) GetXlaGpuMockCustomCalls

func (x *DebugOptions) GetXlaGpuMockCustomCalls() bool

func (*DebugOptions) GetXlaGpuMultiStreamedWindowedEinsum

func (x *DebugOptions) GetXlaGpuMultiStreamedWindowedEinsum() bool

func (*DebugOptions) GetXlaGpuNcclCollectiveMaxNchannels

func (x *DebugOptions) GetXlaGpuNcclCollectiveMaxNchannels() int64

func (*DebugOptions) GetXlaGpuNcclP2PMaxNchannels

func (x *DebugOptions) GetXlaGpuNcclP2PMaxNchannels() int64

func (*DebugOptions) GetXlaGpuNcclTerminateOnError

func (x *DebugOptions) GetXlaGpuNcclTerminateOnError() bool

func (*DebugOptions) GetXlaGpuNcclTerminationTimeoutSeconds

func (x *DebugOptions) GetXlaGpuNcclTerminationTimeoutSeconds() int64

func (*DebugOptions) GetXlaGpuOverrideGemmAutotuner

func (x *DebugOptions) GetXlaGpuOverrideGemmAutotuner() string

func (*DebugOptions) GetXlaGpuPerFusionAutotuneCacheDir

func (x *DebugOptions) GetXlaGpuPerFusionAutotuneCacheDir() string

func (*DebugOptions) GetXlaGpuPgleProfileFileOrDirectoryPath

func (x *DebugOptions) GetXlaGpuPgleProfileFileOrDirectoryPath() string

func (*DebugOptions) GetXlaGpuPtxFile

func (x *DebugOptions) GetXlaGpuPtxFile() []string

func (*DebugOptions) GetXlaGpuReduceScatterCombineThresholdBytes

func (x *DebugOptions) GetXlaGpuReduceScatterCombineThresholdBytes() int64

func (*DebugOptions) GetXlaGpuRedzonePaddingBytes

func (x *DebugOptions) GetXlaGpuRedzonePaddingBytes() int64

func (*DebugOptions) GetXlaGpuRedzoneScratchMaxMegabytes

func (x *DebugOptions) GetXlaGpuRedzoneScratchMaxMegabytes() int64

func (*DebugOptions) GetXlaGpuRequireCompleteAotAutotuneResults

func (x *DebugOptions) GetXlaGpuRequireCompleteAotAutotuneResults() bool

func (*DebugOptions) GetXlaGpuRunPostLayoutCollectivePipeliner

func (x *DebugOptions) GetXlaGpuRunPostLayoutCollectivePipeliner() bool

func (*DebugOptions) GetXlaGpuShapeChecks

func (x *DebugOptions) GetXlaGpuShapeChecks() DebugOptions_ShapeChecks

func (*DebugOptions) GetXlaGpuShardAutotuning

func (x *DebugOptions) GetXlaGpuShardAutotuning() bool

func (*DebugOptions) GetXlaGpuStrictConvAlgorithmPicker

func (x *DebugOptions) GetXlaGpuStrictConvAlgorithmPicker() bool

func (*DebugOptions) GetXlaGpuTargetConfigFilename

func (x *DebugOptions) GetXlaGpuTargetConfigFilename() string

func (*DebugOptions) GetXlaGpuTempBufferUseSeparateColor

func (x *DebugOptions) GetXlaGpuTempBufferUseSeparateColor() bool

func (*DebugOptions) GetXlaGpuThresholdForWindowedEinsumMib

func (x *DebugOptions) GetXlaGpuThresholdForWindowedEinsumMib() int64

func (*DebugOptions) GetXlaGpuTritonFusionLevel

func (x *DebugOptions) GetXlaGpuTritonFusionLevel() int32

func (*DebugOptions) GetXlaGpuTritonGemmAny

func (x *DebugOptions) GetXlaGpuTritonGemmAny() bool

func (*DebugOptions) GetXlaGpuTritonGemmDisableReducedPrecisionReduction

func (x *DebugOptions) GetXlaGpuTritonGemmDisableReducedPrecisionReduction() bool

func (*DebugOptions) GetXlaGpuUnsafeFallbackToDriverOnPtxasNotFound

func (x *DebugOptions) GetXlaGpuUnsafeFallbackToDriverOnPtxasNotFound() bool

func (*DebugOptions) GetXlaGpuUnsafePipelinedLoopAnnotator

func (x *DebugOptions) GetXlaGpuUnsafePipelinedLoopAnnotator() bool

func (*DebugOptions) GetXlaGpuUnsupportedEnableTritonGemm

func (x *DebugOptions) GetXlaGpuUnsupportedEnableTritonGemm() bool

func (*DebugOptions) GetXlaGpuUseMemcpyLocalP2P

func (x *DebugOptions) GetXlaGpuUseMemcpyLocalP2P() bool

func (*DebugOptions) GetXlaGpuUseRuntimeFusion

func (x *DebugOptions) GetXlaGpuUseRuntimeFusion() bool

func (*DebugOptions) GetXlaGpuVerifyTritonFusionNumerics

func (x *DebugOptions) GetXlaGpuVerifyTritonFusionNumerics() bool

func (*DebugOptions) GetXlaHloEvaluatorUseFastPath

func (x *DebugOptions) GetXlaHloEvaluatorUseFastPath() bool

func (*DebugOptions) GetXlaHloGraphAddresses

func (x *DebugOptions) GetXlaHloGraphAddresses() bool

func (*DebugOptions) GetXlaHloGraphShardingColor

func (x *DebugOptions) GetXlaHloGraphShardingColor() bool

func (*DebugOptions) GetXlaHloProfile

func (x *DebugOptions) GetXlaHloProfile() bool

func (*DebugOptions) GetXlaLlvmDisableExpensivePasses

func (x *DebugOptions) GetXlaLlvmDisableExpensivePasses() bool

func (*DebugOptions) GetXlaLlvmEnableAliasScopeMetadata

func (x *DebugOptions) GetXlaLlvmEnableAliasScopeMetadata() bool

func (*DebugOptions) GetXlaLlvmEnableInvariantLoadMetadata

func (x *DebugOptions) GetXlaLlvmEnableInvariantLoadMetadata() bool

func (*DebugOptions) GetXlaLlvmEnableNoaliasMetadata

func (x *DebugOptions) GetXlaLlvmEnableNoaliasMetadata() bool

func (*DebugOptions) GetXlaLlvmForceInlineBeforeSplit

func (x *DebugOptions) GetXlaLlvmForceInlineBeforeSplit() bool

func (*DebugOptions) GetXlaMultiheapSizeConstraintPerHeap

func (x *DebugOptions) GetXlaMultiheapSizeConstraintPerHeap() int32

func (*DebugOptions) GetXlaPartitioningAlgorithm

func (x *DebugOptions) GetXlaPartitioningAlgorithm() DebugOptions_PartitioningAlgorithm

func (*DebugOptions) GetXlaReduceWindowRewriteBaseLength

func (x *DebugOptions) GetXlaReduceWindowRewriteBaseLength() int64

func (*DebugOptions) GetXlaStepMarkerLocation

func (x *DebugOptions) GetXlaStepMarkerLocation() DebugOptions_StepMarkerLocation

func (*DebugOptions) GetXlaSyntaxSugarAsyncOps

func (x *DebugOptions) GetXlaSyntaxSugarAsyncOps() bool

func (*DebugOptions) GetXlaTestAllInputLayouts

func (x *DebugOptions) GetXlaTestAllInputLayouts() bool

func (*DebugOptions) GetXlaTestAllOutputLayouts

func (x *DebugOptions) GetXlaTestAllOutputLayouts() bool

func (*DebugOptions) GetXlaTpuDetectInf

func (x *DebugOptions) GetXlaTpuDetectInf() bool

func (*DebugOptions) GetXlaTpuDetectNan

func (x *DebugOptions) GetXlaTpuDetectNan() bool

func (*DebugOptions) ProtoMessage

func (*DebugOptions) ProtoMessage()

func (*DebugOptions) ProtoReflect

func (x *DebugOptions) ProtoReflect() protoreflect.Message

func (*DebugOptions) Reset

func (x *DebugOptions) Reset()

func (*DebugOptions) String

func (x *DebugOptions) String() string

type DebugOptions_CollectiveOpType

type DebugOptions_CollectiveOpType int32

Enum defining all collective ops that XLA supports.

const (
	DebugOptions_NOOP                DebugOptions_CollectiveOpType = 0
	DebugOptions_ALLREDUCE           DebugOptions_CollectiveOpType = 1
	DebugOptions_ALLGATHER           DebugOptions_CollectiveOpType = 2
	DebugOptions_REDUCESCATTER       DebugOptions_CollectiveOpType = 3
	DebugOptions_COLLECTIVEBROADCAST DebugOptions_CollectiveOpType = 4
	DebugOptions_ALLTOALL            DebugOptions_CollectiveOpType = 5
	DebugOptions_COLLECTIVEPERMUTE   DebugOptions_CollectiveOpType = 6
)
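
The generated Go bindings follow standard protoc-gen-go conventions, so names and numbers round-trip through the enum's String method and the generated name/value maps. A minimal sketch; the import path below is a placeholder, not this module's real one:

	package main

	import (
		"fmt"

		xla "example.com/generated/xla" // placeholder import path; substitute this package's real one
	)

	func main() {
		op := xla.DebugOptions_REDUCESCATTER

		// String() returns the proto enum name; Number() its wire value.
		fmt.Println(op.String(), op.Number()) // REDUCESCATTER 3

		// The generated value map converts a name back into the typed enum.
		if v, ok := xla.DebugOptions_CollectiveOpType_value["ALLGATHER"]; ok {
			fmt.Println(xla.DebugOptions_CollectiveOpType(v)) // ALLGATHER
		}
	}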

func (DebugOptions_CollectiveOpType) Descriptor

func (DebugOptions_CollectiveOpType) Enum

func (DebugOptions_CollectiveOpType) EnumDescriptor deprecated

func (DebugOptions_CollectiveOpType) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_CollectiveOpType.Descriptor instead.

func (DebugOptions_CollectiveOpType) Number

func (DebugOptions_CollectiveOpType) String

func (DebugOptions_CollectiveOpType) Type

type DebugOptions_CommandBufferCmdType

type DebugOptions_CommandBufferCmdType int32

Commands are grouped into categories: FUSION represents regular fusion kernels; CUBLAS, CUBLASLT, CUDNN, and COLLECTIVES represent library calls; CONDITIONALS represents control flow; CUSTOM_CALL represents custom calls; INVALID is the unset default.

const (
	DebugOptions_INVALID      DebugOptions_CommandBufferCmdType = 0
	DebugOptions_FUSION       DebugOptions_CommandBufferCmdType = 1
	DebugOptions_CUBLAS       DebugOptions_CommandBufferCmdType = 2
	DebugOptions_CUDNN        DebugOptions_CommandBufferCmdType = 3
	DebugOptions_COLLECTIVES  DebugOptions_CommandBufferCmdType = 4
	DebugOptions_CONDITIONALS DebugOptions_CommandBufferCmdType = 5
	DebugOptions_CUSTOM_CALL  DebugOptions_CommandBufferCmdType = 6
	DebugOptions_CUBLASLT     DebugOptions_CommandBufferCmdType = 7
)
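
Which command types are eligible for command-buffer capture is controlled through DebugOptions. A minimal sketch, assuming the struct field backing the GetXlaGpuEnableCommandBuffer getter is named XlaGpuEnableCommandBuffer per protoc-gen-go conventions; the import path is a placeholder:

	package main

	import (
		"fmt"

		xla "example.com/generated/xla" // placeholder import path
	)

	func main() {
		// Capture fusion kernels and cuBLAS/cuDNN library calls into
		// command buffers; leave collectives and conditionals out.
		opts := &xla.DebugOptions{
			XlaGpuEnableCommandBuffer: []xla.DebugOptions_CommandBufferCmdType{
				xla.DebugOptions_FUSION,
				xla.DebugOptions_CUBLAS,
				xla.DebugOptions_CUDNN,
			},
		}
		for _, cmd := range opts.GetXlaGpuEnableCommandBuffer() {
			fmt.Println(cmd)
		}
	}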

func (DebugOptions_CommandBufferCmdType) Descriptor

func (DebugOptions_CommandBufferCmdType) Enum

func (DebugOptions_CommandBufferCmdType) EnumDescriptor deprecated

func (DebugOptions_CommandBufferCmdType) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_CommandBufferCmdType.Descriptor instead.

func (DebugOptions_CommandBufferCmdType) Number

func (DebugOptions_CommandBufferCmdType) String

func (DebugOptions_CommandBufferCmdType) Type

type DebugOptions_PartitioningAlgorithm

type DebugOptions_PartitioningAlgorithm int32
const (
	DebugOptions_PARTITIONING_ALGORITHM_NOOP DebugOptions_PartitioningAlgorithm = 0
	DebugOptions_PARTITIONING_ALGORITHM_EXP0 DebugOptions_PartitioningAlgorithm = 1
	DebugOptions_PARTITIONING_ALGORITHM_EXP1 DebugOptions_PartitioningAlgorithm = 2
	DebugOptions_PARTITIONING_ALGORITHM_EXP2 DebugOptions_PartitioningAlgorithm = 3
)

func (DebugOptions_PartitioningAlgorithm) Descriptor

func (DebugOptions_PartitioningAlgorithm) Enum

func (DebugOptions_PartitioningAlgorithm) EnumDescriptor deprecated

func (DebugOptions_PartitioningAlgorithm) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_PartitioningAlgorithm.Descriptor instead.

func (DebugOptions_PartitioningAlgorithm) Number

func (DebugOptions_PartitioningAlgorithm) String

func (DebugOptions_PartitioningAlgorithm) Type

type DebugOptions_ShapeChecks

type DebugOptions_ShapeChecks int32
const (
	// Do not insert any shape checks for dynamically shaped operations; output
	// buffers might contain garbage data if shapes don't match.
	DebugOptions_IGNORE DebugOptions_ShapeChecks = 0
	// Check shapes at runtime, will insert an extra synchronization if shapes
	// cannot be proven correct at compile time.
	DebugOptions_RUNTIME DebugOptions_ShapeChecks = 1
	// Will refuse to compile any program where shape correctness can not be
	// established at compile time.
	DebugOptions_COMPILE_TIME DebugOptions_ShapeChecks = 2
)
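
A minimal sketch of selecting a shape-check mode, assuming the field backing the GetXlaGpuShapeChecks getter is named XlaGpuShapeChecks per protoc-gen-go conventions; the import path is a placeholder:

	package main

	import (
		"fmt"

		xla "example.com/generated/xla" // placeholder import path
	)

	func main() {
		opts := &xla.DebugOptions{XlaGpuShapeChecks: xla.DebugOptions_RUNTIME}

		switch opts.GetXlaGpuShapeChecks() {
		case xla.DebugOptions_IGNORE:
			fmt.Println("no checks: mismatched dynamic shapes may produce garbage output")
		case xla.DebugOptions_RUNTIME:
			fmt.Println("runtime checks: may insert an extra synchronization")
		case xla.DebugOptions_COMPILE_TIME:
			fmt.Println("compile-time checks: unprovable programs are rejected")
		}
	}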

func (DebugOptions_ShapeChecks) Descriptor

func (DebugOptions_ShapeChecks) Enum

func (DebugOptions_ShapeChecks) EnumDescriptor deprecated

func (DebugOptions_ShapeChecks) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_ShapeChecks.Descriptor instead.

func (DebugOptions_ShapeChecks) Number

func (DebugOptions_ShapeChecks) String

func (x DebugOptions_ShapeChecks) String() string

func (DebugOptions_ShapeChecks) Type

type DebugOptions_StepMarkerLocation

type DebugOptions_StepMarkerLocation int32
const (
	// Generate a step marker at the program entry. This handles the case where
	// each step is done by one or multiple program execution(s). Only the first
	// program will be tagged for generating a step marker at the program entry.
	// This is the default.
	DebugOptions_STEP_MARK_AT_ENTRY DebugOptions_StepMarkerLocation = 0
	// Generate a step marker at each iteration of the top level while loop,
	// which is assumed to be a training loop.
	DebugOptions_STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP DebugOptions_StepMarkerLocation = 1
	// Generate a step marker at each iteration of the second level while loops,
	// which is assumed to be a training or eval loop.
	DebugOptions_STEP_MARK_AT_SECOND_LEVEL_WHILE_LOOP DebugOptions_StepMarkerLocation = 3
	// No step marker generated.
	DebugOptions_STEP_MARK_NONE DebugOptions_StepMarkerLocation = 2
)
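
A minimal sketch of selecting a step-marker location, assuming the field backing GetXlaStepMarkerLocation is named XlaStepMarkerLocation per protoc-gen-go conventions; the import path is a placeholder:

	package main

	import (
		"fmt"

		xla "example.com/generated/xla" // placeholder import path
	)

	func main() {
		// Mark a step at each iteration of the top-level (training) while loop
		// instead of at program entry.
		opts := &xla.DebugOptions{
			XlaStepMarkerLocation: xla.DebugOptions_STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP,
		}
		fmt.Println(opts.GetXlaStepMarkerLocation())
	}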

func (DebugOptions_StepMarkerLocation) Descriptor

func (DebugOptions_StepMarkerLocation) Enum

func (DebugOptions_StepMarkerLocation) EnumDescriptor deprecated

func (DebugOptions_StepMarkerLocation) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_StepMarkerLocation.Descriptor instead.

func (DebugOptions_StepMarkerLocation) Number

func (DebugOptions_StepMarkerLocation) String

func (DebugOptions_StepMarkerLocation) Type

type DebugOptions_WhileLoopUnrolling

type DebugOptions_WhileLoopUnrolling int32
const (
	DebugOptions_WHILE_LOOP_UNROLLING_NO_UNROLL DebugOptions_WhileLoopUnrolling = 0
	// Has the same effect as setting
	// `xla_gpu_enable_while_loop_double_buffering`.
	DebugOptions_WHILE_LOOP_UNROLLING_DOUBLE_BUFFER DebugOptions_WhileLoopUnrolling = 1
	// Enables full loop unrolling using the same strategy as `DOUBLE_BUFFER`.
	DebugOptions_WHILE_LOOP_UNROLLING_FULL_UNROLL DebugOptions_WhileLoopUnrolling = 2
)
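
A minimal sketch of requesting an unrolling strategy, assuming the field backing GetXlaGpuEnableWhileLoopUnrolling is named XlaGpuEnableWhileLoopUnrolling per protoc-gen-go conventions; the import path is a placeholder:

	package main

	import (
		"fmt"

		xla "example.com/generated/xla" // placeholder import path
	)

	func main() {
		// Request double-buffered unrolling, equivalent in effect to
		// setting xla_gpu_enable_while_loop_double_buffering.
		opts := &xla.DebugOptions{
			XlaGpuEnableWhileLoopUnrolling: xla.DebugOptions_WHILE_LOOP_UNROLLING_DOUBLE_BUFFER,
		}
		fmt.Println(opts.GetXlaGpuEnableWhileLoopUnrolling())
	}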

func (DebugOptions_WhileLoopUnrolling) Descriptor

func (DebugOptions_WhileLoopUnrolling) Enum

func (DebugOptions_WhileLoopUnrolling) EnumDescriptor deprecated

func (DebugOptions_WhileLoopUnrolling) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_WhileLoopUnrolling.Descriptor instead.

func (DebugOptions_WhileLoopUnrolling) Number

func (DebugOptions_WhileLoopUnrolling) String

func (DebugOptions_WhileLoopUnrolling) Type

type ExecutionOptions

type ExecutionOptions struct {

	// This optional field's layout is used as a hint when storing the output of
	// this computation.  Subsequent transfers of this output array to the client
	// may be faster when using this layout.
	//
	// We use a Shape here to accommodate computations that return a tuple.
	ShapeWithOutputLayout *xla_data.ShapeProto `` /* 128-byte string literal not displayed */
	// Used to seed random-number generators used in this computation.  If this is
	// 0, we generate a seed ourselves.
	//
	// TODO(b/32083678): Changing the seed unnecessarily forces a recompilation.
	Seed         uint64        `protobuf:"varint,3,opt,name=seed,proto3" json:"seed,omitempty"`
	DebugOptions *DebugOptions `protobuf:"bytes,4,opt,name=debug_options,json=debugOptions,proto3" json:"debug_options,omitempty"`
	// This optional field specifies a particular set of devices to run the
	// computation on. The computation will be partitioned across these devices.
	// If not provided, the default device will be chosen.
	DeviceHandles []*xla_data.DeviceHandle `protobuf:"bytes,5,rep,name=device_handles,json=deviceHandles,proto3" json:"device_handles,omitempty"`
	// Number of replicas of the computation to run. If zero, uses the default
	// number of replicas for the XLA service.
	NumReplicas int32 `protobuf:"varint,6,opt,name=num_replicas,json=numReplicas,proto3" json:"num_replicas,omitempty"`
	// This optional field specifies the device assignment if known at compile
	// time.
	DeviceAssignment *xla_data.DeviceAssignmentProto `protobuf:"bytes,7,opt,name=device_assignment,json=deviceAssignment,proto3" json:"device_assignment,omitempty"`
	// Alias input and output buffers for parameters that are passed-through XLA
	// modules without being changed.
	AliasPassthroughParams bool `` /* 130-byte string literal not displayed */
	// Number of partitions of the computation to run (model parallelism).
	// If zero, uses the default number of partitions for the XLA service.
	NumPartitions int32 `protobuf:"varint,9,opt,name=num_partitions,json=numPartitions,proto3" json:"num_partitions,omitempty"`
	// Used to identify a set of programs that should be launched together.
	LaunchId int32 `protobuf:"varint,10,opt,name=launch_id,json=launchId,proto3" json:"launch_id,omitempty"`
	// Indicates whether to use SPMD (true) or MPMD (false) partitioning when
	// num_partitions > 1 and XLA is requested to partition the input program.
	UseSpmdPartitioning bool `protobuf:"varint,11,opt,name=use_spmd_partitioning,json=useSpmdPartitioning,proto3" json:"use_spmd_partitioning,omitempty"`
	// Whether to automatically generate XLA shardings for the SPMD partitioner.
	UseAutoSpmdPartitioning bool `` /* 136-byte string literal not displayed */
	// Device mesh shape used to create the sharding search space when
	// use_auto_spmd_partitioning=true.
	AutoSpmdPartitioningMeshShape []int64 `` /* 163-byte string literal not displayed */
	// Device mesh ids compatible with the above mesh_shape used when
	// use_auto_spmd_partitioning=true.
	AutoSpmdPartitioningMeshIds []int64 `` /* 157-byte string literal not displayed */
	// If set, deduplicate HLO into function calls to reduce binary size. Only
	// works on TPU.
	DeduplicateHlo bool `protobuf:"varint,12,opt,name=deduplicate_hlo,json=deduplicateHlo,proto3" json:"deduplicate_hlo,omitempty"`
	// Allows sharding propagation to propagate to the parameters. This changes
	// the input shape of the computation (which is undesirable), but it can be
	// used to run partial compilation and determine what the input sharding of
	// a computation would be if XLA were allowed to propagate the sharding.
	// Higher-level frameworks can use this to query the intermediate sharding
	// of operations when multiple computations are chained and merged together.
	// This is a vector of bool because the user can control which parameters
	// may have their sharding substituted. If only one boolean value is passed
	// in the vector, it is interpreted as the value to apply to every
	// parameter.
	AllowSpmdShardingPropagationToParameters []bool `` /* 198-byte string literal not displayed */
	// Allows sharding propagation to propagate to the outputs. This changes the
	// output shape of the computation (which is undesirable), but it can be
	// used to run partial compilation and determine what the output sharding of
	// a computation would be if XLA were allowed to propagate the sharding.
	// Higher-level frameworks can use this to query the intermediate sharding
	// of operations when multiple computations are chained and merged together.
	// This is a vector of bool because, when the output of the computation is a
	// tuple, the user can control which elements of the tuple may have their
	// sharding substituted and which may not. If only one boolean value is
	// passed in the vector, it is interpreted as the value to apply to every
	// element of the output tuple; otherwise, one value is attached to each
	// output element.
	AllowSpmdShardingPropagationToOutput []bool `` /* 186-byte string literal not displayed */
	// Whether to broadcast args across all replicas. One entry per arg.
	ParamRequiresBroadcastViaCollectives []bool `` /* 184-byte string literal not displayed */
	// If enabled, the compiler may generate sharding and unsharding programs as
	// separate HLO modules, and modify the main program's input and output to
	// be sharded.
	AllowSeparateShardingPrograms bool `` /* 154-byte string literal not displayed */
	// The list of input/output pairs in the main program that could be sharded.
	ShardableValueUpdatePairs []*ShardableValueUpdatePairProto `` /* 141-byte string literal not displayed */
	// Profiling data for feedback-directed optimizations. Note that this is not
	// the only way to feed FDO data into the compiler; individual backends
	// may choose to get FDO data by other means.
	FdoProfile []byte `protobuf:"bytes,21,opt,name=fdo_profile,json=fdoProfile,proto3" json:"fdo_profile,omitempty"`
	// Amount of device memory available for the executable to use.
	DeviceMemorySize int64 `protobuf:"varint,22,opt,name=device_memory_size,json=deviceMemorySize,proto3" json:"device_memory_size,omitempty"`
	// Use Shardy, a new partitioner, to replace the existing
	// ShardingPropagation and SpmdPartitioner. See go/xla-sdy-pipeline for
	// details.
	UseShardyPartitioner bool `protobuf:"varint,24,opt,name=use_shardy_partitioner,json=useShardyPartitioner,proto3" json:"use_shardy_partitioner,omitempty"`
	// contains filtered or unexported fields
}

These settings control how XLA compiles and/or runs code. Not all settings will have an effect on every platform.

When adding new fields, keep in mind that boolean fields default to false. Next id: 25.
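
A minimal usage sketch: populate a few fields and serialize with google.golang.org/protobuf/proto. Field names are taken from the struct above; the xla import path is a placeholder:

	package main

	import (
		"fmt"

		"google.golang.org/protobuf/proto"

		xla "example.com/generated/xla" // placeholder import path
	)

	func main() {
		opts := &xla.ExecutionOptions{
			NumReplicas:         8,
			NumPartitions:       2,
			UseSpmdPartitioning: true,
			// A single element applies to every parameter.
			AllowSpmdShardingPropagationToParameters: []bool{true},
			DebugOptions: &xla.DebugOptions{
				XlaGpuShapeChecks: xla.DebugOptions_RUNTIME,
			},
		}

		raw, err := proto.Marshal(opts)
		if err != nil {
			panic(err)
		}
		fmt.Printf("serialized %d bytes\n", len(raw))
	}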

func (*ExecutionOptions) Descriptor deprecated

func (*ExecutionOptions) Descriptor() ([]byte, []int)

Deprecated: Use ExecutionOptions.ProtoReflect.Descriptor instead.

func (*ExecutionOptions) GetAliasPassthroughParams

func (x *ExecutionOptions) GetAliasPassthroughParams() bool

func (*ExecutionOptions) GetAllowSeparateShardingPrograms

func (x *ExecutionOptions) GetAllowSeparateShardingPrograms() bool

func (*ExecutionOptions) GetAllowSpmdShardingPropagationToOutput

func (x *ExecutionOptions) GetAllowSpmdShardingPropagationToOutput() []bool

func (*ExecutionOptions) GetAllowSpmdShardingPropagationToParameters

func (x *ExecutionOptions) GetAllowSpmdShardingPropagationToParameters() []bool

func (*ExecutionOptions) GetAutoSpmdPartitioningMeshIds

func (x *ExecutionOptions) GetAutoSpmdPartitioningMeshIds() []int64

func (*ExecutionOptions) GetAutoSpmdPartitioningMeshShape

func (x *ExecutionOptions) GetAutoSpmdPartitioningMeshShape() []int64

func (*ExecutionOptions) GetDebugOptions

func (x *ExecutionOptions) GetDebugOptions() *DebugOptions

func (*ExecutionOptions) GetDeduplicateHlo

func (x *ExecutionOptions) GetDeduplicateHlo() bool

func (*ExecutionOptions) GetDeviceAssignment

func (x *ExecutionOptions) GetDeviceAssignment() *xla_data.DeviceAssignmentProto

func (*ExecutionOptions) GetDeviceHandles

func (x *ExecutionOptions) GetDeviceHandles() []*xla_data.DeviceHandle

func (*ExecutionOptions) GetDeviceMemorySize

func (x *ExecutionOptions) GetDeviceMemorySize() int64

func (*ExecutionOptions) GetFdoProfile

func (x *ExecutionOptions) GetFdoProfile() []byte

func (*ExecutionOptions) GetLaunchId

func (x *ExecutionOptions) GetLaunchId() int32

func (*ExecutionOptions) GetNumPartitions

func (x *ExecutionOptions) GetNumPartitions() int32

func (*ExecutionOptions) GetNumReplicas

func (x *ExecutionOptions) GetNumReplicas() int32

func (*ExecutionOptions) GetParamRequiresBroadcastViaCollectives

func (x *ExecutionOptions) GetParamRequiresBroadcastViaCollectives() []bool

func (*ExecutionOptions) GetSeed

func (x *ExecutionOptions) GetSeed() uint64

func (*ExecutionOptions) GetShapeWithOutputLayout

func (x *ExecutionOptions) GetShapeWithOutputLayout() *xla_data.ShapeProto

func (*ExecutionOptions) GetShardableValueUpdatePairs

func (x *ExecutionOptions) GetShardableValueUpdatePairs() []*ShardableValueUpdatePairProto

func (*ExecutionOptions) GetUseAutoSpmdPartitioning

func (x *ExecutionOptions) GetUseAutoSpmdPartitioning() bool

func (*ExecutionOptions) GetUseShardyPartitioner

func (x *ExecutionOptions) GetUseShardyPartitioner() bool

func (*ExecutionOptions) GetUseSpmdPartitioning

func (x *ExecutionOptions) GetUseSpmdPartitioning() bool

func (*ExecutionOptions) ProtoMessage

func (*ExecutionOptions) ProtoMessage()

func (*ExecutionOptions) ProtoReflect

func (x *ExecutionOptions) ProtoReflect() protoreflect.Message

func (*ExecutionOptions) Reset

func (x *ExecutionOptions) Reset()

func (*ExecutionOptions) String

func (x *ExecutionOptions) String() string

type GpuCompilationEnvironment

type GpuCompilationEnvironment struct {

	// A temporary dummy flag, added to test the flow. To be removed once real
	// flags are added here.
	DummyFlag int64 `protobuf:"varint,1,opt,name=dummy_flag,json=dummyFlag,proto3" json:"dummy_flag,omitempty"`
	// contains filtered or unexported fields
}

Contains flags that affect the GPU compilation result. These flags are currently part of DebugOptions and will be migrated to this proto.

func (*GpuCompilationEnvironment) Descriptor deprecated

func (*GpuCompilationEnvironment) Descriptor() ([]byte, []int)

Deprecated: Use GpuCompilationEnvironment.ProtoReflect.Descriptor instead.

func (*GpuCompilationEnvironment) GetDummyFlag

func (x *GpuCompilationEnvironment) GetDummyFlag() int64

func (*GpuCompilationEnvironment) ProtoMessage

func (*GpuCompilationEnvironment) ProtoMessage()

func (*GpuCompilationEnvironment) ProtoReflect

func (x *GpuCompilationEnvironment) ProtoReflect() protoreflect.Message

func (*GpuCompilationEnvironment) Reset

func (x *GpuCompilationEnvironment) Reset()

func (*GpuCompilationEnvironment) String

func (x *GpuCompilationEnvironment) String() string

type HloModuleConfigProto

type HloModuleConfigProto struct {
	EntryComputationLayout                   *xla_data.ProgramShapeProto                 `` /* 129-byte string literal not displayed */
	Seed                                     uint64                                      `protobuf:"varint,2,opt,name=seed,proto3" json:"seed,omitempty"`
	LaunchId                                 int32                                       `protobuf:"varint,3,opt,name=launch_id,json=launchId,proto3" json:"launch_id,omitempty"`
	ReplicaCount                             int64                                       `protobuf:"varint,4,opt,name=replica_count,json=replicaCount,proto3" json:"replica_count,omitempty"`
	NumPartitions                            int64                                       `protobuf:"varint,5,opt,name=num_partitions,json=numPartitions,proto3" json:"num_partitions,omitempty"`
	ParamRequiresBroadcastViaCollectives     []bool                                      `` /* 183-byte string literal not displayed */
	UseSpmdPartitioning                      bool                                        `protobuf:"varint,7,opt,name=use_spmd_partitioning,json=useSpmdPartitioning,proto3" json:"use_spmd_partitioning,omitempty"`
	UseAutoSpmdPartitioning                  bool                                        `` /* 135-byte string literal not displayed */
	AutoSpmdPartitioningMeshShape            []int64                                     `` /* 162-byte string literal not displayed */
	AutoSpmdPartitioningMeshIds              []int64                                     `` /* 157-byte string literal not displayed */
	DeduplicateHlo                           bool                                        `protobuf:"varint,11,opt,name=deduplicate_hlo,json=deduplicateHlo,proto3" json:"deduplicate_hlo,omitempty"`
	IntraOpParallelismThreads                int64                                       `` /* 142-byte string literal not displayed */
	DeviceType                               string                                      `protobuf:"bytes,13,opt,name=device_type,json=deviceType,proto3" json:"device_type,omitempty"`
	DebugOptions                             *DebugOptions                               `protobuf:"bytes,14,opt,name=debug_options,json=debugOptions,proto3" json:"debug_options,omitempty"`
	StaticDeviceAssignment                   *xla_data.DeviceAssignmentProto             `` /* 130-byte string literal not displayed */
	AllowSeparateShardingPrograms            bool                                        `` /* 154-byte string literal not displayed */
	ShardableValueUpdatePairs                []*ShardableValueUpdatePairProto            `` /* 141-byte string literal not displayed */
	AliasPassthroughParams                   bool                                        `` /* 131-byte string literal not displayed */
	ContentAwareComputationSorting           bool                                        `` /* 157-byte string literal not displayed */
	FusionConfigCollection                   HloModuleConfigProto_FusionConfigCollection `` /* 184-byte string literal not displayed */
	FusionConfig                             []*HloModuleConfigProto_BoolList            `protobuf:"bytes,20,rep,name=fusion_config,json=fusionConfig,proto3" json:"fusion_config,omitempty"`
	DotConfig                                map[string]*HloModuleConfigProto_Int64List  `` /* 177-byte string literal not displayed */
	LayoutConfig                             []*HloModuleConfigProto_Int64ListList       `protobuf:"bytes,22,rep,name=layout_config,json=layoutConfig,proto3" json:"layout_config,omitempty"`
	MemorySpaceAssignmentConfig              []uint64                                    `` /* 155-byte string literal not displayed */
	PhaseOrderingConfig                      []*HloModuleConfigProto_BoolList            `protobuf:"bytes,24,rep,name=phase_ordering_config,json=phaseOrderingConfig,proto3" json:"phase_ordering_config,omitempty"`
	PhaseIndex                               int32                                       `protobuf:"varint,25,opt,name=phase_index,json=phaseIndex,proto3" json:"phase_index,omitempty"`
	AllowSpmdShardingPropagationToParameters []bool                                      `` /* 198-byte string literal not displayed */
	AllowSpmdShardingPropagationToOutput     []bool                                      `` /* 186-byte string literal not displayed */
	AnalysisAllowanceMap                     map[string]int64                            `` /* 213-byte string literal not displayed */
	MatrixUnitOperandPrecision               xla_data.PrecisionConfig_Precision          `` /* 180-byte string literal not displayed */
	FdoProfile                               []byte                                      `protobuf:"bytes,31,opt,name=fdo_profile,json=fdoProfile,proto3" json:"fdo_profile,omitempty"`
	DeviceMemorySize                         int64                                       `protobuf:"varint,32,opt,name=device_memory_size,json=deviceMemorySize,proto3" json:"device_memory_size,omitempty"`
	UseShardyPartitioner                     bool                                        `protobuf:"varint,34,opt,name=use_shardy_partitioner,json=useShardyPartitioner,proto3" json:"use_shardy_partitioner,omitempty"`
	// contains filtered or unexported fields
}

Serialization of HloModuleConfig. See the C++ class definition for descriptions of each field. There are no guarantees of backwards or forwards compatibility. Next id: 35.
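
Since compatibility is not guaranteed, the safe pattern is to marshal and unmarshal within the same build. A minimal round-trip sketch; the xla import path is a placeholder:

	package main

	import (
		"google.golang.org/protobuf/proto"

		xla "example.com/generated/xla" // placeholder import path
	)

	func main() {
		cfg := &xla.HloModuleConfigProto{
			ReplicaCount:  8,
			NumPartitions: 2,
			DebugOptions:  &xla.DebugOptions{XlaGpuEnableTritonGemm: true},
		}

		// Round-trip within the same build only: this format carries no
		// backwards- or forwards-compatibility guarantees.
		raw, err := proto.Marshal(cfg)
		if err != nil {
			panic(err)
		}
		restored := &xla.HloModuleConfigProto{}
		if err := proto.Unmarshal(raw, restored); err != nil {
			panic(err)
		}
	}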

func (*HloModuleConfigProto) Descriptor deprecated

func (*HloModuleConfigProto) Descriptor() ([]byte, []int)

Deprecated: Use HloModuleConfigProto.ProtoReflect.Descriptor instead.

func (*HloModuleConfigProto) GetAliasPassthroughParams

func (x *HloModuleConfigProto) GetAliasPassthroughParams() bool

func (*HloModuleConfigProto) GetAllowSeparateShardingPrograms

func (x *HloModuleConfigProto) GetAllowSeparateShardingPrograms() bool

func (*HloModuleConfigProto) GetAllowSpmdShardingPropagationToOutput

func (x *HloModuleConfigProto) GetAllowSpmdShardingPropagationToOutput() []bool

func (*HloModuleConfigProto) GetAllowSpmdShardingPropagationToParameters

func (x *HloModuleConfigProto) GetAllowSpmdShardingPropagationToParameters() []bool

func (*HloModuleConfigProto) GetAnalysisAllowanceMap

func (x *HloModuleConfigProto) GetAnalysisAllowanceMap() map[string]int64

func (*HloModuleConfigProto) GetAutoSpmdPartitioningMeshIds

func (x *HloModuleConfigProto) GetAutoSpmdPartitioningMeshIds() []int64

func (*HloModuleConfigProto) GetAutoSpmdPartitioningMeshShape

func (x *HloModuleConfigProto) GetAutoSpmdPartitioningMeshShape() []int64

func (*HloModuleConfigProto) GetContentAwareComputationSorting

func (x *HloModuleConfigProto) GetContentAwareComputationSorting() bool

func (*HloModuleConfigProto) GetDebugOptions

func (x *HloModuleConfigProto) GetDebugOptions() *DebugOptions

func (*HloModuleConfigProto) GetDeduplicateHlo

func (x *HloModuleConfigProto) GetDeduplicateHlo() bool

func (*HloModuleConfigProto) GetDeviceMemorySize

func (x *HloModuleConfigProto) GetDeviceMemorySize() int64

func (*HloModuleConfigProto) GetDeviceType

func (x *HloModuleConfigProto) GetDeviceType() string

func (*HloModuleConfigProto) GetDotConfig

func (x *HloModuleConfigProto) GetDotConfig() map[string]*HloModuleConfigProto_Int64List

func (*HloModuleConfigProto) GetEntryComputationLayout

func (x *HloModuleConfigProto) GetEntryComputationLayout() *xla_data.ProgramShapeProto

func (*HloModuleConfigProto) GetFdoProfile

func (x *HloModuleConfigProto) GetFdoProfile() []byte

func (*HloModuleConfigProto) GetFusionConfig

func (x *HloModuleConfigProto) GetFusionConfig() []*HloModuleConfigProto_BoolList

func (*HloModuleConfigProto) GetFusionConfigCollection

func (x *HloModuleConfigProto) GetFusionConfigCollection() HloModuleConfigProto_FusionConfigCollection

func (*HloModuleConfigProto) GetIntraOpParallelismThreads

func (x *HloModuleConfigProto) GetIntraOpParallelismThreads() int64

func (*HloModuleConfigProto) GetLaunchId

func (x *HloModuleConfigProto) GetLaunchId() int32

func (*HloModuleConfigProto) GetLayoutConfig

func (x *HloModuleConfigProto) GetLayoutConfig() []*HloModuleConfigProto_Int64ListList

func (*HloModuleConfigProto) GetMatrixUnitOperandPrecision

func (x *HloModuleConfigProto) GetMatrixUnitOperandPrecision() xla_data.PrecisionConfig_Precision

func (*HloModuleConfigProto) GetMemorySpaceAssignmentConfig

func (x *HloModuleConfigProto) GetMemorySpaceAssignmentConfig() []uint64

func (*HloModuleConfigProto) GetNumPartitions

func (x *HloModuleConfigProto) GetNumPartitions() int64

func (*HloModuleConfigProto) GetParamRequiresBroadcastViaCollectives

func (x *HloModuleConfigProto) GetParamRequiresBroadcastViaCollectives() []bool

func (*HloModuleConfigProto) GetPhaseIndex

func (x *HloModuleConfigProto) GetPhaseIndex() int32

func (*HloModuleConfigProto) GetPhaseOrderingConfig

func (x *HloModuleConfigProto) GetPhaseOrderingConfig() []*HloModuleConfigProto_BoolList

func (*HloModuleConfigProto) GetReplicaCount

func (x *HloModuleConfigProto) GetReplicaCount() int64

func (*HloModuleConfigProto) GetSeed

func (x *HloModuleConfigProto) GetSeed() uint64

func (*HloModuleConfigProto) GetShardableValueUpdatePairs

func (x *HloModuleConfigProto) GetShardableValueUpdatePairs() []*ShardableValueUpdatePairProto

func (*HloModuleConfigProto) GetStaticDeviceAssignment

func (x *HloModuleConfigProto) GetStaticDeviceAssignment() *xla_data.DeviceAssignmentProto

func (*HloModuleConfigProto) GetUseAutoSpmdPartitioning

func (x *HloModuleConfigProto) GetUseAutoSpmdPartitioning() bool

func (*HloModuleConfigProto) GetUseShardyPartitioner

func (x *HloModuleConfigProto) GetUseShardyPartitioner() bool

func (*HloModuleConfigProto) GetUseSpmdPartitioning

func (x *HloModuleConfigProto) GetUseSpmdPartitioning() bool

func (*HloModuleConfigProto) ProtoMessage

func (*HloModuleConfigProto) ProtoMessage()

func (*HloModuleConfigProto) ProtoReflect

func (x *HloModuleConfigProto) ProtoReflect() protoreflect.Message

func (*HloModuleConfigProto) Reset

func (x *HloModuleConfigProto) Reset()

func (*HloModuleConfigProto) String

func (x *HloModuleConfigProto) String() string

type HloModuleConfigProto_BoolList

type HloModuleConfigProto_BoolList struct {
	Vals []bool `protobuf:"varint,1,rep,packed,name=vals,proto3" json:"vals,omitempty"`
	// contains filtered or unexported fields
}

func (*HloModuleConfigProto_BoolList) Descriptor deprecated

func (*HloModuleConfigProto_BoolList) Descriptor() ([]byte, []int)

Deprecated: Use HloModuleConfigProto_BoolList.ProtoReflect.Descriptor instead.

func (*HloModuleConfigProto_BoolList) GetVals

func (x *HloModuleConfigProto_BoolList) GetVals() []bool

func (*HloModuleConfigProto_BoolList) ProtoMessage

func (*HloModuleConfigProto_BoolList) ProtoMessage()

func (*HloModuleConfigProto_BoolList) ProtoReflect

func (x *HloModuleConfigProto_BoolList) ProtoReflect() protoreflect.Message

func (*HloModuleConfigProto_BoolList) Reset

func (x *HloModuleConfigProto_BoolList) Reset()

func (*HloModuleConfigProto_BoolList) String

func (x *HloModuleConfigProto_BoolList) String() string

type HloModuleConfigProto_FusionConfigCollection

type HloModuleConfigProto_FusionConfigCollection int32
const (
	HloModuleConfigProto_OFF      HloModuleConfigProto_FusionConfigCollection = 0 // Do not collect configuration.
	HloModuleConfigProto_PER_EDGE HloModuleConfigProto_FusionConfigCollection = 1 // Collect per-edge configuration.
	HloModuleConfigProto_PER_NODE HloModuleConfigProto_FusionConfigCollection = 2 // Collect per-node configuration.
)

func (HloModuleConfigProto_FusionConfigCollection) Descriptor

func (HloModuleConfigProto_FusionConfigCollection) Enum

func (HloModuleConfigProto_FusionConfigCollection) EnumDescriptor deprecated

func (HloModuleConfigProto_FusionConfigCollection) EnumDescriptor() ([]byte, []int)

Deprecated: Use HloModuleConfigProto_FusionConfigCollection.Descriptor instead.

func (HloModuleConfigProto_FusionConfigCollection) Number

func (HloModuleConfigProto_FusionConfigCollection) String

func (HloModuleConfigProto_FusionConfigCollection) Type

type HloModuleConfigProto_Int64List

type HloModuleConfigProto_Int64List struct {
	Vals []int64 `protobuf:"varint,1,rep,packed,name=vals,proto3" json:"vals,omitempty"`
	// contains filtered or unexported fields
}

func (*HloModuleConfigProto_Int64List) Descriptor deprecated

func (*HloModuleConfigProto_Int64List) Descriptor() ([]byte, []int)

Deprecated: Use HloModuleConfigProto_Int64List.ProtoReflect.Descriptor instead.

func (*HloModuleConfigProto_Int64List) GetVals

func (x *HloModuleConfigProto_Int64List) GetVals() []int64

func (*HloModuleConfigProto_Int64List) ProtoMessage

func (*HloModuleConfigProto_Int64List) ProtoMessage()

func (*HloModuleConfigProto_Int64List) ProtoReflect

func (x *HloModuleConfigProto_Int64List) ProtoReflect() protoreflect.Message

func (*HloModuleConfigProto_Int64List) Reset

func (x *HloModuleConfigProto_Int64List) Reset()

func (*HloModuleConfigProto_Int64List) String

func (x *HloModuleConfigProto_Int64List) String() string

type HloModuleConfigProto_Int64ListList

type HloModuleConfigProto_Int64ListList struct {
	Lists []*HloModuleConfigProto_Int64List `protobuf:"bytes,1,rep,name=lists,proto3" json:"lists,omitempty"`
	// contains filtered or unexported fields
}

func (*HloModuleConfigProto_Int64ListList) Descriptor deprecated

func (*HloModuleConfigProto_Int64ListList) Descriptor() ([]byte, []int)

Deprecated: Use HloModuleConfigProto_Int64ListList.ProtoReflect.Descriptor instead.

func (*HloModuleConfigProto_Int64ListList) GetLists

func (x *HloModuleConfigProto_Int64ListList) GetLists() []*HloModuleConfigProto_Int64List

func (*HloModuleConfigProto_Int64ListList) ProtoMessage

func (*HloModuleConfigProto_Int64ListList) ProtoMessage()

func (*HloModuleConfigProto_Int64ListList) ProtoReflect

func (x *HloModuleConfigProto_Int64ListList) ProtoReflect() protoreflect.Message

func (*HloModuleConfigProto_Int64ListList) Reset

func (x *HloModuleConfigProto_Int64ListList) Reset()

func (*HloModuleConfigProto_Int64ListList) String

func (x *HloModuleConfigProto_Int64ListList) String() string

type HloModuleProtoWithConfig

type HloModuleProtoWithConfig struct {
	HloModule *hlo.HloModuleProto   `protobuf:"bytes,1,opt,name=hlo_module,json=hloModule,proto3" json:"hlo_module,omitempty"`
	Config    *HloModuleConfigProto `protobuf:"bytes,2,opt,name=config,proto3" json:"config,omitempty"`
	// contains filtered or unexported fields
}

func (*HloModuleProtoWithConfig) Descriptor deprecated

func (*HloModuleProtoWithConfig) Descriptor() ([]byte, []int)

Deprecated: Use HloModuleProtoWithConfig.ProtoReflect.Descriptor instead.

func (*HloModuleProtoWithConfig) GetConfig

func (x *HloModuleProtoWithConfig) GetConfig() *HloModuleConfigProto

func (*HloModuleProtoWithConfig) GetHloModule

func (x *HloModuleProtoWithConfig) GetHloModule() *hlo.HloModuleProto

func (*HloModuleProtoWithConfig) ProtoMessage

func (*HloModuleProtoWithConfig) ProtoMessage()

func (*HloModuleProtoWithConfig) ProtoReflect

func (x *HloModuleProtoWithConfig) ProtoReflect() protoreflect.Message

func (*HloModuleProtoWithConfig) Reset

func (x *HloModuleProtoWithConfig) Reset()

func (*HloModuleProtoWithConfig) String

func (x *HloModuleProtoWithConfig) String() string

type ScheduleProto

type ScheduleProto struct {
	Instructions []*ScheduleProto_Instruction `protobuf:"bytes,1,rep,name=instructions,proto3" json:"instructions,omitempty"`
	// Computation id (matches the id in HloComputationProto).
	ComputationId        int64               `protobuf:"varint,2,opt,name=computation_id,json=computationId,proto3" json:"computation_id,omitempty"`
	HloModule            *hlo.HloModuleProto `protobuf:"bytes,3,opt,name=hlo_module,json=hloModule,proto3" json:"hlo_module,omitempty"`
	CyclesPerMicrosecond int64               `protobuf:"varint,4,opt,name=cycles_per_microsecond,json=cyclesPerMicrosecond,proto3" json:"cycles_per_microsecond,omitempty"`
	// contains filtered or unexported fields
}

A trace estimated by the Latency Hiding Scheduler.
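
A minimal sketch converting the trace's cycle timestamps into microseconds via CyclesPerMicrosecond; the xla import path is a placeholder:

	package main

	import (
		"fmt"

		xla "example.com/generated/xla" // placeholder import path
	)

	// dumpSchedule prints each instruction's duration in microseconds,
	// derived from its cycle timestamps and the trace's clock rate.
	func dumpSchedule(s *xla.ScheduleProto) {
		cpm := float64(s.GetCyclesPerMicrosecond())
		if cpm == 0 {
			return // clock rate unknown; nothing to convert
		}
		for _, inst := range s.GetInstructions() {
			us := (inst.GetEndTimestampCycles() - inst.GetStartTimestampCycles()) / cpm
			fmt.Printf("instruction %d: %.3f us\n", inst.GetId(), us)
		}
	}

	func main() {
		s := &xla.ScheduleProto{
			CyclesPerMicrosecond: 1000,
			Instructions: []*xla.ScheduleProto_Instruction{
				{Id: 1, StartTimestampCycles: 0, EndTimestampCycles: 2500},
			},
		}
		dumpSchedule(s) // instruction 1: 2.500 us
	}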

func (*ScheduleProto) Descriptor deprecated

func (*ScheduleProto) Descriptor() ([]byte, []int)

Deprecated: Use ScheduleProto.ProtoReflect.Descriptor instead.

func (*ScheduleProto) GetComputationId

func (x *ScheduleProto) GetComputationId() int64

func (*ScheduleProto) GetCyclesPerMicrosecond

func (x *ScheduleProto) GetCyclesPerMicrosecond() int64

func (*ScheduleProto) GetHloModule

func (x *ScheduleProto) GetHloModule() *hlo.HloModuleProto

func (*ScheduleProto) GetInstructions

func (x *ScheduleProto) GetInstructions() []*ScheduleProto_Instruction

func (*ScheduleProto) ProtoMessage

func (*ScheduleProto) ProtoMessage()

func (*ScheduleProto) ProtoReflect

func (x *ScheduleProto) ProtoReflect() protoreflect.Message

func (*ScheduleProto) Reset

func (x *ScheduleProto) Reset()

func (*ScheduleProto) String

func (x *ScheduleProto) String() string

type ScheduleProto_Instruction

type ScheduleProto_Instruction struct {

	// Instruction id (matches the id in HloInstructionProto).
	Id int64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"`
	// Start and end timestamps in cycles.
	StartTimestampCycles float64 `protobuf:"fixed64,2,opt,name=start_timestamp_cycles,json=startTimestampCycles,proto3" json:"start_timestamp_cycles,omitempty"`
	EndTimestampCycles   float64 `protobuf:"fixed64,3,opt,name=end_timestamp_cycles,json=endTimestampCycles,proto3" json:"end_timestamp_cycles,omitempty"`
	// contains filtered or unexported fields
}

func (*ScheduleProto_Instruction) Descriptor deprecated

func (*ScheduleProto_Instruction) Descriptor() ([]byte, []int)

Deprecated: Use ScheduleProto_Instruction.ProtoReflect.Descriptor instead.

func (*ScheduleProto_Instruction) GetEndTimestampCycles

func (x *ScheduleProto_Instruction) GetEndTimestampCycles() float64

func (*ScheduleProto_Instruction) GetId

func (x *ScheduleProto_Instruction) GetId() int64

func (*ScheduleProto_Instruction) GetStartTimestampCycles

func (x *ScheduleProto_Instruction) GetStartTimestampCycles() float64

func (*ScheduleProto_Instruction) ProtoMessage

func (*ScheduleProto_Instruction) ProtoMessage()

func (*ScheduleProto_Instruction) ProtoReflect

func (x *ScheduleProto_Instruction) ProtoReflect() protoreflect.Message

func (*ScheduleProto_Instruction) Reset

func (x *ScheduleProto_Instruction) Reset()

func (*ScheduleProto_Instruction) String

func (x *ScheduleProto_Instruction) String() string

type ShardableValueUpdatePairProto

type ShardableValueUpdatePairProto struct {
	InputParameterNumber int64   `protobuf:"varint,1,opt,name=input_parameter_number,json=inputParameterNumber,proto3" json:"input_parameter_number,omitempty"`
	ParameterShapeIndex  []int64 `` /* 128-byte string literal not displayed */
	OutputShapeIndex     []int64 `protobuf:"varint,3,rep,packed,name=output_shape_index,json=outputShapeIndex,proto3" json:"output_shape_index,omitempty"`
	// contains filtered or unexported fields
}

func (*ShardableValueUpdatePairProto) Descriptor deprecated

func (*ShardableValueUpdatePairProto) Descriptor() ([]byte, []int)

Deprecated: Use ShardableValueUpdatePairProto.ProtoReflect.Descriptor instead.

func (*ShardableValueUpdatePairProto) GetInputParameterNumber

func (x *ShardableValueUpdatePairProto) GetInputParameterNumber() int64

func (*ShardableValueUpdatePairProto) GetOutputShapeIndex

func (x *ShardableValueUpdatePairProto) GetOutputShapeIndex() []int64

func (*ShardableValueUpdatePairProto) GetParameterShapeIndex

func (x *ShardableValueUpdatePairProto) GetParameterShapeIndex() []int64

func (*ShardableValueUpdatePairProto) ProtoMessage

func (*ShardableValueUpdatePairProto) ProtoMessage()

func (*ShardableValueUpdatePairProto) ProtoReflect

func (x *ShardableValueUpdatePairProto) ProtoReflect() protoreflect.Message

func (*ShardableValueUpdatePairProto) Reset

func (x *ShardableValueUpdatePairProto) Reset()

func (*ShardableValueUpdatePairProto) String

func (x *ShardableValueUpdatePairProto) String() string
