sysdump

package
v1.17.0-pre.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 2, 2024 License: Apache-2.0 Imports: 47 Imported by: 0

Documentation

Index

Constants

View Source
// Default values for sysdump collection settings.
//
// Many entries share their name (minus the "Default" prefix) with a field of
// Options. Label selector defaults built from labelPrefix depend on that
// unexported constant, which is not declared in this block.
// NOTE(review): the "<ts>" placeholder is documented only for
// DefaultOutputFileName; the Tetragon file names presumably use the same
// substitution — confirm.
const (
	DefaultCiliumLabelSelector               = labelPrefix + "cilium"
	DefaultCiliumEnvoyLabelSelector          = labelPrefix + "cilium-envoy"
	DefaultCiliumOperatorLabelSelector       = "io.cilium/app=operator"
	DefaultClustermeshApiserverLabelSelector = labelPrefix + "clustermesh-apiserver"
	DefaultCiliumNodeInitLabelSelector       = "app=cilium-node-init"
	DefaultCiliumSpireAgentLabelSelector     = "app=spire-agent"
	DefaultCiliumSpireServerLabelSelector    = "app=spire-server"
	DefaultDebug                             = false
	DefaultProfiling                         = true
	DefaultTracing                           = false
	DefaultHubbleLabelSelector               = labelPrefix + "hubble"
	DefaultHubbleFlowsCount                  = 10000
	DefaultHubbleFlowsTimeout                = 5 * time.Second
	DefaultHubbleRelayLabelSelector          = labelPrefix + "hubble-relay"
	DefaultHubbleUILabelSelector             = labelPrefix + "hubble-ui"
	DefaultHubbleGenerateCertsLabelSelector  = labelPrefix + "hubble-generate-certs"
	DefaultLargeSysdumpAbortTimeout          = 5 * time.Second
	DefaultLargeSysdumpThreshold             = 20
	DefaultLogsSinceTime                     = 8760 * time.Hour // 1y
	DefaultLogsLimitBytes                    = 1073741824       // 1GiB
	DefaultNodeList                          = ""
	DefaultQuick                             = false
	DefaultOutputFileName                    = "cilium-sysdump-<ts>" // "<ts>" will be replaced with the timestamp
	DefaultDetectGopsPID                     = false
	DefaultCNIConfigDirectory                = "/etc/cni/net.d/"
	DefaultCNIConfigMapName                  = "cni-configuration"
	DefaultTetragonNamespace                 = "kube-system"
	DefaultTetragonLabelSelector             = "app.kubernetes.io/name=tetragon"
	DefaultTetragonOperatorLabelSelector     = "app.kubernetes.io/name=tetragon-operator"
	DefaultTetragonAgentContainerName        = "tetragon"
	DefaultTetragonConfigMapName             = "tetragon-config"
	DefaultTetragonBugtoolPrefix             = "tetragon-bugtool"
	DefaultTetragonCLICommand                = "tetra"
	DefaultTetragonPodInfo                   = "tetragonpodinfo-<ts>.yaml"
	DefaultTetragonTracingPolicy             = "tetragontracingpolicy-<ts>.yaml"
	DefaultTetragonTracingPolicyNamespaced   = "tetragontracingpolicynamespaced-<ts>.yaml"
)

Variables

View Source
// Defaults that are declared as variables rather than constants, so importing
// code is able to reassign them.
// NOTE(review): confirm whether any caller relies on overriding these.
var (
	// DefaultWorkerCount is the default number of parallel workers for sysdump collection.
	DefaultWorkerCount = 20

	// DefaultCopyRetryLimit is the default limit on retries performed while copying files from pods.
	DefaultCopyRetryLimit = 100

	// DefaultCiliumNamespaces lists the namespaces used when attempting to autodetect
	// which namespace Cilium is installed in, unless otherwise specified.
	DefaultCiliumNamespaces = []string{"kube-system", "cilium"}

	// DefaultCiliumSPIRENamespaces lists the namespaces used when attempting to autodetect
	// which namespace Cilium SPIRE is installed in, unless otherwise specified.
	DefaultCiliumSPIRENamespaces = []string{"kube-system", "cilium", "cilium-spire"}
)

Functions

func AllPods

func AllPods(l *corev1.PodList) []*corev1.Pod

AllPods converts a PodList into a slice of Pod objects.

func FilterPods

func FilterPods(l *corev1.PodList, n []string) []*corev1.Pod

FilterPods filters a list of pods by node names.

func InitSysdumpFlags

func InitSysdumpFlags(cmd *cobra.Command, options *Options, optionPrefix string, hooks Hooks)

Types

type Collector

type Collector struct {
	// Client is the Kubernetes client, Options the collection settings, and
	// Pool the worker pool associated with this collector.
	// NOTE(review): presumably Client and Options are the values passed to
	// NewCollector and Pool runs the submitted tasks — confirm.
	Client  KubernetesClient
	Options Options
	Pool    *workerpool.WorkerPool

	// NodeList is a list of nodes to collect sysdump information from.
	NodeList []string
	// CiliumPods is a list of Cilium agent pods running on nodes in NodeList.
	CiliumPods []*corev1.Pod
	// CiliumOperatorPods is the list of Cilium operator pods.
	CiliumOperatorPods []*corev1.Pod
	// CiliumConfigMap is a pointer to cilium-config ConfigMap.
	CiliumConfigMap *corev1.ConfigMap

	// FeatureSet is a map of enabled / disabled features based on the contents of cilium-config ConfigMap.
	FeatureSet features.Set
	// contains filtered or unexported fields
}

Collector knows how to collect information required to troubleshoot issues with Cilium and Hubble.

func NewCollector

func NewCollector(
	k KubernetesClient,
	o Options,
	hooks Hooks,
	startTime time.Time,
) (*Collector, error)

NewCollector returns a new sysdump collector.

func (*Collector) AbsoluteTempPath

func (c *Collector) AbsoluteTempPath(f string) string

AbsoluteTempPath returns the absolute path where to store the specified filename temporarily.

func (*Collector) AddTasks

func (c *Collector) AddTasks(tasks []Task)

AddTasks adds extra tasks for the collector to execute. Must be called before Run().

func (*Collector) GatherResourceUnstructured

func (c *Collector) GatherResourceUnstructured(ctx context.Context, r schema.GroupVersionResource, fname string, keep ...string) error

GatherResourceUnstructured queries resources with the given GroupVersionResource, storing them in the file specified by fname. If keep is non-empty, it filters the items returned, keeping only those with names listed in keep. If keep is empty, it does not filter the resources returned.

func (*Collector) Run

func (c *Collector) Run() error

Run performs the actual sysdump collection.

func (*Collector) SubmitCniConflistSubtask

func (c *Collector) SubmitCniConflistSubtask(pods []*corev1.Pod, containerName string) error

func (*Collector) SubmitGopsSubtasks

func (c *Collector) SubmitGopsSubtasks(pods []*corev1.Pod, containerName string) error

SubmitGopsSubtasks submits tasks to collect gops statistics from pods.

func (*Collector) SubmitLogsTasks

func (c *Collector) SubmitLogsTasks(pods []*corev1.Pod, since time.Duration, limitBytes int64) error

SubmitLogsTasks submits tasks to collect kubernetes logs from pods.

func (*Collector) SubmitMetricsSubtask

func (c *Collector) SubmitMetricsSubtask(pods []*corev1.Pod, containerName, portName string) error

SubmitMetricsSubtask submits tasks to collect metrics from pods.

func (*Collector) SubmitProfilingGopsSubtasks

func (c *Collector) SubmitProfilingGopsSubtasks(pods []*corev1.Pod, containerName string) error

SubmitProfilingGopsSubtasks submits tasks to collect profiling data from pods.

func (*Collector) SubmitStreamProfilingGopsSubtasks

func (c *Collector) SubmitStreamProfilingGopsSubtasks(pods []*corev1.Pod, containerName string, port uint16) error

SubmitStreamProfilingGopsSubtasks submits tasks to collect profiling data from pods. Unlike SubmitProfilingGopsSubtasks, it retrieves the profiles directly from the remote gops server rather than calling the gops client binary. This allows retrieving the profiles from distroless containers as well, as it does not depend on any shell tools.

func (*Collector) SubmitTetragonBugtoolTasks

func (c *Collector) SubmitTetragonBugtoolTasks(pods []*corev1.Pod, tetragonAgentContainerName,
	tetragonBugtoolPrefix, tetragonCLICommand string) error

func (*Collector) SubmitTracingGopsSubtask

func (c *Collector) SubmitTracingGopsSubtask(pods []*corev1.Pod, containerName string) error

SubmitTracingGopsSubtask submits tasks to collect tracing data from pods.

func (*Collector) WriteBytes

func (c *Collector) WriteBytes(filename string, value []byte) error

WriteBytes writes a byte array to a file.

func (*Collector) WriteString

func (c *Collector) WriteString(filename string, value string) error

WriteString writes a string to a file.

func (*Collector) WriteTable

func (c *Collector) WriteTable(filename string, value *metav1.Table) error

WriteTable writes a kubernetes table to a file.

func (*Collector) WriteYAML

func (c *Collector) WriteYAML(filename string, o runtime.Object) error

WriteYAML writes a kubernetes object to a file as YAML.

type Hooks

type Hooks interface {
	// AddSysdumpFlags receives the flag set so that an implementation can
	// register its additional sysdump-related flags on it.
	// NOTE(review): presumably invoked from InitSysdumpFlags — confirm.
	AddSysdumpFlags(flags *pflag.FlagSet)
	// AddSysdumpTasks receives the Collector so that an implementation can
	// add its extra tasks to it (Collector.AddTasks must be called before
	// Run). A non-nil error reports that the tasks could not be added.
	AddSysdumpTasks(*Collector) error
}

Hooks allows extending cilium-cli with additional sysdump tasks and related flags.

type KubernetesClient

// KubernetesClient is the set of cluster operations available to the sysdump
// Collector; it is the type of Collector.Client and of the first parameter of
// NewCollector. The methods cover pod access (get, create, delete, exec, copy,
// logs, proxying), retrieval of individual workload and configuration objects,
// Helm release data, version information, and listing of Cilium and core
// Kubernetes resources.
type KubernetesClient interface {
	AutodetectFlavor(ctx context.Context) k8s.Flavor
	CopyFromPod(ctx context.Context, namespace, pod, container, fromFile, destFile string, retryLimit int) error
	CreateEphemeralContainer(ctx context.Context, pod *corev1.Pod, ec *corev1.EphemeralContainer) (*corev1.Pod, error)
	CreatePod(ctx context.Context, namespace string, pod *corev1.Pod, opts metav1.CreateOptions) (*corev1.Pod, error)
	GetPod(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*corev1.Pod, error)
	GetRaw(ctx context.Context, path string) (string, error)
	DeletePod(ctx context.Context, namespace, name string, opts metav1.DeleteOptions) error
	ExecInPod(ctx context.Context, namespace, pod, container string, command []string) (bytes.Buffer, error)
	ExecInPodWithStderr(ctx context.Context, namespace, pod, container string, command []string) (bytes.Buffer, bytes.Buffer, error)
	GetConfigMap(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*corev1.ConfigMap, error)
	GetNamespace(ctx context.Context, namespace string, options metav1.GetOptions) (*corev1.Namespace, error)
	GetDaemonSet(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*appsv1.DaemonSet, error)
	GetStatefulSet(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*appsv1.StatefulSet, error)
	GetDeployment(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*appsv1.Deployment, error)
	GetCronJob(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*batchv1.CronJob, error)
	GetLogs(ctx context.Context, namespace, name, container string, opts corev1.PodLogOptions) (string, error)
	GetPodsTable(ctx context.Context) (*metav1.Table, error)
	ProxyGet(ctx context.Context, namespace, name, url string) (string, error)
	ProxyTCP(ctx context.Context, namespace, name string, port uint16, handler func(io.ReadWriteCloser) error) error
	GetSecret(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*corev1.Secret, error)
	GetCiliumVersion(ctx context.Context, p *corev1.Pod) (*semver.Version, error)
	GetVersion(ctx context.Context) (string, error)
	GetHelmMetadata(ctx context.Context, releaseName string, namespace string) (string, error)
	GetHelmValues(ctx context.Context, releaseName string, namespace string) (string, error)
	ListCiliumBGPPeeringPolicies(ctx context.Context, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumBGPPeeringPolicyList, error)
	ListCiliumBGPClusterConfigs(ctx context.Context, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumBGPClusterConfigList, error)
	ListCiliumBGPPeerConfigs(ctx context.Context, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumBGPPeerConfigList, error)
	ListCiliumBGPAdvertisements(ctx context.Context, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumBGPAdvertisementList, error)
	ListCiliumBGPNodeConfigs(ctx context.Context, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumBGPNodeConfigList, error)
	ListCiliumBGPNodeConfigOverrides(ctx context.Context, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumBGPNodeConfigOverrideList, error)
	ListCiliumCIDRGroups(ctx context.Context, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumCIDRGroupList, error)
	ListCiliumClusterwideNetworkPolicies(ctx context.Context, opts metav1.ListOptions) (*ciliumv2.CiliumClusterwideNetworkPolicyList, error)
	ListCiliumClusterwideEnvoyConfigs(ctx context.Context, opts metav1.ListOptions) (*ciliumv2.CiliumClusterwideEnvoyConfigList, error)
	ListCiliumIdentities(ctx context.Context) (*ciliumv2.CiliumIdentityList, error)
	ListCiliumEgressGatewayPolicies(ctx context.Context, opts metav1.ListOptions) (*ciliumv2.CiliumEgressGatewayPolicyList, error)
	ListCiliumEndpoints(ctx context.Context, namespace string, options metav1.ListOptions) (*ciliumv2.CiliumEndpointList, error)
	ListCiliumEndpointSlices(ctx context.Context, options metav1.ListOptions) (*ciliumv2alpha1.CiliumEndpointSliceList, error)
	ListCiliumEnvoyConfigs(ctx context.Context, namespace string, options metav1.ListOptions) (*ciliumv2.CiliumEnvoyConfigList, error)
	ListCiliumExternalWorkloads(ctx context.Context, options metav1.ListOptions) (*ciliumv2.CiliumExternalWorkloadList, error)
	ListCiliumLoadBalancerIPPools(ctx context.Context, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumLoadBalancerIPPoolList, error)
	ListCiliumLocalRedirectPolicies(ctx context.Context, namespace string, options metav1.ListOptions) (*ciliumv2.CiliumLocalRedirectPolicyList, error)
	ListCiliumNetworkPolicies(ctx context.Context, namespace string, opts metav1.ListOptions) (*ciliumv2.CiliumNetworkPolicyList, error)
	ListCiliumNodes(ctx context.Context) (*ciliumv2.CiliumNodeList, error)
	ListCiliumNodeConfigs(ctx context.Context, namespace string, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumNodeConfigList, error)
	ListCiliumPodIPPools(ctx context.Context, opts metav1.ListOptions) (*ciliumv2alpha1.CiliumPodIPPoolList, error)
	ListDaemonSet(ctx context.Context, namespace string, o metav1.ListOptions) (*appsv1.DaemonSetList, error)
	ListEvents(ctx context.Context, o metav1.ListOptions) (*corev1.EventList, error)
	ListEndpoints(ctx context.Context, o metav1.ListOptions) (*corev1.EndpointsList, error)
	ListEndpointSlices(ctx context.Context, o metav1.ListOptions) (*discoveryv1.EndpointSliceList, error)
	ListIngressClasses(ctx context.Context, o metav1.ListOptions) (*networkingv1.IngressClassList, error)
	ListIngresses(ctx context.Context, o metav1.ListOptions) (*networkingv1.IngressList, error)
	ListNamespaces(ctx context.Context, o metav1.ListOptions) (*corev1.NamespaceList, error)
	ListNetworkPolicies(ctx context.Context, o metav1.ListOptions) (*networkingv1.NetworkPolicyList, error)
	ListNodes(ctx context.Context, options metav1.ListOptions) (*corev1.NodeList, error)
	ListPods(ctx context.Context, namespace string, options metav1.ListOptions) (*corev1.PodList, error)
	ListServices(ctx context.Context, namespace string, options metav1.ListOptions) (*corev1.ServiceList, error)
	// NOTE(review): namespace is a *string here, unlike the other namespaced
	// List methods; presumably nil selects all namespaces — confirm against
	// the implementation.
	ListUnstructured(ctx context.Context, gvr schema.GroupVersionResource, namespace *string, o metav1.ListOptions) (*unstructured.UnstructuredList, error)
}

type Options

type Options struct {
	// The labels used to target Cilium pods.
	CiliumLabelSelector string
	// The namespace Cilium is running in.
	CiliumNamespace string
	// The namespace Cilium operator is running in.
	CiliumOperatorNamespace string
	// The namespace Cilium SPIRE installation is running in.
	CiliumSPIRENamespace string
	// The labels used to target the Cilium daemon set. Usually, this label is the same as CiliumLabelSelector.
	CiliumDaemonSetSelector string
	// The labels used to target Cilium Envoy pods.
	CiliumEnvoyLabelSelector string
	// The release name of the Cilium Helm chart.
	CiliumHelmReleaseName string
	// The labels used to target the Cilium Node Init daemon set. Usually, this label is the same as CiliumNodeInitLabelSelector.
	CiliumNodeInitDaemonSetSelector string
	// The labels used to target Cilium Node Init pods.
	CiliumNodeInitLabelSelector string
	// The labels used to target Cilium operator pods.
	CiliumOperatorLabelSelector string
	// The labels used to target 'clustermesh-apiserver' pods.
	ClustermeshApiserverLabelSelector string
	// The labels used to target Cilium SPIRE server pods.
	CiliumSPIREServerLabelSelector string
	// The labels used to target Cilium SPIRE agent pods.
	CiliumSPIREAgentLabelSelector string
	// Whether to enable debug logging.
	Debug bool
	// Whether to enable scraping profiling data.
	Profiling bool
	// Whether to enable scraping tracing data.
	Tracing bool
	// The labels used to target additional pods.
	ExtraLabelSelectors []string
	// The labels used to target Hubble pods.
	HubbleLabelSelector string
	// Number of Hubble flows to collect.
	HubbleFlowsCount int64
	// Timeout for collecting Hubble flows.
	HubbleFlowsTimeout time.Duration
	// The labels used to target Hubble Relay pods.
	HubbleRelayLabelSelector string
	// The labels used to target Hubble UI pods.
	HubbleUILabelSelector string
	// The labels used to target Hubble generate certs pods.
	HubbleGenerateCertsLabelSelector string
	// The amount of time to wait for the user to cancel the sysdump on a large cluster.
	LargeSysdumpAbortTimeout time.Duration
	// The threshold on the number of nodes present in the cluster that triggers a warning message.
	LargeSysdumpThreshold int
	// The limit on the number of bytes to retrieve when collecting logs.
	LogsLimitBytes int64
	// How far back in time to go when collecting logs.
	LogsSinceTime time.Duration
	// Comma-separated list of node IPs or names to filter pods for which to collect gops and logs.
	NodeList string
	// The name of the resulting file (without extension).
	// '<ts>' can be used as the placeholder for the timestamp.
	OutputFileName string
	// Whether to enable quick mode (i.e. skip collection of 'cilium-bugtool' output and logs).
	Quick bool
	// A 'RESTClientGetter' that can be used to create REST clients for the Kubernetes API.
	// Required at least for getting the proper output of 'kubectl get pod -o wide' without actually using 'kubectl'.
	RESTClientGetter genericclioptions.RESTClientGetter
	// The number of workers to use.
	WorkerCount int
	// The writer used for logging.
	Writer io.Writer
	// Flags to pass to the cilium-bugtool command.
	CiliumBugtoolFlags []string
	// Whether to automatically detect the gops agent PID.
	DetectGopsPID bool
	// Directory where CNI configs are located.
	CNIConfigDirectory string
	// The name of the CNI config map.
	CNIConfigMapName string
	// The labels used to target Tetragon pods.
	TetragonLabelSelector string
	// The labels used to target Tetragon operator pods.
	TetragonOperatorLabelSelector string
	// The namespace Tetragon is running in.
	TetragonNamespace string
	// Retry limit for copying files from pods.
	CopyRetryLimit int
}

Options groups together the set of options required to collect a sysdump.

type Task

type Task struct {
	// MUST be set to true if the task submits additional tasks to the worker pool.
	CreatesSubtasks bool
	// The description of the task.
	Description string
	// Whether this task runs when running in quick mode.
	Quick bool
	// The task itself: a function that receives a context and returns an
	// error value.
	Task func(context.Context) error
}

Task defines a task for the sysdump collector to execute.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL