iptables

package
v3.7.1+incompatible Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 30, 2019 License: Apache-2.0 Imports: 22 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// Compromise: shorter is better for table occupancy and readability. Longer is better for
	// collision-resistance.  16 chars gives us 96 bits of entropy, which is fairly collision
	// resistant.
	HashLength = 16
)
View Source
const (
	MaxChainNameLength = 28
)

Variables

View Source
var (
	Err14LockTimeout = errors.New("Timed out waiting for iptables 1.4 lock")
	Err16LockTimeout = errors.New("Timed out waiting for iptables 1.6 lock")
)

Functions

func GrabIptablesLocks

func GrabIptablesLocks(lockFilePath, socketName string, timeout, probeInterval time.Duration) (io.Closer, error)

func PortRangessToMultiport

func PortRangessToMultiport(ports []*proto.PortRange) string

func PortsToMultiport

func PortsToMultiport(ports []uint16) string

Types

type AcceptAction

type AcceptAction struct {
	TypeAccept struct{}
}

func (AcceptAction) String

func (g AcceptAction) String() string

func (AcceptAction) ToFragment

func (g AcceptAction) ToFragment(features *Features) string

type Action

type Action interface {
	ToFragment(features *Features) string
}

type AddrType

type AddrType string
const (
	AddrTypeLocal AddrType = "LOCAL"
)

type Chain

type Chain struct {
	Name  string
	Rules []Rule
}

func (*Chain) RuleHashes

func (c *Chain) RuleHashes(features *Features) []string

type ClearMarkAction

type ClearMarkAction struct {
	Mark          uint32
	TypeClearMark struct{}
}

func (ClearMarkAction) String

func (c ClearMarkAction) String() string

func (ClearMarkAction) ToFragment

func (c ClearMarkAction) ToFragment(features *Features) string

type CmdIface

type CmdIface interface {
	SetStdin(io.Reader)
	SetStdout(io.Writer)
	SetStderr(io.Writer)
	Run() error
	Start() error
	Kill() error
	Wait() error
	Output() ([]byte, error)
	StdoutPipe() (io.ReadCloser, error)
	String() string
}

type DNATAction

type DNATAction struct {
	DestAddr string
	DestPort uint16
	TypeDNAT struct{}
}

func (DNATAction) String

func (g DNATAction) String() string

func (DNATAction) ToFragment

func (g DNATAction) ToFragment(features *Features) string

type DropAction

type DropAction struct {
	TypeDrop struct{}
}

func (DropAction) String

func (g DropAction) String() string

func (DropAction) ToFragment

func (g DropAction) ToFragment(features *Features) string

type FeatureDetector

type FeatureDetector struct {

	// GetKernelVersionReader returns a reader that supplies the kernel version (presumably backed by a file; confirm).
	GetKernelVersionReader func() (io.Reader, error)
	// Factory for making commands, used by UTs to shim exec.Command().
	NewCmd cmdFactory
	// contains filtered or unexported fields
}

func NewFeatureDetector

func NewFeatureDetector() *FeatureDetector

func (*FeatureDetector) GetFeatures

func (d *FeatureDetector) GetFeatures() *Features

func (*FeatureDetector) RefreshFeatures

func (d *FeatureDetector) RefreshFeatures()

type Features

type Features struct {
	// SNATFullyRandom is true if --random-fully is supported by the SNAT action.
	SNATFullyRandom bool
	// MASQFullyRandom is true if --random-fully is supported by the MASQUERADE action.
	MASQFullyRandom bool
	// RestoreSupportsLock is true if the iptables-restore command supports taking the xtables lock and the
	// associated -w and -W arguments.
	RestoreSupportsLock bool
}

type GotoAction

type GotoAction struct {
	Target   string
	TypeGoto struct{}
}

func (GotoAction) String

func (g GotoAction) String() string

func (GotoAction) ToFragment

func (g GotoAction) ToFragment(features *Features) string

type JumpAction

type JumpAction struct {
	Target   string
	TypeJump struct{}
}

func (JumpAction) String

func (g JumpAction) String() string

func (JumpAction) ToFragment

func (g JumpAction) ToFragment(features *Features) string

type Locker

type Locker struct {
	Lock16 io.Closer
	Lock14 io.Closer
}

func (*Locker) Close

func (l *Locker) Close() error

type LogAction

type LogAction struct {
	Prefix  string
	TypeLog struct{}
}

func (LogAction) String

func (g LogAction) String() string

func (LogAction) ToFragment

func (g LogAction) ToFragment(features *Features) string

type MasqAction

type MasqAction struct {
	ToPorts  string
	TypeMasq struct{}
}

func (MasqAction) String

func (g MasqAction) String() string

func (MasqAction) ToFragment

func (g MasqAction) ToFragment(features *Features) string

type MatchCriteria

type MatchCriteria []string

func Match

func Match() MatchCriteria

func (MatchCriteria) ConntrackState

func (m MatchCriteria) ConntrackState(stateNames string) MatchCriteria

func (MatchCriteria) DestAddrType

func (m MatchCriteria) DestAddrType(addrType AddrType) MatchCriteria

func (MatchCriteria) DestIPPortSet

func (m MatchCriteria) DestIPPortSet(name string) MatchCriteria

func (MatchCriteria) DestIPSet

func (m MatchCriteria) DestIPSet(name string) MatchCriteria

func (MatchCriteria) DestNet

func (m MatchCriteria) DestNet(net string) MatchCriteria

func (MatchCriteria) DestPortRanges

func (m MatchCriteria) DestPortRanges(ports []*proto.PortRange) MatchCriteria

func (MatchCriteria) DestPorts

func (m MatchCriteria) DestPorts(ports ...uint16) MatchCriteria

func (MatchCriteria) ICMPType

func (m MatchCriteria) ICMPType(t uint8) MatchCriteria

func (MatchCriteria) ICMPTypeAndCode

func (m MatchCriteria) ICMPTypeAndCode(t, c uint8) MatchCriteria

func (MatchCriteria) ICMPV6Type

func (m MatchCriteria) ICMPV6Type(t uint8) MatchCriteria

func (MatchCriteria) ICMPV6TypeAndCode

func (m MatchCriteria) ICMPV6TypeAndCode(t, c uint8) MatchCriteria

func (MatchCriteria) IPVSConnection

func (m MatchCriteria) IPVSConnection() MatchCriteria

func (MatchCriteria) InInterface

func (m MatchCriteria) InInterface(ifaceMatch string) MatchCriteria

func (MatchCriteria) MarkClear

func (m MatchCriteria) MarkClear(mark uint32) MatchCriteria

func (MatchCriteria) MarkMatchesWithMask

func (m MatchCriteria) MarkMatchesWithMask(mark, mask uint32) MatchCriteria

func (MatchCriteria) MarkNotClear

func (m MatchCriteria) MarkNotClear(mark uint32) MatchCriteria

func (MatchCriteria) MarkSingleBitSet

func (m MatchCriteria) MarkSingleBitSet(mark uint32) MatchCriteria

func (MatchCriteria) NotDestIPPortSet

func (m MatchCriteria) NotDestIPPortSet(name string) MatchCriteria

func (MatchCriteria) NotDestIPSet

func (m MatchCriteria) NotDestIPSet(name string) MatchCriteria

func (MatchCriteria) NotDestNet

func (m MatchCriteria) NotDestNet(net string) MatchCriteria

func (MatchCriteria) NotDestPortRanges

func (m MatchCriteria) NotDestPortRanges(ports []*proto.PortRange) MatchCriteria

func (MatchCriteria) NotDestPorts

func (m MatchCriteria) NotDestPorts(ports ...uint16) MatchCriteria

func (MatchCriteria) NotICMPType

func (m MatchCriteria) NotICMPType(t uint8) MatchCriteria

func (MatchCriteria) NotICMPTypeAndCode

func (m MatchCriteria) NotICMPTypeAndCode(t, c uint8) MatchCriteria

func (MatchCriteria) NotICMPV6Type

func (m MatchCriteria) NotICMPV6Type(t uint8) MatchCriteria

func (MatchCriteria) NotICMPV6TypeAndCode

func (m MatchCriteria) NotICMPV6TypeAndCode(t, c uint8) MatchCriteria

func (MatchCriteria) NotIPVSConnection

func (m MatchCriteria) NotIPVSConnection() MatchCriteria

func (MatchCriteria) NotMarkMatchesWithMask

func (m MatchCriteria) NotMarkMatchesWithMask(mark, mask uint32) MatchCriteria

func (MatchCriteria) NotProtocol

func (m MatchCriteria) NotProtocol(name string) MatchCriteria

func (MatchCriteria) NotProtocolNum

func (m MatchCriteria) NotProtocolNum(num uint8) MatchCriteria

func (MatchCriteria) NotSourceIPPortSet

func (m MatchCriteria) NotSourceIPPortSet(name string) MatchCriteria

func (MatchCriteria) NotSourceIPSet

func (m MatchCriteria) NotSourceIPSet(name string) MatchCriteria

func (MatchCriteria) NotSourceNet

func (m MatchCriteria) NotSourceNet(net string) MatchCriteria

func (MatchCriteria) NotSourcePortRanges

func (m MatchCriteria) NotSourcePortRanges(ports []*proto.PortRange) MatchCriteria

func (MatchCriteria) NotSourcePorts

func (m MatchCriteria) NotSourcePorts(ports ...uint16) MatchCriteria

func (MatchCriteria) NotSrcAddrType

func (m MatchCriteria) NotSrcAddrType(addrType AddrType, limitIfaceOut bool) MatchCriteria

func (MatchCriteria) OutInterface

func (m MatchCriteria) OutInterface(ifaceMatch string) MatchCriteria

func (MatchCriteria) Protocol

func (m MatchCriteria) Protocol(name string) MatchCriteria

func (MatchCriteria) ProtocolNum

func (m MatchCriteria) ProtocolNum(num uint8) MatchCriteria

func (MatchCriteria) RPFCheckFailed

func (m MatchCriteria) RPFCheckFailed() MatchCriteria

func (MatchCriteria) RPFCheckPassed

func (m MatchCriteria) RPFCheckPassed() MatchCriteria

func (MatchCriteria) Render

func (m MatchCriteria) Render() string

func (MatchCriteria) SourceIPPortSet

func (m MatchCriteria) SourceIPPortSet(name string) MatchCriteria

func (MatchCriteria) SourceIPSet

func (m MatchCriteria) SourceIPSet(name string) MatchCriteria

func (MatchCriteria) SourceNet

func (m MatchCriteria) SourceNet(net string) MatchCriteria

func (MatchCriteria) SourcePortRanges

func (m MatchCriteria) SourcePortRanges(ports []*proto.PortRange) MatchCriteria

func (MatchCriteria) SourcePorts

func (m MatchCriteria) SourcePorts(ports ...uint16) MatchCriteria

func (MatchCriteria) SrcAddrType

func (m MatchCriteria) SrcAddrType(addrType AddrType, limitIfaceOut bool) MatchCriteria

func (MatchCriteria) String

func (m MatchCriteria) String() string

func (MatchCriteria) VXLANVNI

func (m MatchCriteria) VXLANVNI(vni uint32) MatchCriteria

VXLANVNI matches on the VNI contained within the VXLAN header. It assumes that the packet is indeed a VXLAN packet; i.e. it should be used together with a protocol==UDP match and a port==VXLAN port match.

type NoTrackAction

type NoTrackAction struct {
	TypeNoTrack struct{}
}

func (NoTrackAction) String

func (g NoTrackAction) String() string

func (NoTrackAction) ToFragment

func (g NoTrackAction) ToFragment(features *Features) string

type ReturnAction

type ReturnAction struct {
	TypeReturn struct{}
}

func (ReturnAction) String

func (r ReturnAction) String() string

func (ReturnAction) ToFragment

func (r ReturnAction) ToFragment(features *Features) string

type Rule

type Rule struct {
	Match   MatchCriteria
	Action  Action
	Comment string
}

func (Rule) RenderAppend

func (r Rule) RenderAppend(chainName, prefixFragment string, features *Features) string

func (Rule) RenderInsert

func (r Rule) RenderInsert(chainName, prefixFragment string, features *Features) string

func (Rule) RenderReplace

func (r Rule) RenderReplace(chainName string, ruleNum int, prefixFragment string, features *Features) string

type SNATAction

type SNATAction struct {
	ToAddr   string
	TypeSNAT struct{}
}

func (SNATAction) String

func (g SNATAction) String() string

func (SNATAction) ToFragment

func (g SNATAction) ToFragment(features *Features) string

type SetMarkAction

type SetMarkAction struct {
	Mark        uint32
	TypeSetMark struct{}
}

func (SetMarkAction) String

func (c SetMarkAction) String() string

func (SetMarkAction) ToFragment

func (c SetMarkAction) ToFragment(features *Features) string

type SetMaskedMarkAction

type SetMaskedMarkAction struct {
	Mark              uint32
	Mask              uint32
	TypeSetMaskedMark struct{}
}

func (SetMaskedMarkAction) String

func (c SetMaskedMarkAction) String() string

func (SetMaskedMarkAction) ToFragment

func (c SetMaskedMarkAction) ToFragment(features *Features) string

type SharedLock

type SharedLock struct {
	GrabIptablesLocks func(lockFilePath, socketName string, timeout, probeInterval time.Duration) (io.Closer, error)
	// contains filtered or unexported fields
}

SharedLock allows for multiple goroutines to share the iptables lock without blocking on each other. That is safe because each of our goroutines is accessing a different iptables table, so they do not conflict.

func NewSharedLock

func NewSharedLock(lockFilePath string, lockTimeout, lockProbeInterval time.Duration) *SharedLock

func (*SharedLock) Lock

func (l *SharedLock) Lock()

func (*SharedLock) Unlock

func (l *SharedLock) Unlock()

type Table

type Table struct {
	Name      string
	IPVersion uint8
	// contains filtered or unexported fields
}

Table represents a single one of the iptables tables, e.g. "raw", "nat" or "filter". It caches the desired state of that table, then attempts to bring the dataplane into sync with that state when Apply() is called.

API Model

Table supports two classes of operation: "rule insertions" and "full chain updates".

As the name suggests, rule insertions allow for inserting one or more rules into a pre-existing chain. Rule insertions are intended to be used to hook kernel chains (such as "FORWARD") in order to direct them to a Felix-owned chain. It is important to minimise the use of rule insertions because the top-level chains are shared resources, which can be modified by other applications. In addition, rule insertions are harder to clean up after an upgrade to a new version of Felix (because we need a way to recognise our rules in a crowded chain).

Full chain updates replace the entire contents of a Felix-owned chain with a new set of rules. Limiting the operation to "replace whole chain" in this way significantly simplifies the API. Although the API operates on full chains, the dataplane write logic tries to avoid rewriting a whole chain if only part of it has changed (this was not the case in Felix 1.4). This prevents iptables counters from being reset unnecessarily.

In either case, the actual dataplane updates are deferred until the next call to Apply(), so chain updates and insertions may occur in any order as long as they are consistent (i.e. there are no references to non-existent chains) by the time Apply() is called.

Design

We had several goals in designing the iptables machinery in Felix 2.0.0:

(1) High performance. Felix needs to handle high churn of endpoints and rules.

(2) Ability to restore rules, even if other applications accidentally break them: we found that other applications sometimes misuse iptables-save and iptables-restore to do a read-modify-write cycle. That behaviour is not safe under concurrent modification.

(3) Avoid rewriting rules that haven't changed so that we don't reset iptables counters.

(4) Avoid parsing iptables commands (for example, the output from iptables/iptables-save). This is very hard to do robustly because iptables rules do not necessarily round-trip through the kernel in the same form. In addition, the format could easily change due to changes or fixes in the iptables/iptables-save command.

(5) Support for graceful restart. I.e. deferring potentially incorrect updates until we're in-sync with the datastore. For example, if we have 100 endpoints on a host, after a restart we don't want to write a "dispatch" chain when we learn about the first endpoint (possibly replacing an existing one that had all 100 endpoints in place and causing traffic to glitch); instead, we want to defer until we've seen all 100 and then do the write.

(6) Improved handling of rule inserts vs Felix 1.4.x. Previous versions of Felix sometimes inserted special-case rules that were not marked as Calico rules in any sensible way, making cleanup of those rules after an upgrade difficult.

Implementation

For high performance (goal 1), we use iptables-restore to do bulk updates to iptables. This is much faster than individual iptables calls.

To allow us to restore rules after they are clobbered by another process (goal 2), we cache them at this layer. This means that we don't need a mechanism to ask the other layers of Felix to do a resync. Note: Table doesn't start a thread of its own so it relies on the main event loop to trigger any dataplane resync polls.

There is tension between goals 3 and 4. In order to avoid full rewrites (goal 3), we need to know what rules are in place, but we also don't want to parse them to find out (goal 4)! As a compromise, we deterministically calculate an ID for each rule and store it in an iptables comment. Then, when we want to know what rules are in place, we _do_ parse the output from iptables-save, but only to read back the rule IDs. That limits the amount of parsing we need to do and keeps it manageable/robust.

To support graceful restart (goal 5), we defer updates to the dataplane until Apply() is called, then we do an atomic update using iptables-restore. As long as the first Apply() call is after we're in sync, the dataplane won't be touched until the right time. Felix 1.4.x had a more complex mechanism to support partial updates during the graceful restart period but Felix 2.0.0 resyncs so quickly that the added complexity is not justified.

To make it easier to manage rule insertions (goal 6), we add rule IDs to those too. With rule IDs in place, we can easily distinguish Calico rules from non-Calico rules without needing to know exactly which rules to expect. To deal with cleanup after upgrade from older versions that did not write rule IDs, we support special-case regexes to detect our old rules.

Thread safety

Table doesn't do any internal synchronization; its methods should only be called from one thread. To avoid conflicts in the dataplane itself, there should only be one instance of Table for each iptables table in an application.

func NewTable

func NewTable(
	name string,
	ipVersion uint8,
	hashPrefix string,
	iptablesWriteLock sync.Locker,
	detector *FeatureDetector,
	options TableOptions,
) *Table

func (*Table) Apply

func (t *Table) Apply() (rescheduleAfter time.Duration)

func (*Table) InvalidateDataplaneCache

func (t *Table) InvalidateDataplaneCache(reason string)

func (*Table) RemoveChainByName

func (t *Table) RemoveChainByName(name string)

func (*Table) RemoveChains

func (t *Table) RemoveChains(chains []*Chain)

func (*Table) SetRuleInsertions

func (t *Table) SetRuleInsertions(chainName string, rules []Rule)

func (*Table) UpdateChain

func (t *Table) UpdateChain(chain *Chain)

func (*Table) UpdateChains

func (t *Table) UpdateChains(chains []*Chain)

type TableOptions

type TableOptions struct {
	HistoricChainPrefixes    []string
	ExtraCleanupRegexPattern string
	InsertMode               string
	RefreshInterval          time.Duration
	PostWriteInterval        time.Duration

	// LockTimeout is the timeout to use for iptables-restore's native xtables lock.
	LockTimeout time.Duration
	// LockProbeInterval is the probe interval to use for iptables-restore's native xtables lock.
	LockProbeInterval time.Duration

	// NewCmdOverride for tests, if non-nil, factory to use instead of the real exec.Command()
	NewCmdOverride cmdFactory
	// SleepOverride for tests, if non-nil, replacement for time.Sleep()
	SleepOverride func(d time.Duration)
	// NowOverride for tests, if non-nil, replacement for time.Now()
	NowOverride func() time.Time
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL